7
7
from sglang .srt .utils import get_device_sm , kill_process_tree
8
8
from sglang .test .few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
9
9
from sglang .test .test_utils import (
10
- DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST ,
11
- DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST ,
12
10
DEFAULT_MLA_MODEL_NAME_FOR_TEST ,
13
11
DEFAULT_MODEL_NAME_FOR_TEST ,
14
12
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH ,
@@ -125,7 +123,7 @@ def get_server_args(cls):
125
123
class TestFlashAttention3SpeculativeDecode (BaseFlashAttentionTest ):
126
124
"""Test FlashAttention3 with speculative decode enabled."""
127
125
128
- model = DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST
126
+ model = "meta-llama/Llama-3.1-8B-Instruct"
129
127
130
128
@classmethod
131
129
def get_server_args (cls ):
@@ -137,7 +135,7 @@ def get_server_args(cls):
137
135
"--speculative-algorithm" ,
138
136
"EAGLE3" ,
139
137
"--speculative-draft" ,
140
- DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST ,
138
+ "jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B" ,
141
139
"--speculative-num-steps" ,
142
140
"3" ,
143
141
"--speculative-eagle-topk" ,
0 commit comments