Skip to content

Commit 2284d6b

Browse files
authored
Merge branch 'main' into zhyncs/upd
2 parents 9bf1d21 + f88f7e1 commit 2284d6b

File tree

4 files changed

+19
-9
lines changed

4 files changed

+19
-9
lines changed

python/sglang/bench_serving.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -490,14 +490,15 @@ def get_dataset(args, tokenizer):
490490
prompt_suffix=args.prompt_suffix,
491491
apply_chat_template=args.apply_chat_template,
492492
)
493-
elif args.dataset_name == "random":
493+
elif args.dataset_name.startswith("random"):
494494
input_requests = sample_random_requests(
495495
input_len=args.random_input_len,
496496
output_len=args.random_output_len,
497497
num_prompts=args.num_prompts,
498498
range_ratio=args.random_range_ratio,
499499
tokenizer=tokenizer,
500500
dataset_path=args.dataset_path,
501+
random_sample=args.dataset_name == "random",
501502
)
502503
elif args.dataset_name == "generated-shared-prefix":
503504
input_requests = sample_generated_shared_prefix_requests(
@@ -687,6 +688,7 @@ def sample_random_requests(
687688
range_ratio: float,
688689
tokenizer: PreTrainedTokenizerBase,
689690
dataset_path: str,
691+
random_sample: bool = True,
690692
) -> List[Tuple[str, int, int]]:
691693

692694
input_lens = np.random.randint(
@@ -700,11 +702,15 @@ def sample_random_requests(
700702
size=num_prompts,
701703
)
702704

703-
if True:
705+
if random_sample:
704706
# Sample token ids from ShareGPT and repeat/truncate them to satisfy the input_lens
705707

706708
# Download sharegpt if necessary
707709
if not os.path.isfile(dataset_path):
710+
print(
711+
"If you do not want to randomly sample from a dataset,"
712+
" please use --dataset-name random-ids."
713+
)
708714
dataset_path = download_and_cache_file(SHAREGPT_URL)
709715

710716
# Load the dataset.
@@ -1223,7 +1229,7 @@ async def limited_request_func(request_func_input, pbar):
12231229
output_file_name = args.output_file
12241230
else:
12251231
now = datetime.now().strftime("%m%d")
1226-
if args.dataset_name == "random":
1232+
if args.dataset_name.startswith("random"):
12271233
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_{args.random_input_len}_{args.random_output_len}.jsonl"
12281234
else:
12291235
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
@@ -1442,7 +1448,7 @@ def __call__(self, parser, namespace, values, option_string=None):
14421448
"--dataset-name",
14431449
type=str,
14441450
default="sharegpt",
1445-
choices=["sharegpt", "random", "generated-shared-prefix"],
1451+
choices=["sharegpt", "random", "random-ids", "generated-shared-prefix"],
14461452
help="Name of the dataset to benchmark on.",
14471453
)
14481454
parser.add_argument(

python/sglang/srt/layers/rotary_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def forward_hip(self, *args, **kwargs):
649649
return self.forward_native(*args, **kwargs)
650650

651651
def forward(self, *args, **kwargs):
652-
if torch._dynamo.is_compiling:
652+
if torch.compiler.is_compiling():
653653
return self.forward_native(*args, **kwargs)
654654
if _is_cuda_available:
655655
return self.forward_cuda(*args, **kwargs)

test/srt/test_eval_fp8_accuracy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
from types import SimpleNamespace
33

4-
from sglang.srt.utils import kill_process_tree
4+
from sglang.srt.utils import is_hip, kill_process_tree
55
from sglang.test.run_eval import run_eval
66
from sglang.test.test_utils import (
77
DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST,
@@ -38,7 +38,11 @@ def test_mmlu(self):
3838
)
3939

4040
metrics = run_eval(args)
41-
self.assertGreaterEqual(metrics["score"], 0.61)
41+
if is_hip():
42+
# Another threshold for AMD because fp8 dtype is different
43+
self.assertGreaterEqual(metrics["score"], 0.609375)
44+
else:
45+
self.assertGreaterEqual(metrics["score"], 0.61)
4246

4347

4448
class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):

test/srt/test_moe_ep.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_mmlu(self):
4545
)
4646

4747
metrics = run_eval(args)
48-
self.assertGreater(metrics["score"], 0.5)
48+
self.assertGreaterEqual(metrics["score"], 0.5)
4949

5050
def test_mgsm_en(self):
5151
args = SimpleNamespace(
@@ -57,7 +57,7 @@ def test_mgsm_en(self):
5757
)
5858

5959
metrics = run_eval(args)
60-
self.assertGreater(metrics["score"], 0.8)
60+
self.assertGreaterEqual(metrics["score"], 0.8)
6161

6262

6363
class TestEpMoEFP8(CustomTestCase):

0 commit comments

Comments (0)