Skip to content

Commit 2284d6b

Browse files
authored
Merge branch 'main' into zhyncs/upd
2 parents 9bf1d21 + f88f7e1 commit 2284d6b

File tree

4 files changed

+19
-9
lines changed

4 files changed

+19
-9
lines changed

python/sglang/bench_serving.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -490,14 +490,15 @@ def get_dataset(args, tokenizer):
490490
prompt_suffix=args.prompt_suffix,
491491
apply_chat_template=args.apply_chat_template,
492492
)
493-
elif args.dataset_name == "random":
493+
elif args.dataset_name.startswith("random"):
494494
input_requests = sample_random_requests(
495495
input_len=args.random_input_len,
496496
output_len=args.random_output_len,
497497
num_prompts=args.num_prompts,
498498
range_ratio=args.random_range_ratio,
499499
tokenizer=tokenizer,
500500
dataset_path=args.dataset_path,
501+
random_sample=args.dataset_name == "random",
501502
)
502503
elif args.dataset_name == "generated-shared-prefix":
503504
input_requests = sample_generated_shared_prefix_requests(
@@ -687,6 +688,7 @@ def sample_random_requests(
687688
range_ratio: float,
688689
tokenizer: PreTrainedTokenizerBase,
689690
dataset_path: str,
691+
random_sample: bool = True,
690692
) -> List[Tuple[str, int, int]]:
691693

692694
input_lens = np.random.randint(
@@ -700,11 +702,15 @@ def sample_random_requests(
700702
size=num_prompts,
701703
)
702704

703-
if True:
705+
if random_sample:
704706
# Sample token ids from ShareGPT and repeat/truncate them to satisfy the input_lens
705707

706708
# Download sharegpt if necessary
707709
if not os.path.isfile(dataset_path):
710+
print(
711+
"If you do not want to randomly sample from a dataset,"
712+
" please use --dataset-name random-ids."
713+
)
708714
dataset_path = download_and_cache_file(SHAREGPT_URL)
709715

710716
# Load the dataset.
@@ -1223,7 +1229,7 @@ async def limited_request_func(request_func_input, pbar):
12231229
output_file_name = args.output_file
12241230
else:
12251231
now = datetime.now().strftime("%m%d")
1226-
if args.dataset_name == "random":
1232+
if args.dataset_name.startswith("random"):
12271233
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_{args.random_input_len}_{args.random_output_len}.jsonl"
12281234
else:
12291235
output_file_name = f"{args.backend}_{now}_{args.num_prompts}_sharegpt.jsonl"
@@ -1442,7 +1448,7 @@ def __call__(self, parser, namespace, values, option_string=None):
14421448
"--dataset-name",
14431449
type=str,
14441450
default="sharegpt",
1445-
choices=["sharegpt", "random", "generated-shared-prefix"],
1451+
choices=["sharegpt", "random", "random-ids", "generated-shared-prefix"],
14461452
help="Name of the dataset to benchmark on.",
14471453
)
14481454
parser.add_argument(

python/sglang/srt/layers/rotary_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def forward_hip(self, *args, **kwargs):
649649
return self.forward_native(*args, **kwargs)
650650

651651
def forward(self, *args, **kwargs):
652-
if torch._dynamo.is_compiling:
652+
if torch.compiler.is_compiling():
653653
return self.forward_native(*args, **kwargs)
654654
if _is_cuda_available:
655655
return self.forward_cuda(*args, **kwargs)

test/srt/test_eval_fp8_accuracy.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import unittest
22
from types import SimpleNamespace
33

4-
from sglang.srt.utils import kill_process_tree
4+
from sglang.srt.utils import is_hip, kill_process_tree
55
from sglang.test.run_eval import run_eval
66
from sglang.test.test_utils import (
77
DEFAULT_FP8_MODEL_NAME_FOR_ACCURACY_TEST,
@@ -38,7 +38,11 @@ def test_mmlu(self):
3838
)
3939

4040
metrics = run_eval(args)
41-
self.assertGreaterEqual(metrics["score"], 0.61)
41+
if is_hip():
42+
# Another threshold for AMD because fp8 dtype is different
43+
self.assertGreaterEqual(metrics["score"], 0.609375)
44+
else:
45+
self.assertGreaterEqual(metrics["score"], 0.61)
4246

4347

4448
class TestEvalFP8DynamicQuantAccuracy(CustomTestCase):

test/srt/test_moe_ep.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def test_mmlu(self):
4545
)
4646

4747
metrics = run_eval(args)
48-
self.assertGreater(metrics["score"], 0.5)
48+
self.assertGreaterEqual(metrics["score"], 0.5)
4949

5050
def test_mgsm_en(self):
5151
args = SimpleNamespace(
@@ -57,7 +57,7 @@ def test_mgsm_en(self):
5757
)
5858

5959
metrics = run_eval(args)
60-
self.assertGreater(metrics["score"], 0.8)
60+
self.assertGreaterEqual(metrics["score"], 0.8)
6161

6262

6363
class TestEpMoEFP8(CustomTestCase):

0 commit comments

Comments (0)