Skip to content

Commit 3601167

Browse files
zhyncsjimoosciuc
authored and committed
feat: update experiment_runner (sgl-project#5360)
1 parent 707b43e commit 3601167

File tree

3 files changed

+81
-1
lines changed

3 files changed

+81
-1
lines changed

test/srt/configs/llama_405b.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
tasks:
2+
- name: sglang-8192-1024-concurrency1
3+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
4+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl
5+
6+
- name: sglang-8192-1024-concurrency2
7+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
8+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl
9+
10+
- name: sglang-8192-1024-concurrency4
11+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
12+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl
13+
14+
- name: sglang-8192-1024-concurrency8
15+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
16+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl
17+
18+
- name: sglang-8192-1024-concurrency16
19+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
20+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl
21+
22+
- name: sglang-8192-1024-concurrency24
23+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
24+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl
25+
26+
- name: sglang-8192-1024-concurrency32
27+
server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
28+
client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl

test/srt/experiment_runner.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,11 @@ def format_results(results: List[TaskResult]) -> str:
317317
return "\n".join(output)
318318

319319

320+
def get_bool_env_var(name: str, default: str = "false") -> bool:
321+
value = os.getenv(name, default)
322+
return value.lower() in ("true", "1")
323+
324+
320325
def write_in_github_step_summary(results: List[TaskResult]):
321326
"""Write formatted results to GitHub step summary."""
322327
if not os.environ.get("GITHUB_STEP_SUMMARY"):
@@ -349,7 +354,8 @@ def main():
349354
result = runner.run_task(config)
350355
results.append(result)
351356

352-
write_in_github_step_summary(results)
357+
if get_bool_env_var("SGLANG_IS_IN_CI"):
358+
write_in_github_step_summary(results)
353359
except Exception as e:
354360
logger.error(f"Error: {e}")
355361
raise

test/srt/parse_results.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import json
2+
import pandas as pd
3+
import argparse
4+
import os
5+
from tabulate import tabulate
6+
7+
# Parse command-line arguments
8+
parser = argparse.ArgumentParser(description="Parse JSONL benchmark and summarize.")
9+
parser.add_argument("input_file", type=str, help="Path to input JSONL file")
10+
args = parser.parse_args()
11+
12+
input_file = args.input_file
13+
base_name = os.path.splitext(os.path.basename(input_file))[0]
14+
output_file = f"{base_name}_summary.csv"
15+
16+
fields = [
17+
"max_concurrency",
18+
"output_throughput",
19+
"mean_ttft_ms",
20+
"median_ttft_ms",
21+
"p99_ttft_ms",
22+
"mean_tpot_ms",
23+
"median_tpot_ms",
24+
"p99_tpot_ms",
25+
]
26+
27+
# Read JSONL and parse
28+
results = []
29+
with open(input_file, "r") as f:
30+
for line in f:
31+
data = json.loads(line)
32+
row = {field: data.get(field, None) for field in fields}
33+
max_conc = data.get("max_concurrency")
34+
out_tp = data.get("output_throughput")
35+
row["per_user_throughput"] = out_tp / max_conc if max_conc else None
36+
results.append(row)
37+
38+
# Convert to DataFrame
39+
df = pd.DataFrame(results)
40+
41+
# Save to CSV
42+
df.to_csv(output_file, index=False)
43+
print(f"\nSaved summary to: {output_file}\n")
44+
45+
# Print ASCII table
46+
print(tabulate(df, headers="keys", tablefmt="grid", floatfmt=".3f"))

0 commit comments

Comments
 (0)