|
| 1 | +tasks: |
| 2 | + - name: sglang-8192-1024-concurrency1 |
| 3 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 4 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl |
| 5 | + |
| 6 | + - name: sglang-8192-1024-concurrency2 |
| 7 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 8 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl |
| 9 | + |
| 10 | + - name: sglang-8192-1024-concurrency4 |
| 11 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 12 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl |
| 13 | + |
| 14 | + - name: sglang-8192-1024-concurrency8 |
| 15 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 16 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl |
| 17 | + |
| 18 | + - name: sglang-8192-1024-concurrency16 |
| 19 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 20 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl |
| 21 | + |
| 22 | + - name: sglang-8192-1024-concurrency24 |
| 23 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 24 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl |
| 25 | + |
| 26 | + - name: sglang-8192-1024-concurrency32 |
| 27 | + server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8 |
| 28 | + client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl |
0 commit comments