   accuracy-test-1-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
-    runs-on: linux-mi300-gpu-1
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -38,12 +41,12 @@ jobs:
           else
             DEVICE_FLAG="--device /dev/dri"
           fi
-          docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
           docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
             -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
             --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
             -w /sglang-checkout --name ci_sglang \
-            ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+            lmsysorg/sglang:v0.4.6.post3-rocm630

       - name: Install dependencies
         run: |
@@ -66,10 +69,54 @@ jobs:
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_eval_fp8_accuracy.py
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_qwen_models.py

+  accuracy-test-2-gpu-amd:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup docker
+        run: |
+          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
+          if [ -f "/etc/podinfo/gha-render-devices" ]; then
+            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
+          else
+            DEVICE_FLAG="--device /dev/dri"
+          fi
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
+          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
+            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
+            -w /sglang-checkout --name ci_sglang \
+            lmsysorg/sglang:v0.4.6.post3-rocm630
+
+      - name: Install dependencies
+        run: |
+          docker exec ci_sglang pip install --upgrade pip
+          docker exec ci_sglang pip uninstall sgl-kernel -y || true
+          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
+          docker exec ci_sglang pip install -e "python[dev_hip]"
+
+          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
+          docker exec -w /human-eval ci_sglang pip install -e .
+
+      - name: Evaluate accuracy (TP=2)
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_moe_eval_accuracy_large.py
+
   mla-test-1-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
-    runs-on: linux-mi300-gpu-1
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -82,12 +129,12 @@ jobs:
           else
             DEVICE_FLAG="--device /dev/dri"
           fi
-          docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
           docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
             -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
             --cap-add=SYS_PTRACE -e HF_TOKEN=${{ secrets.AMD_HF_TOKEN }} --security-opt seccomp=unconfined \
             -w /sglang-checkout --name ci_sglang \
-            ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+            lmsysorg/sglang:v0.4.6.post3-rocm630

       - name: Install dependencies
         run: |
@@ -104,10 +151,126 @@ jobs:
         run: |
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 test_mla.py

+  performance-test-1-gpu-part-1-amd:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup docker
+        run: |
+          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
+          if [ -f "/etc/podinfo/gha-render-devices" ]; then
+            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
+          else
+            DEVICE_FLAG="--device /dev/dri"
+          fi
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
+          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
+            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
+            -w /sglang-checkout --name ci_sglang \
+            lmsysorg/sglang:v0.4.6.post3-rocm630
+
+      - name: Install dependencies
+        run: |
+          docker exec ci_sglang pip install --upgrade pip
+          docker exec ci_sglang pip uninstall sgl-kernel -y || true
+          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
+          docker exec ci_sglang pip install -e "python[dev_hip]"
+
+          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
+          docker exec -w /human-eval ci_sglang pip install -e .
+
+      - name: Benchmark single latency
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default
+
+      - name: Benchmark online latency
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default
+
+      - name: Benchmark offline throughput
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default
+
+      - name: Benchmark offline throughput (Non-streaming, small batch size)
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
+
+      - name: Benchmark online latency (EAGLE)
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle
+
+  performance-test-1-gpu-part-2-amd:
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Setup docker
+        run: |
+          # Ensure GPU isolation if pod is part of kubernetes setup with DEVICE_FLAG.
+          if [ -f "/etc/podinfo/gha-render-devices" ]; then
+            DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
+          else
+            DEVICE_FLAG="--device /dev/dri"
+          fi
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
+          docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
+            -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
+            --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
+            -w /sglang-checkout --name ci_sglang \
+            lmsysorg/sglang:v0.4.6.post3-rocm630
+
+      - name: Install dependencies
+        run: |
+          docker exec ci_sglang pip install --upgrade pip
+          docker exec ci_sglang pip uninstall sgl-kernel -y || true
+          docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
+          docker exec ci_sglang pip install -e "python[dev_hip]"
+
+          docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
+          docker exec -w /human-eval ci_sglang pip install -e .
+
+      - name: Benchmark offline throughput (w/o RadixAttention)
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache
+
+      - name: Benchmark offline throughput (w/ Triton)
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend
+
+      - name: Benchmark offline throughput (w/ FP8)
+        timeout-minutes: 10
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_AMD_CI=1 -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
+
   bench-test-2-gpu-amd:
     if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
       github.event.pull_request.draft == false
-    runs-on: linux-mi300-gpu-2
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
+    runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -120,12 +283,12 @@ jobs:
           else
             DEVICE_FLAG="--device /dev/dri"
           fi
-          docker pull ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+          docker pull lmsysorg/sglang:v0.4.6.post3-rocm630
           docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
             -v ${{ github.workspace }}:/sglang-checkout --ipc=host --group-add video \
             --cap-add=SYS_PTRACE -e HF_TOKEN=${HF_TOKEN} --security-opt seccomp=unconfined \
             -w /sglang-checkout --name ci_sglang \
-            ghcr.io/saienduri/sglang-aiter-v0.1.1:428
+            lmsysorg/sglang:v0.4.6.post3-rocm630

       - name: Install dependencies
         run: |
@@ -141,15 +304,36 @@ jobs:
           mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
           docker cp ./dummy-grok ci_sglang:/

-      - name: Evaluate Benchmark
+      - name: Benchmark dummy grok (TP=2)
         timeout-minutes: 20
         run: |
           docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 models/test_dummy_grok_models.py

+      - name: Benchmark single latency (TP=2)
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+
+      - name: Benchmark single latency + torch.compile (TP=2)
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 ci_sglang python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1
+
+      - name: Benchmark offline throughput (TP=2)
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default
+
+      - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
+        timeout-minutes: 20
+        run: |
+          docker exec -w /sglang-checkout/test/srt -e SGLANG_IS_IN_CI=1 -e SGLANG_AMD_CI=1 ci_sglang python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
+
   finish:
     if: always()
     needs: [
-      accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd
+      accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
+      accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd
     ]
     runs-on: ubuntu-latest
     steps: