Skip to content

Commit 40c7200

Browse files
zhyncs and jimoosciuc
authored and committed
fix: solve cu118 issue for cutlass mla (sgl-project#5331)
1 parent dd48f6d commit 40c7200

File tree

3 files changed

+17
-7
lines changed

3 files changed

+17
-7
lines changed

.github/workflows/pr-test-sgl-kernel.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,14 @@ jobs:
3535
runs-on: sgl-kernel-build-node
3636
strategy:
3737
matrix:
38-
python-version: ['3.9']
39-
cuda-version: ['12.4']
40-
38+
include:
39+
- python-version: '3.9'
40+
cuda-version: '11.8'
41+
- python-version: '3.9'
42+
cuda-version: '12.4'
43+
- python-version: '3.9'
44+
cuda-version: '12.8'
45+
name: Build Wheel (CUDA ${{ matrix.cuda-version }})
4146
steps:
4247
- name: Cleanup
4348
run: |
@@ -52,13 +57,14 @@ jobs:
5257
with:
5358
python-version: ${{ matrix.python-version }}
5459

55-
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
60+
- name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
5661
run: |
5762
cd sgl-kernel
5863
chmod +x ./build.sh
5964
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
6065
61-
- name: Upload artifacts
66+
- name: Upload artifacts (only for CUDA 12.4)
67+
if: ${{ matrix.cuda-version == '12.4' }}
6268
uses: actions/upload-artifact@v4
6369
with:
6470
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
@@ -128,7 +134,7 @@ jobs:
128134
pip3 uninstall sgl-kernel -y
129135
130136
finish:
131-
needs: [unit-test, mla-test, lint]
137+
needs: [unit-test, mla-test, lint, build-wheels]
132138
runs-on: ubuntu-latest
133139
steps:
134140
- name: Check all dependent job statuses

.github/workflows/release-whl-kernel.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ on:
1414
jobs:
1515
build-wheels:
1616
if: github.repository == 'sgl-project/sglang'
17-
runs-on: ubuntu-latest
17+
runs-on: sgl-kernel-build-node
1818
strategy:
1919
matrix:
2020
python-version: ['3.9']

sgl-kernel/csrc/attention/cutlass_mla_kernel.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ limitations under the License.
2525
#include <device/sm100_mla.hpp>
2626
#include <kernel/sm100_mla_tile_scheduler.hpp>
2727

28+
#if defined CUDA_VERSION && CUDA_VERSION >= 12040
29+
2830
#define CUTLASS_CHECK(status) \
2931
{ \
3032
cutlass::Status error = status; \
@@ -205,3 +207,5 @@ int64_t cutlass_mla_get_workspace_size(int64_t max_seq_len, int64_t num_batches,
205207

206208
return MlaSm100Type::Fmha::get_workspace_size(arguments);
207209
}
210+
211+
#endif

0 commit comments

Comments
 (0)