Skip to content

[minor] cleanup cmakelists.txt #5420

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/pr-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,6 @@ jobs:
timeout-minutes: 10
run: |
cd test/srt
USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

- name: Benchmark single latency + torch.compile (TP=2)
Expand Down
28 changes: 17 additions & 11 deletions sgl-kernel/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ project(sgl-kernel LANGUAGES CXX CUDA)
# CMake
cmake_policy(SET CMP0169 OLD)
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
set(CMAKE_COLOR_DIAGNOSTICS ON)
set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_SHARED_LIBRARY_PREFIX "")

# Python
find_package(Python COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT} REQUIRED)
Expand Down Expand Up @@ -82,8 +86,6 @@ include_directories(
${PROJECT_SOURCE_DIR}/csrc
${repo-cutlass_SOURCE_DIR}/include
${repo-cutlass_SOURCE_DIR}/tools/util/include
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
${repo-cutlass_SOURCE_DIR}/examples/common
${repo-flashinfer_SOURCE_DIR}/include
${repo-flashinfer_SOURCE_DIR}/csrc
)
Expand All @@ -109,6 +111,8 @@ set(SGL_KERNEL_CUDA_FLAGS
"--expt-relaxed-constexpr"
"--expt-extended-lambda"
"--threads=32"

# Suppress warnings
"-Xcompiler=-Wconversion"
"-Xcompiler=-fno-strict-aliasing"

Expand Down Expand Up @@ -208,17 +212,19 @@ Python_add_library(common_ops MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI

target_compile_options(common_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>)
target_include_directories(common_ops PRIVATE
${TORCH_INCLUDE_DIRS}
${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src)
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
${repo-cutlass_SOURCE_DIR}/examples/common
${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
)
target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt)

target_compile_definitions(common_ops PRIVATE
FLASHATTENTION_DISABLE_BACKWARD
FLASHATTENTION_DISABLE_DROPOUT
FLASHATTENTION_DISABLE_UNEVEN_K
)
FLASHATTENTION_DISABLE_BACKWARD
FLASHATTENTION_DISABLE_DROPOUT
FLASHATTENTION_DISABLE_UNEVEN_K
)

install(TARGETS common_ops LIBRARY DESTINATION "sgl_kernel")
install(TARGETS common_ops LIBRARY DESTINATION sgl_kernel)

# ============================ Optional Install ============================= #
# set flash-attention sources file
Expand Down Expand Up @@ -279,8 +285,8 @@ if (SGL_KERNEL_ENABLE_FA3)

target_compile_options(flash_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_FLASH_KERNEL_CUDA_FLAGS}>)
target_include_directories(flash_ops PRIVATE
${TORCH_INCLUDE_DIRS}
${repo-flash-attention_SOURCE_DIR}/hopper)
${repo-flash-attention_SOURCE_DIR}/hopper
)
target_link_libraries(flash_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda)

install(TARGETS flash_ops LIBRARY DESTINATION "sgl_kernel")
Expand Down
2 changes: 2 additions & 0 deletions sgl-kernel/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ docker run --rm \
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
export CUDA_VERSION=${CUDA_VERSION} && \
export CMAKE_BUILD_PARALLEL_LEVEL=96 && \
export MAX_JOBS=96 && \
mkdir -p /usr/lib/x86_64-linux-gnu/ && \
ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
cd /sgl-kernel && \
Expand Down
Loading