From 3806ab1805fa14dadf1f94b9692a63157d9786ef Mon Sep 17 00:00:00 2001
From: "Wu, Chunyuan"
Date: Tue, 25 Mar 2025 17:11:31 +0800
Subject: [PATCH] Add RECORD_FUNCTION in bmm_cpu, int8 mm, per token quant

---
 sgl-kernel/csrc/cpu/bmm.cpp       | 1 +
 sgl-kernel/csrc/cpu/gemm_int8.cpp | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/sgl-kernel/csrc/cpu/bmm.cpp b/sgl-kernel/csrc/cpu/bmm.cpp
index 31a09b111e..337d6d4c67 100644
--- a/sgl-kernel/csrc/cpu/bmm.cpp
+++ b/sgl-kernel/csrc/cpu/bmm.cpp
@@ -77,6 +77,7 @@ void bmm_kernel_impl(
 //
 void bmm_cpu(at::Tensor& out, at::Tensor& mat1, at::Tensor& mat2, bool is_vnni, std::optional<at::Tensor>& scale) {
+  RECORD_FUNCTION("sgl-kernel::bmm_cpu", std::vector<c10::IValue>({out, mat1, mat2}));
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);

diff --git a/sgl-kernel/csrc/cpu/gemm_int8.cpp b/sgl-kernel/csrc/cpu/gemm_int8.cpp
index f9931ada05..113e6ff047 100644
--- a/sgl-kernel/csrc/cpu/gemm_int8.cpp
+++ b/sgl-kernel/csrc/cpu/gemm_int8.cpp
@@ -249,6 +249,8 @@ void int8_scaled_mm_kernel_impl(
 } // anonymous namespace

 std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
+  RECORD_FUNCTION("sgl-kernel::per_token_quant_int8_cpu", std::vector<c10::IValue>({A}));
+
   CHECK_LAST_DIM_CONTIGUOUS_INPUT(A);
   CHECK_DIM(2, A);

@@ -294,6 +296,7 @@ std::tuple<at::Tensor, at::Tensor> per_token_quant_int8_cpu(at::Tensor& A) {
 at::Tensor int8_scaled_mm_cpu(at::Tensor& mat1, at::Tensor& mat2, at::Tensor& scales1, at::Tensor& scales2, std::optional<at::Tensor>& bias, at::ScalarType out_dtype, bool is_vnni) {
+  RECORD_FUNCTION("sgl-kernel::int8_scaled_mm_cpu", std::vector<c10::IValue>({mat1, mat2, scales1, scales2, bias}));
   auto packed_w = is_vnni ? mat2 : convert_weight_packed(mat2);