Change config for fp8 gemm to match Triton 3.2 (pytorch#746)

adamomainz · facebook-github-bot · commit 074b15ac94c1 · 2025-02-21T15:00:36.000-08:00
Summary:
X-link: pytorch#3668
Pull Request resolved: facebookresearch/FBGEMM#746
Testing for now.
Reviewed By: karthik-man
Differential Revision: D69322680
fbshipit-source-id: b2abbb62e342b7cc2dd848f6894ef1d348ff9e9d
diff --git a/fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py b/fbgemm_gpu/experimental/gemm/triton_gemm/fp8_gemm.py
@@ -2876,7 +2876,7 @@ def get_full_non_persistent_tuning_space():
     # For now we see better perf with num_stages=0 for all gemm configs we care
     # But keep this explicit so that we do not forget we may need to set it to
     # other values in the future
-    num_stage_range = [0]
+    num_stage_range = [2]
     waves_per_eu_range = [0]
     matrix_instr_nonkdim_range = [16, 32]
     kpack_range = [1, 2]