support fp16 dtypes for input weight and bias #3931

Closed
wants to merge 1 commit
5 changes: 3 additions & 2 deletions fbgemm_gpu/experimental/gen_ai/src/quantize/quantize.cu
@@ -1109,8 +1109,9 @@ std::vector<at::Tensor> quantize_fp8_per_row(
       "Invalid dim. The dim of input should be greater than or equal to 2");
   TORCH_CHECK(
       input.scalar_type() == torch::kBFloat16 ||
-          input.scalar_type() == torch::kFloat,
-      "Invalid datatype. input must be BF16 or FP32");
+          input.scalar_type() == torch::kFloat ||
+          input.scalar_type() == torch::kHalf,
+      "Invalid datatype. input must be BF16, FP16 or FP32");
   TORCH_CHECK(
       !stochastic_rounding || input.size(-1) % 4 == 0,
       "input row dim must be 4's multiple when stochastic_rounding is True");