Skip to content

Commit d715a1b

Browse files
Jiyuan Zhang and facebook-github-bot
authored and committed
add dynamic quantize gemm benchmark [step 2: fp16->int8 quantize] (pytorch#2295)
Summary: Add an FX kernel benchmark for dynamic quantized gemm step 2; use the `quantize_step` parameter to differentiate the stages; use separate Net modules for step 2 vs. step 1. Differential Revision: D52136852
1 parent 2a7b3ab commit d715a1b

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

fbgemm_gpu/src/qlinear_channelwise/qlinear_channelwise_mtia.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@ static at::Tensor qlinear_channelwise(
2424
return x;
2525
}
2626

27+
static at::Tensor qlinear_quant(
28+
at::Tensor x,
29+
at::Tensor weight,
30+
at::Tensor bias,
31+
at::Tensor input_scale,
32+
at::Tensor weight_scale,
33+
at::Tensor weight_zero_point,
34+
at::Tensor relu) {
35+
assert(x.options().dtype() == at::kHalf);
36+
assert(weight.options().dtype() == at::kQInt8);
37+
assert(bias.options().dtype() == at::kFloat);
38+
assert(input_scale.options().dtype() == at::kFloat);
39+
assert(weight_scale.options().dtype() == at::kFloat);
40+
assert(weight_zero_point.options().dtype() == at::kQUInt8);
41+
return x;
42+
}
43+
2744
TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
2845
m.def(
2946
"qlinear_channelwise(Tensor x, Tensor weight, Tensor "
@@ -32,4 +49,13 @@ TORCH_LIBRARY_FRAGMENT(fbgemm, m) {
3249
m.impl(
3350
"qlinear_channelwise",
3451
torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(qlinear_channelwise)));
52+
53+
m.def(
54+
"qlinear_quant(Tensor x, Tensor weight, Tensor "
55+
"bias, Tensor input_scale, Tensor weight_scale, Tensor "
56+
"weight_zero_point, Tensor relu) -> Tensor");
57+
58+
m.impl(
59+
"qlinear_quant",
60+
torch::dispatch(c10::DispatchKey::CPU, TORCH_FN(qlinear_quant)));
3561
}

0 commit comments

Comments (0)