
Commit 1d6bce2

q10 authored and facebook-github-bot committed
Break up quantize ops tests (pytorch#2260)
Summary:
Pull Request resolved: pytorch#2260

- Break up quantize ops tests into multiple files for easier maintenance

Reviewed By: jianyuh

Differential Revision: D52646900

fbshipit-source-id: 107b96198b2b5d02b079d20d2885dfac024a3c63
1 parent 7cee6e3 commit 1d6bce2

13 files changed: +1664 −1451 lines

.github/scripts/fbgemm_gpu_test.bash

Lines changed: 14 additions & 7 deletions
@@ -35,9 +35,13 @@ run_python_test () {
   if exec_with_retries 2 conda run --no-capture-output ${env_prefix} python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
     echo "[TEST] Python test suite PASSED: ${python_test_file}"
     echo ""
+    echo ""
+    echo ""
   else
     echo "[TEST] Python test suite FAILED: ${python_test_file}"
     echo ""
+    echo ""
+    echo ""
     return 1
   fi
 }
@@ -80,20 +84,20 @@ run_fbgemm_gpu_tests () {
 
   # These are either non-tests or currently-broken tests in both FBGEMM_GPU and FBGEMM_GPU-CPU
   local files_to_skip=(
-    test_utils.py
-    split_table_batched_embeddings_test.py
-    ssd_split_table_batched_embeddings_test.py
+    ./test_utils.py
+    ./split_table_batched_embeddings_test.py
+    ./ssd_split_table_batched_embeddings_test.py
   )
 
   if [ "$fbgemm_variant" == "cpu" ]; then
     # These are tests that are currently broken in FBGEMM_GPU-CPU
     local ignored_tests=(
-      uvm_test.py
+      ./uvm_test.py
     )
   elif [ "$fbgemm_variant" == "rocm" ]; then
     # https://github.com/pytorch/FBGEMM/issues/1559
     local ignored_tests=(
-      batched_unary_embeddings_test.py
+      ./batched_unary_embeddings_test.py
     )
   else
     local ignored_tests=()
@@ -108,11 +112,14 @@ run_fbgemm_gpu_tests () {
   (test_python_import_package "${env_name}" fbgemm_gpu.split_embedding_codegen_lookup_invokers) || return 1
 
   echo "[TEST] Enumerating test files ..."
-  print_exec ls -lth ./*.py
+  # shellcheck disable=SC2155
+  local all_test_files=$(find . -type f -name '*_test.py' -print | sort)
+  for f in $all_test_files; do echo "$f"; done
+  echo ""
 
   # NOTE: Tests running on single CPU core with a less powerful testing GPU in
   # GHA can take up to 5 hours.
-  for test_file in *.py; do
+  for test_file in $all_test_files; do
     if echo "${files_to_skip[@]}" | grep "${test_file}"; then
       echo "[TEST] Skipping test file known to be broken: ${test_file}"
     elif echo "${ignored_tests[@]}" | grep "${test_file}"; then
fbgemm_gpu/test/failures_dict.json

Lines changed: 0 additions & 31 deletions
@@ -2,37 +2,6 @@
   "_description": "This is a dict containing failures for tests autogenerated by generate_opcheck_tests. For more details, please see https://docs.google.com/document/d/1Pj5HRZvdOq3xpFpbEjUZp2hBovhy7Wnxw14m6lF2154/edit",
   "_version": 1,
   "data": {
-    "fbgemm::FP8RowwiseQuantizedToFloat": {},
-    "fbgemm::FloatToFP8RowwiseQuantized": {
-      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_fp8_rowwise": {
-        "comment": "",
-        "status": "xsuccess"
-      },
-      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_fp8_rowwise": {
-        "comment": "",
-        "status": "xsuccess"
-      }
-    },
-    "fbgemm::FloatToPaddedFP8RowwiseQuantized": {
-      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
-    "fbgemm::PaddedFP8RowwiseQuantizedToFloat": {
-      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
     "fbgemm::asynchronous_complete_cumsum": {},
     "fbgemm::asynchronous_exclusive_cumsum": {},
     "fbgemm::asynchronous_inclusive_cumsum": {},

fbgemm_gpu/test/failures_dict_fast.json

Lines changed: 0 additions & 78 deletions
@@ -2,84 +2,6 @@
   "_description": "This is a dict containing failures for tests autogenerated by generate_opcheck_tests. For more details, please see https://docs.google.com/document/d/1Pj5HRZvdOq3xpFpbEjUZp2hBovhy7Wnxw14m6lF2154/edit",
   "_version": 1,
   "data": {
-    "fbgemm::FloatToFused8BitRowwiseQuantized": {
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
-    "fbgemm::FloatToFusedNBitRowwiseQuantizedSBHalf": {
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
-    "fbgemm::FloatToHFP8Quantized": {
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_cpu": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache_fp8_2048": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
-    "fbgemm::Fused8BitRowwiseQuantizedToFloat": {
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_cpu_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_fused_pooled_emb_quant": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_no_cache_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_forward_gpu_uvm_cache_int8": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_fused_pooled_emb_quant": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
-    "fbgemm::HFP8QuantizedToFloat": {
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_cpu": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache": {
-        "comment": "",
-        "status": "xfail"
-      },
-      "SplitTableBatchedEmbeddingsTest.test_faketensor__test_nbit_forward_gpu_no_cache_fp8_2048": {
-        "comment": "",
-        "status": "xfail"
-      }
-    },
     "fbgemm::asynchronous_complete_cumsum": {},
     "fbgemm::bounds_check_indices": {},
     "fbgemm::dense_embedding_codegen_lookup_function": {
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+from ctypes import c_float, c_int32, cast, POINTER, pointer
+from typing import Tuple
+
+import hypothesis.strategies as st
+import numpy as np
+import torch
+from hypothesis import given, HealthCheck, settings
+
+
+try:
+    # pyre-ignore[21]
+    from fbgemm_gpu import open_source  # noqa: F401
+
+except Exception:
+    if torch.version.hip:
+        torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_hip")
+    else:
+        torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops")
+
+    torch.ops.load_library("//deeplearning/fbgemm/fbgemm_gpu:sparse_ops_cpu")
+
+
+class SparseNNOperatorsGPUTest(unittest.TestCase):
+    # pyre-fixme[56]: Pyre was not able to infer the type of argument
+    #  `hypothesis.strategies.sampled_from(["BF16"])` to decorator factory
+    #  `hypothesis.given`.
+    @given(
+        precision=st.just("BF16"),
+        batch_size=st.integers(min_value=1, max_value=256),
+        k=st.integers(min_value=2, max_value=2),
+        n=st.integers(min_value=2, max_value=2),
+    )
+    def test_dense_mlp_quantize_ops(
+        self, precision: str, batch_size: int, k: int, n: int
+    ) -> None:
+        if precision == "BF16":
+            input_data = torch.rand((n, k), dtype=torch.float32)
+            quantized_data = torch.ops.fbgemm.FloatToBfloat16Quantized(input_data)
+            dequantized_data = torch.ops.fbgemm.Bfloat16QuantizedToFloat(quantized_data)
+            torch.testing.assert_close(
+                dequantized_data, input_data, rtol=1e-2, atol=1e-2
+            )
+
+
+def bfloat_quantize(x_float: float) -> np.uint16:
+    bits = cast(pointer(c_float(x_float)), POINTER(c_int32)).contents.value
+    bits += 1 << 15
+    bits = bits >> 16
+    bits = np.uint16(bits)
+    return bits
+
+
+def bfloat_dequantize(x_bfloat: np.uint16) -> float:
+    bits = np.int32(x_bfloat) << 16
+    return cast(pointer(c_int32(bits)), POINTER(c_float)).contents.value
+
+
+class TestBfloat16QuantizationConversion(unittest.TestCase):
+    # pyre-fixme[56]: Pyre was not able to infer the type of argument
+    #  `hypothesis.strategies.integers($parameter$min_value = 0, $parameter$max_value =
+    #  100)` to decorator factory `hypothesis.given`.
+    @given(
+        nrows=st.integers(min_value=0, max_value=100),
+        ncols=st.integers(min_value=0, max_value=100),
+    )
+    @settings(deadline=10000, suppress_health_check=[HealthCheck.filter_too_much])
+    def test_quantize_op(self, nrows: int, ncols: int) -> None:
+        input_data = torch.rand(nrows, ncols).float()
+        quantized_data = torch.ops.fbgemm.FloatToBfloat16Quantized(input_data)
+        if nrows == 0 or ncols == 0:
+            assert quantized_data.numel() == 0
+            return
+        f = np.vectorize(lambda x: bfloat_quantize(x))
+        reference = f(input_data.numpy())
+        quantized_data_uint16 = quantized_data.numpy()
+        quantized_data_uint16.dtype = np.uint16
+        np.testing.assert_array_almost_equal(quantized_data_uint16, reference)
+
+        if torch.cuda.is_available():
+            input_data_gpu = input_data.cuda()
+            quantized_data_gpu = torch.ops.fbgemm.FloatToBfloat16Quantized(
+                input_data_gpu
+            )
+            quantized_data_numpy = quantized_data_gpu.cpu().numpy()
+            quantized_data_numpy.dtype = np.uint16
+            np.testing.assert_allclose(quantized_data_numpy, reference)
+
+    # pyre-fixme[56]: Pyre was not able to infer the type of argument
+    #  `hypothesis.strategies.integers($parameter$min_value = 0, $parameter$max_value =
+    #  100)` to decorator factory `hypothesis.given`.
+    @given(
+        nrows=st.integers(min_value=0, max_value=100),
+        ncols=st.integers(min_value=0, max_value=100),
+    )
+    @settings(deadline=10000, suppress_health_check=[HealthCheck.filter_too_much])
+    def test_quantize_and_dequantize_op(self, nrows: int, ncols: int) -> None:
+        input_data = torch.rand(nrows, ncols).float()
+        quantized_data = torch.ops.fbgemm.FloatToBfloat16Quantized(input_data)
+        dequantized_data = torch.ops.fbgemm.Bfloat16QuantizedToFloat(quantized_data)
+        if nrows == 0 or ncols == 0:
+            assert dequantized_data.numel() == 0
+            return
+        f = np.vectorize(lambda x: bfloat_quantize(x))
+        ref_bfloat16 = f(input_data.numpy())
+        f = np.vectorize(lambda x: bfloat_dequantize(x))
+        ref_fp32 = torch.from_numpy(f(ref_bfloat16)).float()
+        torch.testing.assert_close(dequantized_data, ref_fp32)
+
+        if torch.cuda.is_available():
+            input_data_gpu = input_data.cuda()
+            quantized_data_gpu = torch.ops.fbgemm.FloatToBfloat16Quantized(
+                input_data_gpu
+            )
+            dequantized_data_gpu = torch.ops.fbgemm.Bfloat16QuantizedToFloat(
+                quantized_data_gpu
+            )
+            # compare quantized data
+            torch.testing.assert_close(dequantized_data_gpu.cpu(), ref_fp32)
+
+    @unittest.skipIf(not torch.cuda.is_available(), "Skip when CUDA is not available")
+    # pyre-fixme[56]: Pyre was not able to infer the type of argument
+    #  `hypothesis.strategies.sampled_from([(65540, 256), (256, 65540)])` to decorator
+    #  factory `hypothesis.given`.
+    @given(
+        ncols_nrows=st.sampled_from([(65540, 256), (256, 65540)]),
+    )
+    @settings(deadline=10000, suppress_health_check=[HealthCheck.filter_too_much])
+    def test_quantize_and_dequantize_op_cuda_large_nrows_bf16(
+        self, ncols_nrows: Tuple[int, int]
+    ) -> None:
+        ncols, nrows = ncols_nrows
+        input_data = torch.rand(nrows, ncols).float()
+        quantized_data = torch.ops.fbgemm.FloatToBfloat16Quantized(input_data)
+        dequantized_data = torch.ops.fbgemm.Bfloat16QuantizedToFloat(quantized_data)
+
+        if torch.cuda.is_available():
+            input_data_gpu = input_data.cuda()
+            quantized_data_gpu = torch.ops.fbgemm.FloatToBfloat16Quantized(
+                input_data_gpu
+            )
+            dequantized_data_gpu = torch.ops.fbgemm.Bfloat16QuantizedToFloat(
+                quantized_data_gpu
+            )
+            # compare quantized data
+            torch.testing.assert_close(dequantized_data_gpu.cpu(), dequantized_data)
+
+
+if __name__ == "__main__":
+    unittest.main()
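
The new BF16 test module above is self-contained, so it can be invoked on its own the same way the CI harness in fbgemm_gpu_test.bash runs each discovered file. A sketch, assuming the new module lands under the test directory as quantize/bfloat16_test.py; the file's actual path is not shown in this excerpt:

# Sketch only: the quantize/bfloat16_test.py path is an assumption, since the
# new file's name is not visible in this excerpt of the commit.
cd fbgemm_gpu/test
python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning \
    ./quantize/bfloat16_test.py
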
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
+{
+  "_description": "This is a dict containing failures for tests autogenerated by generate_opcheck_tests. For more details, please see https://docs.google.com/document/d/1Pj5HRZvdOq3xpFpbEjUZp2hBovhy7Wnxw14m6lF2154/edit",
+  "_version": 1,
+  "data": {
+    "fbgemm::FP8RowwiseQuantizedToFloat": {},
+    "fbgemm::FloatToFP8RowwiseQuantized": {
+      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_fp8_rowwise": {
+        "comment": "",
+        "status": "xsuccess"
+      },
+      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_fp8_rowwise": {
+        "comment": "",
+        "status": "xsuccess"
+      }
+    },
+    "fbgemm::FloatToPaddedFP8RowwiseQuantized": {
+      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
+        "comment": "",
+        "status": "xfail"
+      },
+      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
+        "comment": "",
+        "status": "xfail"
+      }
+    },
+    "fbgemm::PaddedFP8RowwiseQuantizedToFloat": {
+      "TestFP8RowwiseQuantizationConversion.test_aot_dispatch_dynamic__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
+        "comment": "",
+        "status": "xfail"
+      },
+      "TestFP8RowwiseQuantizationConversion.test_faketensor__test_quantize_and_dequantize_op_padded_fp8_rowwise": {
+        "comment": "",
+        "status": "xfail"
+      }
+    }
+  }
+}
