
Commit bc1961d

q10 authored and facebook-github-bot committed
Re-organize UVM tests (pytorch#2292)
Summary: Pull Request resolved: pytorch#2292 - Re-organize UVM tests
Reviewed By: spcyppt
Differential Revision: D53151319
fbshipit-source-id: 75847d62dd54f4da4977979496d484c8277792d7
1 parent d6edaab commit bc1961d

File tree: 4 files changed, +159 / -120 lines

  .github/scripts/fbgemm_gpu_test.bash
  fbgemm_gpu/test/uvm/cache_miss_emulate_test.cpp (renamed from fbgemm_gpu/test/uvm_cache_miss_emulate_test.cpp)
  fbgemm_gpu/test/uvm/copy_test.py (new file)
  fbgemm_gpu/test/uvm/uvm_test.py (renamed from fbgemm_gpu/test/uvm_test.py)

.github/scripts/fbgemm_gpu_test.bash

Lines changed: 3 additions & 2 deletions
@@ -88,9 +88,10 @@ run_fbgemm_gpu_tests () {
   )

   if [ "$fbgemm_variant" == "cpu" ]; then
-    # These are tests that are currently broken in FBGEMM_GPU-CPU
+    # These tests have non-CPU operators referenced in @given
     local ignored_tests=(
-      ./uvm_test.py
+      ./uvm/copy_test.py
+      ./uvm/uvm_test.py
     )
   elif [ "$fbgemm_variant" == "rocm" ]; then
     local ignored_tests=(
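For orientation, here is a minimal Python sketch of the idea behind an ignore list like the one above: collect test files, skip the ones on the list, and run the rest. The helper names and directory layout are hypothetical; the actual logic lives in fbgemm_gpu_test.bash and is not reproduced here.

```python
# Hypothetical illustration only -- not part of this commit or of
# fbgemm_gpu_test.bash. It mirrors the "ignore list" idea in plain Python.
import subprocess
import sys
from pathlib import Path

# Tests whose @given strategies reference non-CPU operators (per the diff above).
IGNORED_TESTS = {"./uvm/copy_test.py", "./uvm/uvm_test.py"}


def run_test_files(test_root: str = ".") -> int:
    """Run every *_test.py under test_root, skipping ignored files."""
    failures = 0
    for test_file in sorted(Path(test_root).rglob("*_test.py")):
        rel_path = "./" + test_file.relative_to(test_root).as_posix()
        if rel_path in IGNORED_TESTS:
            print(f"[skipped] {rel_path}")
            continue
        result = subprocess.run([sys.executable, str(test_file)])
        failures += int(result.returncode != 0)
    return failures


if __name__ == "__main__":
    sys.exit(run_test_files())
```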

fbgemm_gpu/test/uvm_cache_miss_emulate_test.cpp renamed to fbgemm_gpu/test/uvm/cache_miss_emulate_test.cpp

Lines changed: 2 additions & 2 deletions
@@ -45,7 +45,7 @@ std::pair<at::Tensor, at::Tensor> run_emulate_cache_miss(
   return {lxu_cache_location_with_cache_misses.cpu(), uvm_cache_stats.cpu()};
 }

-TEST(uvm_cache_miss_emulate_test, no_cache_miss) {
+TEST(UvmCacheMissEmulateTest, no_cache_miss) {
   constexpr int64_t num_requests = 10000;
   constexpr int64_t num_sets = 32768;
   constexpr int64_t associativity = 32;
@@ -60,7 +60,7 @@ TEST(uvm_cache_miss_emulate_test, no_cache_miss) {
       at::equal(lxu_cache_locations_cpu, lxu_cache_location_with_cache_misses));
 }

-TEST(uvm_cache_miss_emulate_test, enforced_cache_miss) {
+TEST(UvmCacheMissEmulateTest, enforced_cache_miss) {
   constexpr int64_t num_requests = 10000;
   constexpr int64_t num_sets = 32768;
   constexpr int64_t associativity = 32;

fbgemm_gpu/test/uvm/copy_test.py

Lines changed: 154 additions & 0 deletions
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-ignore-all-errors[56]

import unittest
from typing import List

import fbgemm_gpu
import hypothesis.strategies as st
import torch
from hypothesis import given, settings, Verbosity

# pyre-fixme[16]: Module `fbgemm_gpu` has no attribute `open_source`.
open_source: bool = getattr(fbgemm_gpu, "open_source", False)

if open_source:
    # pyre-ignore[21]
    from test_utils import gpu_available, gpu_unavailable, skipIfRocm
else:
    from fbgemm_gpu.test.test_utils import gpu_available, gpu_unavailable, skipIfRocm

if gpu_available:
    # pyre-ignore[21]
    from fbgemm_gpu.uvm import cudaMemAdvise, cudaMemoryAdvise, cudaMemPrefetchAsync


MAX_EXAMPLES = 40


class CopyTest(unittest.TestCase):
    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(st.integers(min_value=1, max_value=8), min_size=1, max_size=4),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)
        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
        assert torch.ops.fbgemm.uvm_storage(cpu_t)

        uvm_t.copy_(cpu_t)
        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Test use of cpu tensor after freeing the uvm tensor
        del uvm_t
        cpu_t.mul_(42)

    @skipIfRocm()
    @unittest.skipIf(
        not torch.cuda.is_available() or torch.cuda.device_count() < 2,
        "Skip unless two CUDA devices are detected",
    )
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(1024)), min_size=1, max_size=4
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_device(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        # Reference uvm tensor from second cuda device
        try:
            device_prototype = torch.empty(0, device="cuda:1")
        except RuntimeError:
            # Skip the tests if there is no "cuda:1" device
            return

        second_t = torch.ops.fbgemm.uvm_to_device(uvm_t, device_prototype)

        assert torch.ops.fbgemm.is_uvm_tensor(second_t)
        assert torch.ops.fbgemm.uvm_storage(second_t)
        assert second_t.device == device_prototype.device

    @unittest.skipIf(*gpu_unavailable)
    @given(
        sizes=st.lists(
            st.integers(min_value=1, max_value=(512)), min_size=1, max_size=3
        ),
        uvm_op=st.sampled_from(
            [
                torch.ops.fbgemm.new_unified_tensor,
                torch.ops.fbgemm.new_managed_tensor,
                torch.ops.fbgemm.new_vanilla_managed_tensor,
            ]
        ),
    )
    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
    # pyre-fixme[2]: Parameter must be annotated.
    def test_uvm_to_cpu_clone(self, sizes: List[int], uvm_op) -> None:
        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
            is_host_mapped = False
            uvm_t = uvm_op(
                torch.empty(0, device="cuda:0", dtype=torch.float),
                sizes,
                is_host_mapped,
            )
        else:
            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)

        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
        assert torch.ops.fbgemm.uvm_storage(uvm_t)

        cpu_clone = torch.ops.fbgemm.uvm_to_cpu_clone(uvm_t)

        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_clone)
        assert not torch.ops.fbgemm.uvm_storage(cpu_clone)


if __name__ == "__main__":
    unittest.main()
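Since CopyTest is a standard unittest.TestCase, the relocated tests can also be exercised individually once a GPU-enabled fbgemm_gpu build is importable. A minimal invocation sketch, assuming the working directory is fbgemm_gpu/test/uvm:

```python
# Hypothetical invocation sketch; requires a CUDA-capable fbgemm_gpu build.
import unittest

# Load just the uvm_to_cpu round-trip test from the relocated module.
suite = unittest.defaultTestLoader.loadTestsFromName(
    "copy_test.CopyTest.test_uvm_to_cpu"
)
unittest.TextTestRunner(verbosity=2).run(suite)
```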

fbgemm_gpu/test/uvm_test.py renamed to fbgemm_gpu/test/uvm/uvm_test.py

Lines changed: 0 additions & 116 deletions
@@ -61,42 +61,6 @@ def test_is_uvm_tensor(self, sizes: List[int], uvm_op) -> None:
         assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
         assert torch.ops.fbgemm.uvm_storage(uvm_t)

-    @unittest.skipIf(*gpu_unavailable)
-    @given(
-        sizes=st.lists(st.integers(min_value=1, max_value=8), min_size=1, max_size=4),
-        uvm_op=st.sampled_from(
-            [
-                torch.ops.fbgemm.new_unified_tensor,
-                torch.ops.fbgemm.new_managed_tensor,
-                torch.ops.fbgemm.new_vanilla_managed_tensor,
-            ]
-        ),
-    )
-    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
-    # pyre-fixme[2]: Parameter must be annotated.
-    def test_uvm_to_cpu(self, sizes: List[int], uvm_op) -> None:
-        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
-            is_host_mapped = False
-            uvm_t = uvm_op(
-                torch.empty(0, device="cuda:0", dtype=torch.float),
-                sizes,
-                is_host_mapped,
-            )
-        else:
-            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)
-
-        cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)
-        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
-        assert torch.ops.fbgemm.uvm_storage(cpu_t)
-
-        uvm_t.copy_(cpu_t)
-        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
-        assert torch.ops.fbgemm.uvm_storage(uvm_t)
-
-        # Test use of cpu tensor after freeing the uvm tensor
-        del uvm_t
-        cpu_t.mul_(42)
-
     @unittest.skipIf(*gpu_unavailable)
     def test_enum(self) -> None:
         # pyre-ignore[16]
@@ -168,52 +132,6 @@ def test_cudaMemPrefetchAsync(self, sizes: List[int], uvm_op) -> None:

         torch.cuda.synchronize(torch.device("cuda:0"))

-    @skipIfRocm()
-    @unittest.skipIf(
-        not torch.cuda.is_available() or torch.cuda.device_count() < 2,
-        "Skip unless two CUDA devices are detected",
-    )
-    @given(
-        sizes=st.lists(
-            st.integers(min_value=1, max_value=(1024)), min_size=1, max_size=4
-        ),
-        uvm_op=st.sampled_from(
-            [
-                torch.ops.fbgemm.new_unified_tensor,
-                torch.ops.fbgemm.new_managed_tensor,
-                torch.ops.fbgemm.new_vanilla_managed_tensor,
-            ]
-        ),
-    )
-    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
-    # pyre-fixme[2]: Parameter must be annotated.
-    def test_uvm_to_device(self, sizes: List[int], uvm_op) -> None:
-        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
-            is_host_mapped = False
-            uvm_t = uvm_op(
-                torch.empty(0, device="cuda:0", dtype=torch.float),
-                sizes,
-                is_host_mapped,
-            )
-        else:
-            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)
-
-        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
-        assert torch.ops.fbgemm.uvm_storage(uvm_t)
-
-        # Reference uvm tensor from second cuda device
-        try:
-            device_prototype = torch.empty(0, device="cuda:1")
-        except RuntimeError:
-            # Skip the tests if there is no "cuda:1" device
-            return
-
-        second_t = torch.ops.fbgemm.uvm_to_device(uvm_t, device_prototype)
-
-        assert torch.ops.fbgemm.is_uvm_tensor(second_t)
-        assert torch.ops.fbgemm.uvm_storage(second_t)
-        assert second_t.device == device_prototype.device
-
     @skipIfRocm()
     @unittest.skipIf(*gpu_unavailable)
     @given(
@@ -289,40 +207,6 @@ def test_uvm_memadviceDontFork(self, sizes: List[int], uvm_op) -> None:

         torch.ops.fbgemm.uvm_mem_advice_dont_fork(cpu_t)

-    @unittest.skipIf(*gpu_unavailable)
-    @given(
-        sizes=st.lists(
-            st.integers(min_value=1, max_value=(512)), min_size=1, max_size=3
-        ),
-        uvm_op=st.sampled_from(
-            [
-                torch.ops.fbgemm.new_unified_tensor,
-                torch.ops.fbgemm.new_managed_tensor,
-                torch.ops.fbgemm.new_vanilla_managed_tensor,
-            ]
-        ),
-    )
-    @settings(verbosity=Verbosity.verbose, max_examples=MAX_EXAMPLES, deadline=None)
-    # pyre-fixme[2]: Parameter must be annotated.
-    def test_uvm_to_cpu_clone(self, sizes: List[int], uvm_op) -> None:
-        if uvm_op is torch.ops.fbgemm.new_unified_tensor:
-            is_host_mapped = False
-            uvm_t = uvm_op(
-                torch.empty(0, device="cuda:0", dtype=torch.float),
-                sizes,
-                is_host_mapped,
-            )
-        else:
-            uvm_t = uvm_op(torch.empty(0, device="cuda:0", dtype=torch.float), sizes)
-
-        assert torch.ops.fbgemm.is_uvm_tensor(uvm_t)
-        assert torch.ops.fbgemm.uvm_storage(uvm_t)
-
-        cpu_clone = torch.ops.fbgemm.uvm_to_cpu_clone(uvm_t)
-
-        assert not torch.ops.fbgemm.is_uvm_tensor(cpu_clone)
-        assert not torch.ops.fbgemm.uvm_storage(cpu_clone)
-
     @unittest.skipIf(*gpu_unavailable)
     @given(
         sizes=st.lists(
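The deleted blocks are the three copy-oriented tests (test_uvm_to_cpu, test_uvm_to_device, test_uvm_to_cpu_clone), which reappear verbatim in uvm/copy_test.py above, so the re-organization does not drop coverage. For reference, the core property the relocated test_uvm_to_cpu asserts can be summarized in a few lines; this is a sketch only, and it assumes a CUDA-capable fbgemm_gpu build whose import registers the torch.ops.fbgemm operators:

```python
# Sketch of the invariant exercised by the relocated test_uvm_to_cpu:
# a CPU view of a UVM tensor is no longer reported as a UVM tensor,
# but it still reports UVM storage.
import fbgemm_gpu  # noqa: F401  (assumed to register torch.ops.fbgemm operators)
import torch

uvm_t = torch.ops.fbgemm.new_managed_tensor(
    torch.empty(0, device="cuda:0", dtype=torch.float), [4, 4]
)
cpu_t = torch.ops.fbgemm.uvm_to_cpu(uvm_t)

assert not torch.ops.fbgemm.is_uvm_tensor(cpu_t)
assert torch.ops.fbgemm.uvm_storage(cpu_t)
```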
