Skip to content

Commit 50de711

Browse files
q10 and facebook-github-bot
authored and committed
Fix PT2 wrapper registrations (pytorch#805)
Summary:
Pull Request resolved: facebookresearch/FBGEMM#805

- Fix PT2 wrapper registrations

X-link: pytorch#3721

Reviewed By: spcyppt

Differential Revision: D69971546

Pulled By: q10

fbshipit-source-id: 9e964f14b25c24ad3240e4e65d87fa2ae08d5b21
1 parent 319f926 commit 50de711

File tree

3 files changed

+16
-11
lines changed

3 files changed

+16
-11
lines changed

.github/scripts/fbgemm_gpu_install.bash

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,7 @@ __install_check_operator_registrations () {
9898
else
9999
local test_operators=(
100100
"torch.ops.fbgemm.asynchronous_inclusive_cumsum"
101+
"torch.ops.fbgemm.split_embedding_codegen_lookup_sgd_function_pt2"
101102
)
102103
fi
103104

fbgemm_gpu/cmake/TbeTraining.cmake

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,13 +26,15 @@ handle_genfiles_rocm(gen_gpu_files_forward_split)
2626
get_tbe_sources_list(static_cpu_files_training)
2727
get_tbe_sources_list(gen_cpu_files_training)
2828
get_tbe_sources_list(gen_gpu_files_training)
29+
get_tbe_sources_list(gen_cpu_files_training_pt2)
2930
get_tbe_sources_list(gen_gpu_files_training_pt2)
3031
get_tbe_sources_list(gen_gpu_files_training_dense)
3132
get_tbe_sources_list(gen_gpu_files_training_split_host)
3233
get_tbe_sources_list(gen_gpu_files_training_gwd)
3334
get_tbe_sources_list(gen_gpu_files_training_vbe)
3435
handle_genfiles_rocm(gen_cpu_files_training)
3536
handle_genfiles_rocm(gen_gpu_files_training)
37+
handle_genfiles_rocm(gen_cpu_files_training_pt2)
3638
handle_genfiles_rocm(gen_gpu_files_training_pt2)
3739
handle_genfiles_rocm(gen_gpu_files_training_dense)
3840
handle_genfiles_rocm(gen_gpu_files_training_split_host)
@@ -166,6 +168,8 @@ gpu_cpp_library(
166168
SHARED
167169
INCLUDE_DIRS
168170
${fbgemm_sources_include_directories}
171+
CPU_SRCS
172+
${gen_cpu_files_training_pt2}
169173
GPU_SRCS
170174
${gen_gpu_files_training_pt2}
171175
NVCC_FLAGS

fbgemm_gpu/cmake/tbe_sources.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -318,13 +318,13 @@
318318
static_cpu_files_common = [
319319
"codegen/utils/embedding_bounds_check_host_cpu.cpp",
320320
"codegen/training/forward/embedding_forward_split_cpu.cpp",
321+
"codegen/training/pt2/pt2_autograd_utils.cpp",
321322
]
322323

323324
static_gpu_files_common = [
324325
"codegen/utils/embedding_bounds_check_v1.cu",
325326
"codegen/utils/embedding_bounds_check_v2.cu",
326327
"codegen/utils/embedding_bounds_check_host.cpp",
327-
"codegen/training/pt2/pt2_autograd_utils.cpp",
328328
]
329329

330330
gen_cpu_files_training = (
@@ -335,17 +335,13 @@
335335
"gen_embedding_backward_split_{}_cpu.cpp".format(optimizer)
336336
for optimizer in ALL_OPTIMIZERS
337337
]
338-
+ [
339-
"gen_embedding_backward_split_{}_pt2_cpu_wrapper.cpp".format(optimizer)
340-
for optimizer in ALL_OPTIMIZERS
341-
]
342338
+ [
343339
"gen_embedding_backward_{}_split_cpu.cpp".format(optimizer)
344340
for optimizer in CPU_OPTIMIZERS
345341
]
346342
)
347343

348-
gen_gpu_files_training_pt2 = (
344+
gen_cpu_files_training_pt2 = (
349345
[
350346
"gen_embedding_split_{}_pt2_autograd.cpp".format(optimizer)
351347
for optimizer in ALL_OPTIMIZERS
@@ -355,15 +351,19 @@
355351
for optimizer in SSD_OPTIMIZERS
356352
]
357353
+ [
358-
"gen_embedding_backward_split_{}_pt2_cuda_wrapper.cpp".format(optimizer)
354+
"gen_embedding_backward_split_{}_pt2_cpu_wrapper.cpp".format(optimizer)
359355
for optimizer in ALL_OPTIMIZERS
360356
]
361-
+ [
362-
"gen_embedding_backward_ssd_{}_pt2_cuda_wrapper.cpp".format(optimizer)
363-
for optimizer in SSD_OPTIMIZERS
364-
]
365357
)
366358

359+
gen_gpu_files_training_pt2 = [
360+
"gen_embedding_backward_split_{}_pt2_cuda_wrapper.cpp".format(optimizer)
361+
for optimizer in ALL_OPTIMIZERS
362+
] + [
363+
"gen_embedding_backward_ssd_{}_pt2_cuda_wrapper.cpp".format(optimizer)
364+
for optimizer in SSD_OPTIMIZERS
365+
]
366+
367367
gen_gpu_files_training_dense = [
368368
# Dense host and kernel, and forward-quantized host src files
369369
fstring.format(wdesc)

0 commit comments

Comments
 (0)