
Commit 8152da4

[fbgemm_gpu] Add benchmark workflow
- Add benchmark workflow for AMD TBE
1 parent 853e97c commit 8152da4

5 files changed: +481, -2 lines changed
Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# shellcheck disable=SC1091,SC2128
. "$( dirname -- "$BASH_SOURCE"; )/utils_base.bash"

################################################################################
# FBGEMM_GPU Benchmark Helper Functions
################################################################################

run_tbe_microbench_for_amd () {
  local env_name="$1"

  __single_run() {
    local cache_type="$1"
    local embedding_location="$2"

    echo "################################################################################"
    echo "# Running Benchmark: (${cache_type}, ${embedding_location})"
    echo "#"
    echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
    echo "################################################################################"
    echo ""

    # shellcheck disable=SC2155
    local env_prefix=$(env_name_or_prefix "${env_name}")

    if [ "$embedding_location" == "hbm" ]; then
      local managed="device"
    elif [ "$embedding_location" == "uvm" ]; then
      local managed="managed"
    fi

    print_exec conda run --no-capture-output ${env_prefix} python split_table_batched_embeddings_benchmark.py device \
      --batch-size 131072 \
      --embedding-dim 256 \
      --iters 400 \
      --warmup-runs 50 \
      --alpha 1.15 \
      --bag-size 55 \
      --weights-precision fp16 \
      --cache-precision "${cache_type}" \
      --output-dtype bf16 \
      --managed="${managed}" \
      --num-embeddings 10000000 \
      --num-tables 1 \
      --row-wise \
      --pooling=none
  }

  pushd fbgemm_gpu/bench || return 1

  local cache_types=(
    fp16
    fp32
  )

  local embedding_locations=(
    hbm
    uvm
  )

  for cache_type in "${cache_types[@]}"; do
    for embedding_location in "${embedding_locations[@]}"; do
      __single_run "${cache_type}" "${embedding_location}" || return 1
    done
  done

  popd || return 1
}
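
For local debugging outside of CI, the same sweep can be kicked off by hand. A minimal sketch, assuming the repository root as the working directory and an existing Conda environment named build_binary with fbgemm_gpu already installed (the environment name is simply whatever is passed as the first argument):

# Hypothetical local invocation of the new helper (not part of this commit):
# source the CI prelude, then run the full (cache_type, embedding_location) sweep.
. .github/scripts/setup_env.bash
run_tbe_microbench_for_amd build_binary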

.github/scripts/setup_env.bash

Lines changed: 2 additions & 0 deletions
@@ -37,3 +37,5 @@
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_lint.bash"
 # shellcheck disable=SC1091,SC2128
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_test.bash"
+# shellcheck disable=SC1091,SC2128
+. "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_benchmarks.bash"
Lines changed: 206 additions & 0 deletions
@@ -0,0 +1,206 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This workflow is used for FBGEMM_GPU-CUDA Benchmarking
name: FBGEMM_GPU-CUDA Benchmark

on:
  # PR Trigger (enabled for regression checks and debugging)
  #
  pull_request:
    branches:
      - main

  # Push Trigger (enable to catch errors coming out of multiple merges)
  #
  push:
    branches:
      - main

  # Manual Trigger
  #
  workflow_dispatch:
    inputs:
      pytorch_channel_version:
        description: Package Channel + Version to Use for PyTorch Installation, in `<channel>[/<version>]` Format
        type: string
        required: false
        default: ""

concurrency:
  # Cancel previous runs in the PR if a new commit is pushed
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Build on CPU hosts and upload to GHA
  build_artifact:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: amazonlinux:2023
      options: --user root
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: cuda
      BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
    continue-on-error: true
    strategy:
      # Don't fast-fail all the other builds if one of them fails
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.24xlarge" },
        ]
        python-version: [ "3.13" ]
        cuda-version: [ "11.8.0", "12.4.1", "12.6.3", "12.8.0" ]
        compiler: [ "gcc", "clang" ]

    steps:
      - name: Setup Build Container
        run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which

      - name: Checkout the Repository
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Display System Info
        run: . $PRELUDE; print_system_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}

      - name: Install Build Tools
        run: . $PRELUDE; install_build_tools $BUILD_ENV

      - name: Install CUDA
        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

      # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
      - name: Install PyTorch Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.cuda-version }}

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Install cuDNN
        run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Build FBGEMM_GPU Wheel
        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV nightly cuda

      - name: Upload Built Wheel as GHA Artifact
        uses: actions/upload-artifact@v4
        with:
          name: fbgemm_gpu_nightly_cuda_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
          path: fbgemm_gpu/dist/*.whl
          if-no-files-found: error

  # Download the built artifact from GHA and run the benchmark on GPU instances
  benchmark_artifact:
    if: ${{ github.repository_owner == 'pytorch' }}
    # runs-on: linux.4xlarge.nvidia.gpu
    # Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
    runs-on: ${{ matrix.host-machine.instance }}
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: cuda
      BUILD_CUDA_VERSION: ${{ matrix.cuda-version }}
      ENFORCE_CUDA_DEVICE: 1
    strategy:
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
          # TODO: Enable when A100 machine queues are reasonably small enough for doing per-PR CI
          # https://hud.pytorch.org/metrics
          # { arch: x86, instance: "linux.gcp.a100" },
        ]
        python-version: [ "3.13" ]
        cuda-version: [ "11.8.0", "12.4.1", "12.6.3", "12.8.0" ]
        # Specify exactly ONE CUDA version for artifact publish
        cuda-version-publish: [ "12.4.1" ]
        compiler: [ "gcc", "clang" ]
    needs: build_artifact

    steps:
      - name: Checkout the Repository
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Download Wheel Artifact from GHA
        uses: actions/download-artifact@v4
        with:
          name: fbgemm_gpu_nightly_cuda_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl

      # Use PyTorch test infrastructure action - https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml
      - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main

      - name: Display System Info
        run: . $PRELUDE; print_system_info; print_ec2_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers for Updated LIBGCC
        # NOTE: gcc is required for torch dynamo to work properly, as some of
        # the compilation flags used by torch dynamo are gcc-specific:
        #
        #   clang-16: error: unknown argument: '-fno-tree-loop-vectorize'
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV gcc

      - name: Install CUDA
        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

      # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
      - name: Install PyTorch Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cuda/${{ matrix.cuda-version }}

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Install FBGEMM_GPU Wheel
        run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl

      - name: Run FBGEMM_GPU Benchmark
        timeout-minutes: 40
        run: . $PRELUDE; run_tbe_microbench_for_amd $BUILD_ENV
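
Since the workflow also declares a workflow_dispatch trigger with an optional pytorch_channel_version input, a run can be started manually. A sketch using the GitHub CLI, with an illustrative channel/version value (when the input is omitted, the workflow falls back to the nightly channel):

# Manually trigger the benchmark workflow by its `name:` field on the main branch
gh workflow run "FBGEMM_GPU-CUDA Benchmark" --ref main -f pytorch_channel_version="test/2.6.0"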

0 commit comments
