Commit fcc9e80

[fbgemm_gpu] Add benchmark workflow
- Add benchmark workflow for AMD TBE
1 parent 944dff0 commit fcc9e80

8 files changed: +687 -19 lines changed
.github/scripts/fbgemm_gpu_benchmarks.bash

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


# shellcheck disable=SC1091,SC2128
. "$( dirname -- "$BASH_SOURCE"; )/utils_base.bash"

################################################################################
# FBGEMM_GPU Benchmark Helper Functions
################################################################################

run_tbe_microbench () {
  local env_name="$1"

  __single_run() {
    local cache_type="$1"
    local embedding_location="$2"

    echo "################################################################################"
    echo "# Running Benchmark: (${cache_type}, ${embedding_location})"
    echo "#"
    echo "# [$(date --utc +%FT%T.%3NZ)] + ${FUNCNAME[0]} ${*}"
    echo "################################################################################"
    echo ""

    # shellcheck disable=SC2155
    local env_prefix=$(env_name_or_prefix "${env_name}")

    if [ "$embedding_location" == "hbm" ]; then
      local managed="device"
    elif [ "$embedding_location" == "uvm" ]; then
      local managed="managed"
    fi

    # Old TBE benchmark script
    print_exec conda run --no-capture-output ${env_prefix} python tbe/split_table_batched_embeddings_benchmark.py device \
      --batch-size 13107 \
      --embedding-dim 256 \
      --iters 400 \
      --warmup-runs 50 \
      --alpha 1.15 \
      --bag-size 55 \
      --weights-precision fp16 \
      --cache-precision "${cache_type}" \
      --output-dtype bf16 \
      --managed="${managed}" \
      --num-embeddings 10000000 \
      --num-tables 1 \
      --row-wise

    # New TBE benchmark script
    #
    # Invoke `python tbe/tbe_training_benchmark.py device --help` for
    # documentation on all available flags
    print_exec conda run --no-capture-output ${env_prefix} python tbe/tbe_training_benchmark.py device \
      --bench-iterations 400 \
      --bench-warmup-iterations 50 \
      --bench-num-requests 10 \
      --tbe-batch-size 13107 \
      --tbe-embedding-dim 256 \
      --tbe-pooling-size 55 \
      --tbe-num-embeddings 10000000 \
      --tbe-num-tables 1 \
      --weights-precision fp16 \
      --cache-precision "${cache_type}" \
      --output-dtype bf16 \
      --managed="${managed}" \
      --row-wise
  }

  pushd fbgemm_gpu/bench || return 1

  local cache_types=(
    # fp16
    fp32
  )

  local embedding_locations=(
    # uvm
    hbm
  )

  for cache_type in "${cache_types[@]}"; do
    for embedding_location in "${embedding_locations[@]}"; do
      __single_run "${cache_type}" "${embedding_location}" || return 1
      echo ""
      echo ""
    done
  done

  popd || return 1
}
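
For context, once setup_env.bash sources this file (see the change below), the new helper can also be invoked by hand. The snippet below is a sketch only, not part of the commit; it assumes the repository root as the working directory and a conda environment named build_binary (the workflow's BUILD_ENV) with fbgemm_gpu already installed.

# Sketch only (not part of this commit): run the TBE microbenchmark manually.
# Assumes the repository root as the working directory and a conda environment
# named "build_binary", matching the workflow's BUILD_ENV.
. .github/scripts/setup_env.bash   # also pulls in fbgemm_gpu_benchmarks.bash
run_tbe_microbench build_binary    # runs __single_run for each (cache type, embedding location) pair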

.github/scripts/setup_env.bash

Lines changed: 2 additions & 0 deletions
@@ -37,3 +37,5 @@
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_lint.bash"
 # shellcheck disable=SC1091,SC2128
 . "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_test.bash"
+# shellcheck disable=SC1091,SC2128
+. "$( dirname -- "$BASH_SOURCE"; )/fbgemm_gpu_benchmarks.bash"

.github/scripts/utils_system.bash

Lines changed: 13 additions & 13 deletions
@@ -165,22 +165,22 @@ print_gpu_info () {
   if [[ "${ENFORCE_ROCM_DEVICE}" ]]; then
     # Ensure that rocm-smi is available and returns GPU entries
     if ! rocm-smi; then
-      echo "[CHECK] ROCm drivers and ROCm device are required for this workflow, but does not appear to be installed or available!"
+      echo "[CHECK] ROCm drivers and ROCm device(s) are required for this workflow, but do not appear to be installed or available!"
       return 1
     fi
   else
-    if which rocminfo; then
-      # If rocminfo is installed on a machine without GPUs, this will return error
-      (print_exec rocminfo) || true
-    else
-      echo "[CHECK] rocminfo not found"
-    fi
-    if which rocm-smi; then
-      # If rocm-smi is installed on a machine without GPUs, this will return error
-      (print_exec rocm-smi) || true
-    else
-      echo "[CHECK] rocm-smi not found"
-    fi
+    local smi_programs=( rocminfo rocm-smi )
+
+    for smi_program in "${smi_programs[@]}"; do
+      # shellcheck disable=SC2086
+      if which $smi_program; then
+        # If the program is installed on a machine without GPUs, invoking it will return an error
+        # shellcheck disable=SC2086
+        (print_exec $smi_program) || true
+      else
+        echo "[CHECK] $smi_program not found"
+      fi
+    done
   fi
 }
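
As a hedged aside (not part of the commit), the refactored check can be exercised by hand after sourcing the prelude script; ENFORCE_ROCM_DEVICE is the variable the function already inspects, and the value used here is purely illustrative.

# Sketch only: exercise print_gpu_info manually after sourcing the prelude script.
. .github/scripts/setup_env.bash
print_gpu_info                         # soft check: prints SMI output or "<tool> not found" per program
ENFORCE_ROCM_DEVICE=1 print_gpu_info   # hard check: returns 1 if rocm-smi fails or reports no GPUs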

Lines changed: 177 additions & 0 deletions
@@ -0,0 +1,177 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This workflow is used for FBGEMM_GPU-CPU Benchmarking
name: FBGEMM_GPU-CPU Benchmark

on:
  # PR Trigger (enabled for regression checks and debugging)
  #
  pull_request:
    branches:
      - main

  # Manual Trigger
  #
  workflow_dispatch:
    inputs:
      pytorch_channel_version:
        description: Package Channel + Version to Use for PyTorch Installation, in `<channel>[/<version>]` Format
        type: string
        required: false
        default: ""
      publish_to_pypi:
        description: Publish Artifact to PyPI
        type: boolean
        required: false
        default: false

concurrency:
  # Cancel previous runs in the PR if a new commit is pushed
  # https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Build on CPU hosts and upload the built wheels to GHA
  build_artifact:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: amazonlinux:2023
      options: --user root
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: cpu
    continue-on-error: true
    strategy:
      # Don't fast-fail all the other builds if one of them fails
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.4xlarge" },
          { arch: arm, instance: "linux.arm64.2xlarge" },
        ]
        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
        compiler: [ "gcc", "clang" ]

    steps:
      - name: Setup Build Container
        run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which

      - name: Checkout the Repository
        uses: actions/checkout@v4

      - name: Display System Info
        run: . $PRELUDE; print_system_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}

      - name: Install Build Tools
        run: . $PRELUDE; install_build_tools $BUILD_ENV

      - name: Install PyTorch-CPU Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cpu

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Build FBGEMM_GPU Wheel
        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV nightly cpu

      - name: Upload Built Wheel as GHA Artifact
        uses: actions/upload-artifact@v4
        with:
          name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}.whl
          path: fbgemm_gpu/dist/*.whl
          if-no-files-found: error

  # Download the built artifacts from GHA and benchmark them on CPU hosts
  benchmark_artifact:
    if: ${{ github.repository_owner == 'pytorch' }}
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: amazonlinux:2023
      options: --user root
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: cpu
    strategy:
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.4xlarge", timeout: 20 },
          { arch: arm, instance: "linux.arm64.2xlarge", timeout: 30 },
        ]
        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
        compiler: [ "gcc", "clang" ]
    needs: build_artifact

    steps:
      - name: Setup Build Container
        run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which

      - name: Checkout the Repository
        uses: actions/checkout@v4

      - name: Download Wheel Artifact from GHA
        uses: actions/download-artifact@v4
        with:
          name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}.whl

      - name: Display System Info
        run: . $PRELUDE; print_system_info; print_ec2_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers for Updated LIBGCC
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}

      - name: Install PyTorch-CPU Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cpu

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Install FBGEMM_GPU Wheel
        run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl

      - name: Run FBGEMM_GPU Benchmark
        timeout-minutes: 40
        run: . $PRELUDE; run_tbe_microbench $BUILD_ENV
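
Since the workflow also defines a workflow_dispatch trigger, it can be started manually. The following is a sketch using the GitHub CLI, not part of the commit; the input value shown is illustrative rather than a recommended setting.

# Sketch only: dispatch the benchmark workflow manually by its display name.
gh workflow run "FBGEMM_GPU-CPU Benchmark" \
  --ref main \
  -f pytorch_channel_version="nightly"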
