ROCm
diff --git a/‎.github/scripts/setup_env.bash
Lines changed: 547 additions & 119 deletions b/‎.github/scripts/setup_env.bash
Lines changed: 547 additions & 119 deletions
diff --git a/‎.github/workflows/fbgemm_docs.yml
Lines changed: 6 additions & 2 deletions b/‎.github/workflows/fbgemm_docs.yml
Lines changed: 6 additions & 2 deletions
diff --git a/‎.github/workflows/fbgemm_gpu_ci.yml
Lines changed: 65 additions & 153 deletions b/‎.github/workflows/fbgemm_gpu_ci.yml
Lines changed: 65 additions & 153 deletions
diff --git a/‎.github/workflows/pylint.yaml renamed to ‎.github/workflows/fbgemm_gpu_lint.yml
Lines changed: 15 additions & 6 deletions b/‎.github/workflows/pylint.yaml renamed to ‎.github/workflows/fbgemm_gpu_lint.yml
Lines changed: 15 additions & 6 deletions
@@ -1,5 +1,9 @@
-# This workflow builds the fbgemm_gpu docs and deploys them to gh-pages.
-name: Generate documentation
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+name: FBGEMM Documentation
 on:
   push:
     branches:
 
@@ -14,8 +14,7 @@ on:
       - main
 
 jobs:
-  build_nvidia_gpu:
-    if: ${{ false }}  # Disable the job for now
+  build_and_test_amd:
     runs-on: ${{ matrix.os }}
     defaults:
       run:
@@ -24,13 +23,11 @@ jobs:
       PRELUDE: .github/scripts/setup_env.bash
       BUILD_ENV: build_binary
     strategy:
-      # Don't fast-fail all the other builds if one of the them fails
       fail-fast: false
       matrix:
         os: [ ubuntu-20.04 ]
-        python-version: [ "3.8" ]
-        # As of version 2.0, PyTorch has dropped support for CUDA 11.6
-        cuda-version: [ 11.7.1 ]
+        python-version: [ "3.10" ]
+        rocm-version: [ "5.3" ]
 
     steps:
     - name: Checkout the Repository
@@ -41,120 +38,52 @@ jobs:
     - name: Display System Info
       run: . $PRELUDE; print_system_info
 
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Free Disk Space
+      run: . $PRELUDE; free_disk_space
+
     - name: Setup Miniconda
       run: |
         . $PRELUDE; setup_miniconda $HOME/miniconda
         echo "${HOME}/miniconda/bin" >> $GITHUB_PATH
         echo "CONDA=${HOME}/miniconda" >> $GITHUB_PATH
 
     - name: Create Conda Environment
-      run: |
-        . $PRELUDE
-        create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
 
-        # This hack is needed to get builds running properly on Ubuntu 20.04
-        echo "[SETUP] Creating symlink \$CONDA_PREFIX/lib64 -> \$CONDA_PREFIX/lib ..."
-        conda_prefix=$(conda run -n "${env_name}" printenv CONDA_PREFIX)
-        ln -s "${conda_prefix}/lib" "${conda_prefix}/lib64"
-
-    # - name: Install C/C++ Compilers
-    #   run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
+    - name: Install ROCm
+      run: . $PRELUDE; install_rocm_ubuntu $BUILD_ENV ${{ matrix.rocm-version }}
 
     - name: Install Build Tools
       run: . $PRELUDE; install_build_tools $BUILD_ENV
 
-    - name: Install CUDA
-      run: . $PRELUDE; install_cuda $BUILD_ENV "${{ matrix.cuda-version }}"
-
-    - name: Install PyTorch
-      run:  . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda "${{ matrix.cuda-version }}"
-
-    - name: Install cuDNN
-      run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" "${{ matrix.cuda-version }}"
+    - name: Install PyTorch-ROCm Nightly
+      run:  . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly rocm ${{ matrix.rocm-version }}
 
     - name: Prepare FBGEMM Build
       run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
 
-    - name: Build and Install FBGEMM_GPU
-      run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV
-
-    - name: Test FBGEMM_GPU installation
-      shell: bash
-      run: |
-        . $PRELUDE;
-        cd fbgemm_gpu/test
-        print_exec conda run -n $BUILD_ENV python input_combine_test.py -v
-        print_exec conda run -n $BUILD_ENV python quantize_ops_test.py -v
-        print_exec conda run -n $BUILD_ENV python sparse_ops_test.py -v
-        conda run -n $BUILD_ENV python -c "import fbgemm_gpu"
-        conda run -n $BUILD_ENV python -c "import fbgemm_gpu.split_embedding_codegen_lookup_invokers"
-
-  build_amd_gpu:
-    if: ${{ false }}  # Disable the job for now
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        os: [ubuntu-20.04]
-        config: [[pip, 5.3]]
-
-    steps:
-    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-
-    - uses: actions/checkout@v3
-
-    - name: Install ROCm
-      shell: bash
-      run: |
-        sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
-        wget https://repo.radeon.com/amdgpu-install/5.3/ubuntu/focal/amdgpu-install_5.3.50300-1_all.deb
-        export DEBIAN_FRONTEND=noninteractive
-        sudo apt install -y ./amdgpu-install_5.3.50300-1_all.deb
-        amdgpu-install -y --usecase=hiplibsdk,rocm --no-dkms
-        sudo rm amdgpu-install_5.3.50300-1_all.deb
-
-    - name: Install dependencies
-      shell: bash
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y git pip python3-dev mesa-common-dev clang comgr libopenblas-dev jp intel-mkl-full locales libnuma-dev
-        sudo apt-get install -y hipify-clang || true
-        sudo apt-get install -y miopen-hip miopen-hip-dev
-        sudo pip install cmake scikit-build ninja jinja2 numpy hypothesis --no-input
-        sudo apt-get clean
-        # Install PyTorch (nightly) as required by fbgemm_gpu
-        sudo pip install --pre torch torchvision --extra-index-url https://download.pytorch.org/whl/nightly/rocm5.3/
-
-    - name: Checkout submodules
-      shell: bash
+    - name: Build FBGEMM_GPU-ROCM Nightly
       run: |
+        . $PRELUDE
         cd fbgemm_gpu
-        git submodule sync
-        git submodule update --init --recursive
 
-    - name: Build fbgemm_gpu
-      shell: bash
-      run: |
-        sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 10
-        cd fbgemm_gpu
-        # build for MI250 only to save time.
-        sudo PYTORCH_ROCM_ARCH=gfx90a python3 setup.py build develop
+        # Build for MI250 only to save time.
+        print_exec conda env config vars set -n $BUILD_ENV PYTORCH_ROCM_ARCH=gfx90a
+        print_exec conda run -n $BUILD_ENV python setup.py build develop
+
+    - name: Test FBGEMM_GPU-ROCM Nightly installation
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV rocm
 
-    - name: Test fbgemm_gpu installation
-      shell: bash
-      run: |
-        cd fbgemm_gpu
-        cd test
-        python3 input_combine_test.py -v
-        python3 quantize_ops_test.py -v
-        python3 sparse_ops_test.py -v
-        python3 -c "import fbgemm_gpu"
-        python3 -c "import fbgemm_gpu.split_embedding_codegen_lookup_invokers"
 
   test_amd_gpu:
     if: ${{ false }}  # Disable the job for now
     runs-on: rocm
     strategy:
+      fail-fast: false
       matrix:
         os: [ubuntu-latest]
 
@@ -200,71 +129,54 @@ jobs:
         "
         docker run $DOCKER_OPTIONS $DOCKER_IMAGE $JENKINS_REPO_DIR_DOCKER/.jenkins/rocm/build_and_test.sh $JENKINS_REPO_DIR_DOCKER
 
-  test_nvidia_gpu:
-    if: ${{ false }}  # Disable the job for now
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    with:
-      job-name: cuda 11.7, A10
-      runner: linux.g5.4xlarge.nvidia.gpu # A10
-      repository: pytorch/fbgemm
-      gpu-arch-type: cuda
-      gpu-arch-version: 11.7
-      timeout: 150
-      script: |
-        set -x
-        # Checkout FBGEMM_GPU
-        git submodule update --init
-
-        # Build FBGEMM_GPU with pytorch-nightly
-        CUDA_VERSION="11.7.1"
-        PYTHON_VERSION="3.10"
-        bash .github/scripts/build_wheel.bash -v -p "$PYTHON_VERSION" -o fbgemm_gpu_test -P pytorch-nightly -c "$CUDA_VERSION" -m /opt/conda
-
-        # Test FBGEMM_GPU using a generated wheel file
-        WHEEL_PATH="$(ls fbgemm_gpu/dist/*.whl)"
-        bash .github/scripts/test_wheel.bash -v -p "$PYTHON_VERSION" -P pytorch-nightly -c "$CUDA_VERSION" -w "$WHEEL_PATH" -m /opt/conda
-
-  build_cpu_only:
+
+  build_and_test_cpu:
     runs-on: ${{ matrix.os }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
     strategy:
+      fail-fast: false
       matrix:
-        os: [ubuntu-latest]
+        os: [ ubuntu-20.04, ubuntu-latest ]
+        python-version: [ "3.8", "3.9", "3.10" ]
 
     steps:
-    - uses: actions/checkout@v3
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+      with:
+        submodules: true
 
-    - name: Install dependencies
-      shell: bash
-      run: |
-        sudo apt-get update
-        sudo apt-get -y install git pip python3-dev
-        sudo pip install cmake scikit-build ninja jinja2 numpy hypothesis --no-input
-        # Install PyTorch (nightly) as required by fbgemm_gpu
-        sudo pip install --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info
 
-    - name: Checkout submodules
-      shell: bash
-      run: |
-        cd fbgemm_gpu
-        git submodule sync
-        git submodule update --init --recursive
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
 
-    - name: Build fbgemm_gpu
-      shell: bash
+    - name: Setup Miniconda
       run: |
-        cd fbgemm_gpu
-        # to avoid "Permission denied" error in '/usr/local/lib/python3.8/dist-packages/' folder
-        sudo python setup.py install --cpu_only
+        . $PRELUDE; setup_miniconda $HOME/miniconda
+        echo "${HOME}/miniconda/bin" >> $GITHUB_PATH
+        echo "CONDA=${HOME}/miniconda" >> $GITHUB_PATH
 
-    - name: Test fbgemm_gpu cpu-only installation
-      shell: bash
-      run: |
-        cd fbgemm_gpu
-        cd test
-        python batched_unary_embeddings_test.py -v
-        python input_combine_test.py -v
-        python layout_transform_ops_test.py -v
-        python merge_pooled_embeddings_test.py -v
-        python permute_pooled_embedding_modules_test.py -v
-        python quantize_ops_test.py -v
-        python sparse_ops_test.py -v
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install Build Tools
+      run: . $PRELUDE; install_build_tools $BUILD_ENV
+
+    - name: Install PyTorch
+      run:  . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cpu
+
+    - name: Prepare FBGEMM Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Build and Install FBGEMM_GPU (CPU version)
+      run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_install $BUILD_ENV cpu
+
+    - name: Test with PyTest
+      timeout-minutes: 10
+      run: . $PRELUDE; cd fbgemm_gpu/test; run_fbgemm_gpu_tests $BUILD_ENV cpu
@@ -1,3 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 name: FBGEMM_GPU Lint
 
 on:
@@ -8,36 +13,40 @@ on:
   pull_request:
     branches:
       - main
+
 jobs:
-  build:
+  run_pylint:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8"]
+        python-version: [ "3.8" ]
     steps:
     - uses: actions/checkout@v3
+
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
+
+    - name: Install Dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install ufmt
-        pip install click
-        pip install flake8
+        pip install click flake8 ufmt
+
     - name: Analyzing the code with flake8
       run: |
         echo "::add-matcher::fbgemm_gpu/test/lint/flake8_problem_matcher.json"
         flake8 --ignore=E501,W503,E203 .
         # E501 = line too long
         # W503 = line break before binary operator (deprecated)
         # E203 = whitespace before ":"
+
     - name: Analyzing the code with ufmt
       run: |
         ufmt diff fbgemm_gpu/fbgemm_gpu
         ufmt diff fbgemm_gpu/test
         ufmt diff fbgemm_gpu/bench
+
     - name: Check Meta copyright header
       run: |
         python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/fbgemm_gpu --fixit=False