
Commit 99a19b2

airidas-meta authored and facebook-github-bot committed
add lint to oss (pytorch#1427)
Summary:
Pull Request resolved: pytorch#1427
Pull Request resolved: pytorch#1417

Enforce the format check for FBGEMM OSS. This was left to Airidas Korolkovas as a starter ramp-up task.

Differential Revision: D40620055

fbshipit-source-id: 632a8c9f2d800fbc4963eeb72a5e21f192dc0557
1 parent bcc69ed commit 99a19b2

File tree

10 files changed: +197, -15 lines

.github/workflows/pylint.yaml

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+name: Lint
+
+on:
+  push:
+    branches:
+      - main
+
+  pull_request:
+    branches:
+      - main
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.8"]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ufmt
+          pip install click
+          pip install flake8
+      - name: Analyzing the code with flake8
+        run: |
+          echo "::add-matcher::fbgemm_gpu/test/lint/flake8_problem_matcher.json"
+          flake8 --ignore=E501,E402,E231,W503,F841,F401,W291,E302,E503,E203,E266 . # Ignore errors
+      - name: Analyzing the code with ufmt
+        run: |
+          ufmt diff fbgemm_gpu/fbgemm_gpu
+          ufmt diff fbgemm_gpu/test
+          ufmt diff fbgemm_gpu/bench
+      - name: Check Meta copyright header
+        run: |
+          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/fbgemm_gpu --fixit=False
+          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/test --fixit=False
+          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/bench --fixit=False
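
For contributors who want to run the same checks locally before pushing, the short helper below mirrors the workflow's lint steps. It is a hypothetical convenience script, not part of this commit; it assumes flake8, ufmt, and click are installed as in the workflow's install step and that it is executed from the repository root.

#!/usr/bin/env python3
# Hypothetical local lint runner mirroring .github/workflows/pylint.yaml (not part of this commit).
import subprocess
import sys

COMMANDS = [
    # Same ignore list the workflow passes to flake8.
    ["flake8", "--ignore=E501,E402,E231,W503,F841,F401,W291,E302,E503,E203,E266", "."],
    # ufmt reports formatting differences without rewriting files.
    ["ufmt", "diff", "fbgemm_gpu/fbgemm_gpu"],
    ["ufmt", "diff", "fbgemm_gpu/test"],
    ["ufmt", "diff", "fbgemm_gpu/bench"],
    # Same copyright-header checks the workflow runs.
    ["python", "fbgemm_gpu/test/lint/check_meta_header.py", "--path=./fbgemm_gpu/fbgemm_gpu", "--fixit=False"],
    ["python", "fbgemm_gpu/test/lint/check_meta_header.py", "--path=./fbgemm_gpu/test", "--fixit=False"],
    ["python", "fbgemm_gpu/test/lint/check_meta_header.py", "--path=./fbgemm_gpu/bench", "--fixit=False"],
]

def main() -> int:
    failed = 0
    for cmd in COMMANDS:
        print("+", " ".join(cmd))
        # Run every check even if an earlier one fails, then report an overall status.
        failed |= subprocess.run(cmd).returncode
    return 1 if failed else 0

if __name__ == "__main__":
    sys.exit(main())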

fbgemm_gpu/bench/merge_embeddings_benchmark.py

Lines changed: 1 addition & 1 deletion
@@ -72,7 +72,7 @@ def generate_requests(
     E: int,
     # inter-batch indices reuse rate
     reuse: float = 0.0,
-) -> List[Tuple[torch.IntTensor, torch.IntTensor,]]:
+) -> List[Tuple[torch.IntTensor, torch.IntTensor, ]]:
     rs = []
     for gpu_num in range(num_gpus):
         all_indices = torch.randint(

fbgemm_gpu/fbgemm_gpu/_fbgemm_gpu_docs.py

Lines changed: 11 additions & 9 deletions
@@ -3,9 +3,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+import torch
+
 import fbgemm_gpu
 import fbgemm_gpu.split_table_batched_embeddings_ops
-import torch
+

 Tensor = torch.Tensor

@@ -85,12 +87,12 @@ def add_docs(method, docstr):
 """
 dense_to_jagged(dense, x_offsets, total_L) -> (Tensor, Tensor[])

-Converts a dense tensor into a jagged tensor, given the desired offsets of the resulting dense tensor.
+Converts a dense tensor into a jagged tensor, given the desired offsets of the resulting dense tensor.

 Args:
     dense (Tensor): A dense input tensor to be converted

-    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     total_L (int, Optional): Total number of values in the resulting jagged tensor.

@@ -119,7 +121,7 @@ def add_docs(method, docstr):
 Args:
     values (Tensor): Jagged tensor values

-    offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     max_lengths (int[]): A list with max_length for each jagged dimension.

@@ -147,13 +149,13 @@ def add_docs(method, docstr):
 """
 jagged_dense_elementwise_add(x_values, x_offsets, y) -> Tensor

-Adds a jagged tensor to a dense tensor, resulting in dense tensor. Jagged
+Adds a jagged tensor to a dense tensor, resulting in dense tensor. Jagged
 tensor input will be padded with zeros for the purposes of the addition.

 Args:
     x_values (Tensor): Jagged tensor values

-    offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     y (Tensor): A dense tensor

@@ -174,7 +176,7 @@ def add_docs(method, docstr):
 Args:
     x_values (Tensor): Jagged tensor values

-    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     y (Tensor): A dense tensor

@@ -195,7 +197,7 @@ def add_docs(method, docstr):
 Args:
     x_values (Tensor): Jagged tensor values

-    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     y_0 (Tensor): A dense tensor

@@ -218,7 +220,7 @@ def add_docs(method, docstr):
 Args:
     x_values (Tensor): Jagged tensor values

-    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
+    x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.

     y (Tensor): A dense tensor

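Since the docstrings above describe the jagged-tensor operators, a tiny usage sketch may help. It is not part of this commit, the shapes are made up for illustration, and it assumes an fbgemm_gpu build whose dense_to_jagged kernel is available on the chosen device.

# Hypothetical example following the dense_to_jagged docstring above (not part of this commit).
import torch
import fbgemm_gpu  # noqa: F401  # importing registers the torch.ops.fbgemm.* operators

device = "cuda"  # assumption: a GPU build of fbgemm_gpu; shapes below are invented
offsets = torch.tensor([0, 2, 3], device=device)  # one jagged dimension: batch 0 has 2 rows, batch 1 has 1
dense = torch.randn(2, 2, 4, device=device)       # padded dense input of shape (B, max_L, D)
values, output_offsets = torch.ops.fbgemm.dense_to_jagged(dense, [offsets])
print(values.shape)  # torch.Size([3, 4]): only the real rows remain, padding dropped
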
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.

fbgemm_gpu/fbgemm_gpu/quantize_comm.py

Lines changed: 2 additions & 1 deletion
@@ -14,6 +14,7 @@
 from typing import Optional, TypeVar

 import torch
+from torch.autograd.profiler import record_function

 from fbgemm_gpu.quantize_utils import (
     bf16_to_fp32,
@@ -24,7 +25,7 @@
     hfp8_to_fp32,
 )
 from fbgemm_gpu.split_embedding_configs import SparseType
-from torch.autograd.profiler import record_function
+

 logger: logging.Logger = logging.getLogger()


fbgemm_gpu/fbgemm_gpu/split_embedding_inference_converter.py

Lines changed: 3 additions & 2 deletions
@@ -11,12 +11,13 @@
 import math
 from typing import Optional, Tuple

-import fbgemm_gpu.split_table_batched_embeddings_ops as split_table_batched_embeddings_ops
 import numpy as np
 import torch
-from fbgemm_gpu.split_embedding_configs import QuantizationConfig, SparseType
 from torch import nn, Tensor

+import fbgemm_gpu.split_table_batched_embeddings_ops as split_table_batched_embeddings_ops
+from fbgemm_gpu.split_embedding_configs import QuantizationConfig, SparseType
+
 # TODO: add per-feature based converter option (based on embedding_specs during inference)
 # TODO: optimize embedding pruning and quantization latency.
 class SplitEmbInferenceConverter:

fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py

Lines changed: 3 additions & 2 deletions
@@ -14,11 +14,12 @@
 from math import log2
 from typing import Dict, List, NamedTuple, Optional, Tuple, Type, Union

-import fbgemm_gpu.split_embedding_codegen_lookup_invokers as invokers
 import torch
-from fbgemm_gpu.split_embedding_configs import EmbOptimType as OptimType, SparseType
 from torch import nn, Tensor

+import fbgemm_gpu.split_embedding_codegen_lookup_invokers as invokers
+from fbgemm_gpu.split_embedding_configs import EmbOptimType as OptimType, SparseType
+
 DEFAULT_ASSOC = 32 if torch.version.hip is None else 64
 # Maximum number of times prefetch() can be called without
 # a corresponding forward() call

fbgemm_gpu/fbgemm_gpu/uvm.py

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@
 from typing import Optional

 import torch
+
 from fbgemm_gpu.enums import create_enums

 try:

fbgemm_gpu/test/lint/check_meta_header.py

Lines changed: 111 additions & 0 deletions
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+"""Check Python source code contains Meta copyright header
+"""
+
+from __future__ import annotations
+
+import os
+import sys
+
+import click
+
+
+def process_header(header, comment):
+    lines = header.split("\n")
+    new_lines = []
+    for line in lines:
+        if line is None or line == "":
+            new_lines.append(comment)
+        else:
+            new_lines.append(comment + " " + line)
+    return "\n".join(new_lines) + "\n"
+
+
+HEADER = """Copyright (c) Meta Platforms, Inc. and affiliates.
+All rights reserved.
+This source code is licensed under the BSD-style license found in the
+LICENSE file in the root directory of this source tree.
+"""
+HEADER_lines = HEADER.splitlines()[1:]
+PY_HEADER = process_header(HEADER, "#")
+CPP_HEADER = process_header(HEADER, "//")
+
+
+def dfs(root_path: str) -> list[str]:
+    """DFS source code tree to find python files missing header
+
+    Parameters
+    ----------
+    root_path : str
+        root source directory path
+
+    Returns
+    -------
+    list[str]
+        file list missing header
+    """
+    ret = []
+    for root, _, files in os.walk(root_path, topdown=False):
+        for name in files:
+            path = os.path.join(root, name)
+            if path.endswith(".py"):
+                with open(path) as fi:
+                    src = fi.read()
+                    flag = True
+                    for line in HEADER_lines:
+                        if line not in src:
+                            flag = False
+                            break
+                    if not flag:
+                        ret.append(path)
+    return ret
+
+
+def fix_header(file_list: list[str]) -> None:
+    """Adding Meta header to to source files
+
+    Parameters
+    ----------
+    file_list : list[str]
+        file list missing header
+    """
+    for path in file_list:
+        src = ""
+        with open(path) as fi:
+            src = fi.read()
+        with open(path, "w") as fo:
+            fo.write(PY_HEADER)
+            fo.write(src)
+
+
+@click.command()
+@click.option(
+    "--path", help="Root directory of source to be checked", required=True, type=str
+)
+@click.option(
+    "--fixit", default=False, help="Fix missing header", required=False, type=bool
+)
+def check_header(path, fixit):
+    ret = dfs(path)
+    if len(ret) == 0:
+        sys.exit(0)
+    print("Need to add Meta header to the following files.")
+    print("----------------File List----------------")
+    for line in ret:
+        print(line)
+    print("-----------------------------------------")
+    if fixit:
+        fix_header(ret)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    check_header()
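
As a quick illustration of how the checker is driven, the sketch below prints the generated Python header block and invokes the click command in-process via click.testing.CliRunner. It is a hypothetical usage example, not part of this commit, and assumes the working directory is fbgemm_gpu/test/lint so the script imports as the module check_meta_header.

# Hypothetical usage sketch (not part of this commit); run from fbgemm_gpu/test/lint.
from click.testing import CliRunner

from check_meta_header import PY_HEADER, check_header

# process_header() turned the raw HEADER text into a "#"-commented block:
print(PY_HEADER)

# Drive the CLI the same way the workflow does, but in-process instead of a subprocess.
runner = CliRunner()
result = runner.invoke(check_header, ["--path", "./", "--fixit", "False"])
print(result.output)
print("exit code:", result.exit_code)  # 0 when every .py file under --path carries the header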

fbgemm_gpu/test/lint/flake8_problem_matcher.json

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+{
+  "problemMatcher": [
+    {
+      "owner": "flake8",
+      "severity": "error",
+      "pattern": [
+        {
+          "regexp": "^([^:]+):(\\d+):(\\d+):\\s+(.*)$",
+          "file": 1,
+          "line": 2,
+          "column": 3,
+          "message": 4
+        }
+      ]
+    }
+  ]
+}
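
To show what this matcher extracts, the sketch below (not part of this commit) applies the same regular expression to a flake8-style diagnostic line; the sample path and message are invented for illustration.

# Illustrative sketch (not part of this commit): the problem-matcher regexp applied to a flake8-style line.
import re

PATTERN = re.compile(r"^([^:]+):(\d+):(\d+):\s+(.*)$")  # same regexp as in the JSON above

sample = "fbgemm_gpu/fbgemm_gpu/uvm.py:12:1: F401 'os' imported but unused"  # invented example
match = PATTERN.match(sample)
if match:
    file, line, column, message = match.groups()
    # These groups feed the "file", "line", "column", and "message" fields GitHub uses for annotations.
    print(f"{file} (line {line}, col {column}): {message}")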
