This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit fa4c82a

Native convolution and dw convolution (#61)
* Add bindings for convolution
* Add nn classes
* Fix bias shape
* Bugfix
* Fix style
* Support depthwise convolution
* Add new tests
* Add fallback to older method
1 parent bcb1315 commit fa4c82a

File tree

9 files changed: +371 −25 lines changed

include/intel_npu_acceleration_library/nn_factory.h

Lines changed: 11 additions & 1 deletion
@@ -99,11 +99,21 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
      * @param pads_begin convolution padding begin
      * @param pads_ends convolution padding end
      * @param dilations convolution dilations
+     * @param groups convolution groups
      * @return ov::op::Op*
      */
     ov::op::Op* convolution(ov::op::Op* input, ov::op::Op*& weights, std::vector<size_t> strides,
                             std::vector<size_t> pads_begin, std::vector<size_t> pads_ends,
-                            std::vector<size_t> dilations) {
+                            std::vector<size_t> dilations, size_t groups = 1) {
+        if (groups > 1) {
+            auto conv = std::make_shared<ov::opset8::GroupConvolution>(
+                input->output(0), weights->output(0), ov::Strides(strides),
+                ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_ends.begin(), pads_ends.end())),
+                ov::Strides(dilations));
+            operations.push_back(conv);
+            return conv.get();
+        }
         auto conv = std::make_shared<ov::opset8::Convolution>(
             input->output(0), weights->output(0), ov::Strides(strides),
             ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
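Note on the groups > 1 path: ov::opset8::GroupConvolution takes its weights with an extra leading groups dimension, which is why the Python wrapper added later in this commit (nn/conv.py) reshapes torch-style [C_out, C_in/groups, kH, kW] weights before handing them to the backend. A minimal NumPy sketch of that reshape, with illustrative sizes:

    import numpy as np

    # Depthwise case: groups == C_in == C_out == 8, 3x3 kernel
    groups, c_out, k = 8, 8, 3
    w = np.zeros((c_out, c_out // groups, k, k), dtype=np.float16)  # torch layout [8, 1, 3, 3]

    # GroupConvolution layout: [groups, C_out/groups, C_in/groups, kH, kW]
    w_grouped = w.reshape(groups, c_out // groups, *w.shape[1:])
    print(w_grouped.shape)  # (8, 1, 1, 3, 3)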

intel_npu_acceleration_library/backend/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .bindings import lib
66
from .utils import npu_available, get_driver_version, check_npu_and_driver_version
77
from .mlp import MLP
8+
from .convolution import Convolution
89
from .matmul import MatMul
910
from .linear import Linear
1011
from .qmatmul import QMatMul
@@ -22,6 +23,7 @@
2223
"Linear",
2324
"QMatMul",
2425
"QLinear",
26+
"Convolution",
2527
"SDPA",
2628
"run_matmul",
2729
"run_factory",

intel_npu_acceleration_library/backend/base.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def adapt_weight(w: np.ndarray) -> np.ndarray:
     elif len(w.shape) == 2:
         return w, w.shape
     else:
-        w_adapted = w.flatten().reshape((1, -1))
+        w_adapted = w.reshape((1, -1))
         return w_adapted, w_adapted.shape

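The adapt_weight change above drops a redundant copy: ndarray.flatten() always allocates, while reshape((1, -1)) produces the same (1, N) result and can return a view. A quick illustrative check:

    import numpy as np

    w = np.arange(24, dtype=np.float16).reshape(2, 3, 4)  # an N-D weight tensor
    a = w.flatten().reshape((1, -1))  # old path: flatten() copies first
    b = w.reshape((1, -1))            # new path: no intermediate copy
    assert a.shape == b.shape == (1, 24)
    assert np.array_equal(a, b)
    assert np.shares_memory(b, w) and not np.shares_memory(a, w)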
intel_npu_acceleration_library/backend/bindings.py

Lines changed: 20 additions & 0 deletions
@@ -126,6 +126,26 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.linear.restype = handler

+    lib.convolution.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+    ]
+    lib.convolution.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
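The new signature lines up one c_int size plus one c_u32_array pointer per shape vector, matching the call factory.py makes below. A comment-only sketch of the positional layout (argument names are illustrative, not from the bindings):

    # lib.convolution(
    #     model_factory,                        # handler
    #     input_node,                           # handler
    #     weights_shape.size, weights_shape,    # c_int, c_u32_array
    #     strides.size, strides,                # c_int, c_u32_array
    #     padding_begins.size, padding_begins,  # c_int, c_u32_array
    #     padding_ends.size, padding_ends,      # c_int, c_u32_array
    #     dilation.size, dilation,              # c_int, c_u32_array
    #     groups,                               # c_int
    #     bias,                                 # c_bool
    #     act_dtype, wt_dtype,                  # c_char_p dtype strings
    # ) -> handler                              # output node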
intel_npu_acceleration_library/backend/convolution.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.backend.factory import NNFactory
+from typing import Sequence, Union
+import numpy as np
+
+
+class Convolution(NNFactory):
+    """Convolution class, computing a convolution with weights prefetching."""
+
+    def __init__(
+        self,
+        input_shape: Sequence[int],
+        weights_shape: Sequence[int],
+        bias: bool = False,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
+        groups: int = 1,
+        profile: bool = False,
+        device: str = "NPU",
+    ):
+        """Initialize the Convolution class.
+
+        Args:
+            input_shape (Sequence[int]): input shape
+            weights_shape (Sequence[int]): weights shape
+            bias (bool): Enable/Disable bias. Defaults to False.
+            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
+            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
+            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
+            groups (int, optional): Groups. Defaults to 1.
+            profile (bool, optional): Enable/Disable profiling. Defaults to False.
+            device (str): Target device, default to "NPU".
+        """
+        super().__init__(profile, device)
+        input = self.parameter(input_shape)
+
+        # Get the number of spatial dimensions
+        n_spatial_dims = len(input_shape) - 2
+
+        if isinstance(strides, int):
+            strides = [strides] * n_spatial_dims
+
+        if isinstance(padding, int):
+            padding_begins = [padding] * n_spatial_dims
+            padding_ends = [padding] * n_spatial_dims
+        else:
+            padding_begins = list(padding)
+            padding_ends = list(padding)
+
+        if isinstance(dilation, int):
+            dilation = [dilation] * n_spatial_dims
+
+        conv = self.convolution(
+            input,
+            weights_shape,
+            bias=bias,
+            strides=strides,
+            padding_begins=padding_begins,
+            padding_ends=padding_ends,
+            dilation=dilation,
+            groups=groups,
+            act_dtype=np.float16,
+            wt_dtype=np.float16,
+        )
+
+        self.compile(conv)
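A minimal usage sketch for the new backend class. The run(...) call here is an assumption: it mirrors the interface the library's other NNFactory-based classes (e.g. MatMul) expose and is not part of this diff:

    import numpy as np
    from intel_npu_acceleration_library.backend import Convolution

    # 3x3 convolution over a [1, 8, 16, 16] fp16 input
    conv = Convolution(
        input_shape=[1, 8, 16, 16],
        weights_shape=[8, 8, 3, 3],
        bias=False,
        strides=1,
        padding=1,
        dilation=1,
        groups=1,
    )

    x = np.random.rand(1, 8, 16, 16).astype(np.float16)
    w = np.random.rand(8, 8, 3, 3).astype(np.float16)
    out = conv.run(x, w)  # assumed run signature, as for MatMul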

intel_npu_acceleration_library/backend/factory.py

Lines changed: 59 additions & 4 deletions
@@ -6,7 +6,7 @@
 from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefetch
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
-from typing import Optional, Tuple, Any, Union
+from typing import Optional, Tuple, Any, Union, Sequence
 from functools import partial
 import numpy.typing as npt
 import numpy as np
@@ -75,12 +75,12 @@ def get_backend_dtype(self, dtype) -> ctypes.c_char_p:
         return ctypes.c_char_p(str_dtype.encode())

     def parameter(
-        self, shape: Tuple[int, int], dtype: npt.DTypeLike = np.float16
+        self, shape: Sequence[int], dtype: npt.DTypeLike = np.float16
     ) -> ctypes._Pointer:
         """Generate a model input parameter.

         Args:
-            shape (Tuple[int, int]): Parameter shape (only 2D tensors supported atm)
+            shape (Sequence[int]): Parameter shape
            dtype (np.dtype, optional): parameter type np.int8, np.uint8 and np.float16 supported. Defaults to np.float16. Uint8 represents packed i4 dtypes

         Returns:
@@ -92,6 +92,61 @@ def parameter(
             self._mm, shape_ptr.size, shape_ptr, self.get_backend_dtype(dtype)
         )

+    def convolution(
+        self,
+        input_node: ctypes._Pointer,
+        weights_shape: Sequence[int],
+        bias: bool,
+        strides: Sequence[int] = (1, 1),
+        padding_begins: Sequence[int] = (0, 0),
+        padding_ends: Sequence[int] = (0, 0),
+        dilation: Sequence[int] = (1, 1),
+        groups: int = 1,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+    ) -> ctypes._Pointer:
+        """Generate a convolution layer.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            weights_shape (Sequence[int]): weights shape
+            bias (bool): enable/disable bias
+            strides (Sequence[int]): strides
+            padding_begins (Sequence[int]): padding begin
+            padding_ends (Sequence[int]): padding end
+            dilation (Sequence[int]): dilation
+            groups (int): groups
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        weights_shape_ptr = np.array(weights_shape, dtype=np.uint32)
+        strides_ptr = np.array(strides, dtype=np.uint32)
+        padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
+        padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
+        dilation_ptr = np.array(dilation, dtype=np.uint32)
+
+        return backend_lib.convolution(
+            self._mm,
+            input_node,
+            weights_shape_ptr.size,
+            weights_shape_ptr,
+            strides_ptr.size,
+            strides_ptr,
+            padding_begins_ptr.size,
+            padding_begins_ptr,
+            padding_ends_ptr.size,
+            padding_ends_ptr,
+            dilation_ptr.size,
+            dilation_ptr,
+            groups,
+            bias,
+            self.get_backend_dtype(act_dtype),
+            self.get_backend_dtype(wt_dtype),
+        )
+
     def linear(
         self,
         input_node: ctypes._Pointer,
@@ -112,7 +167,7 @@ def linear(
             wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.

         Returns:
-            ctypes._Pointer: _description_
+            ctypes._Pointer: output node
         """
         return backend_lib.linear(
             self._mm,
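The new factory method can also be driven directly when composing a graph by hand; backend/convolution.py above does exactly this. A sketch, assuming only the NNFactory constructor and compile(...) usage shown in this commit:

    import numpy as np
    from intel_npu_acceleration_library.backend.factory import NNFactory

    factory = NNFactory(profile=False, device="NPU")
    x = factory.parameter([1, 8, 16, 16])  # activation input node
    out = factory.convolution(
        x,
        weights_shape=[8, 8, 3, 3],
        bias=False,
        strides=[1, 1],
        padding_begins=[1, 1],
        padding_ends=[1, 1],
        dilation=[1, 1],
        groups=1,
        act_dtype=np.float16,
        wt_dtype=np.float16,
    )
    factory.compile(out)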

intel_npu_acceleration_library/nn/conv.py

Lines changed: 120 additions & 7 deletions
@@ -3,13 +3,17 @@
 # SPDX-License-Identifier: Apache 2.0
 #

-import intel_npu_acceleration_library.nn as nn
+from intel_npu_acceleration_library.backend import run_factory, Convolution
+from intel_npu_acceleration_library.nn import Linear
+from typing import Optional, Sequence, Union
+from functools import partial
 import torch
+import uuid


-class Conv2d(torch.nn.Module):
+class Im2ColConv2d(torch.nn.Module):
     """
-    2D convolutional layer implementation.
+    2D convolutional layer implementation using Im2Col.

     Attrs:
         weight (torch.Tensor): The weight tensor of the layer.
@@ -118,7 +122,7 @@ def forward(self, x) -> torch.Tensor:
         return out

     @staticmethod
-    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Im2ColConv2d":
         """
         Create a Conv2d layer from a torch.nn.Conv2d layer.

@@ -127,11 +131,11 @@ def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
             dtype (torch.dtype, optional): Data type of the layer.

         Returns:
-            Conv2d: The converted Conv2d layer.
+            Im2ColConv2d: The converted Im2ColConv2d layer.
         """
         weight = layer.weight.view(layer.weight.shape[0], -1)
-        matmul = nn.Linear.fromTensor(weight, getattr(layer, "bias", None), dtype)
-        new_layer = Conv2d(
+        matmul = Linear.fromTensor(weight, getattr(layer, "bias", None), dtype)
+        new_layer = Im2ColConv2d(
             matmul,
             layer.in_channels,
             layer.out_channels,
@@ -142,3 +146,112 @@ def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
         )

         return new_layer
+
+
+class Conv2d(torch.nn.Module):
+    """
+    2D convolutional layer implementation.
+
+    Attrs:
+        weight (torch.Tensor): The weight tensor of the layer.
+        bias (torch.Tensor): The bias tensor of the layer.
+    """
+
+    def __init__(
+        self,
+        weights: torch.Tensor,
+        bias: Optional[torch.Tensor] = None,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
+        groups: int = 1,
+    ) -> None:
+        """Initialize a Convolutional layer.
+
+        Args:
+            weights (torch.Tensor): The weight tensor of the layer.
+            bias (Optional[torch.Tensor], optional): The bias tensor of the layer. Defaults to None.
+            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
+            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
+            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
+            groups (int, optional): Groups. Defaults to 1.
+        """
+        super().__init__()
+
+        self.op_id = str(uuid.uuid4())
+        if groups > 1:
+            new_shape = [groups, weights.shape[0] // groups] + list(weights.shape[1:])
+            weights = weights.view(*new_shape)
+
+        self.parameters = [weights]
+        if bias is not None:
+            self.parameters.append(bias)
+        self.backend_cls = partial(
+            Convolution,
+            weights_shape=weights.shape,
+            bias=bias is not None,
+            strides=strides,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+
+    @property
+    def weight(self) -> torch.Tensor:
+        """
+        Get the weight tensor of the layer.
+
+        Returns:
+            torch.Tensor: The weight tensor.
+        """
+        return self.parameters[0]
+
+    @property
+    def bias(self) -> torch.Tensor:
+        """
+        Get the bias tensor of the layer.
+
+        Returns:
+            torch.Tensor: The bias tensor.
+        """
+        if len(self.parameters) > 1:
+            return self.parameters[1]
+        return None
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Torch module forward method.
+
+        Args:
+            x (torch.Tensor): Input tensor
+
+        Returns:
+            torch.Tensor: result
+        """
+        return run_factory(x, self.parameters, self.backend_cls, self.op_id)
+
+    @staticmethod
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
+        """
+        Create a Conv2d layer from a torch.nn.Conv2d layer.
+
+        Args:
+            layer (torch.nn.Conv2d): The torch Conv2d layer.
+            dtype (torch.dtype, optional): Data type of the layer.
+
+        Returns:
+            Conv2d: The converted Conv2d layer.
+        """
+        # In case of unsupported configuration, fall back to Im2ColConv2d
+        if any(dim > 11 for dim in layer.kernel_size):
+            return Im2ColConv2d.fromTorch(layer, dtype)
+
+        new_layer = Conv2d(
+            layer.weight,
+            layer.bias,
+            layer.stride,
+            layer.padding,
+            layer.dilation,
+            layer.groups,
+        )
+
+        return new_layer
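End-to-end sketch of the new path, including the depthwise case that exercises the groups > 1 weight reshape and the kernel-size fallback (standard torch APIs only; actually running forward requires the library and its target device):

    import torch
    from intel_npu_acceleration_library.nn.conv import Conv2d

    # Depthwise convolution: groups == in_channels == out_channels
    torch_layer = torch.nn.Conv2d(8, 8, kernel_size=3, padding=1, groups=8)
    npu_layer = Conv2d.fromTorch(torch_layer)  # native NPU convolution

    x = torch.rand(1, 8, 16, 16)
    y = npu_layer(x)

    # Kernel dims > 11 fall back to the Im2Col implementation
    big = torch.nn.Conv2d(3, 16, kernel_size=13)
    fallback = Conv2d.fromTorch(big)  # returns an Im2ColConv2d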

0 commit comments