Add torch.nn.functional.conv2d (#70)

alessandropalla · web-flow · commit ed0993bd5df8 · 2024-06-24T13:24:07.000+02:00
* Refactor convolution code

* Support torch.nn.functional.conv2d

* Support for same and valid padding

* View also supports args
diff --git a/intel_npu_acceleration_library/backend/bindings.py b/intel_npu_acceleration_library/backend/bindings.py
@@ -161,6 +161,8 @@ def init_network_factory(lib: ctypes.CDLL):
     lib.linear.restype = handler
 
     lib.convolution.argtypes = [
+        handler,
+        handler,
         handler,
         handler,
         ctypes.c_int,
@@ -172,10 +174,6 @@ def init_network_factory(lib: ctypes.CDLL):
         ctypes.c_int,
         c_u32_array,
         ctypes.c_int,
-        c_u32_array,
-        ctypes.c_int,
-        ctypes.c_bool,
-        ctypes.c_char_p,
         ctypes.c_char_p,
     ]
     lib.convolution.restype = handler
diff --git a/intel_npu_acceleration_library/backend/convolution.py b/intel_npu_acceleration_library/backend/convolution.py
@@ -38,34 +38,21 @@ def __init__(
         """
         super().__init__(profile, device)
         input = self.parameter(input_shape)
-
-        # Get the number of spatial dimensions
-        n_spatial_dims = len(input_shape) - 2
-
-        if isinstance(strides, int):
-            strides = [strides] * n_spatial_dims
-
-        if isinstance(padding, int):
-            padding_begins = [padding] * n_spatial_dims
-            padding_ends = [padding] * n_spatial_dims
+        weights = self.parameter(weights_shape)
+        if bias is not None:
+            bias_node = self.parameter((1, weights_shape[0], 1, 1))
         else:
-            padding_begins = list(padding)
-            padding_ends = list(padding)
-
-        if isinstance(dilation, int):
-            dilation = [dilation] * n_spatial_dims
+            bias_node = None
 
         conv = self.convolution(
             input,
-            weights_shape,
-            bias=bias,
+            weights,
+            bias=bias_node,
             strides=strides,
-            padding_begins=padding_begins,
-            padding_ends=padding_ends,
+            padding=padding,
             dilation=dilation,
             groups=groups,
             act_dtype=np.float16,
-            wt_dtype=np.float16,
         )
 
         self.compile(conv)
diff --git a/intel_npu_acceleration_library/backend/factory.py b/intel_npu_acceleration_library/backend/factory.py
@@ -188,6 +188,8 @@ def constant(
             data = np.array([data], dtype=np.float32)
         elif isinstance(data, torch.Tensor):
             data = data.detach().numpy()
+        elif data is None:
+            return ctypes.cast(ctypes.c_void_p(0), ctypes.POINTER(ctypes.c_char))
 
         dst = data.ctypes.data_as(ctypes.c_void_p)
         shape_ptr = np.array(data.shape, dtype=np.uint32)
@@ -199,44 +201,59 @@ def constant(
     def convolution(
         self,
         input_node: ctypes._Pointer,
-        weights_shape: Sequence[int],
-        bias: bool,
-        strides: Sequence[int] = (1, 1),
-        padding_begins: Sequence[int] = (0, 0),
-        padding_ends: Sequence[int] = (0, 0),
-        dilation: Sequence[int] = (1, 1),
+        weights_node: ctypes._Pointer,
+        bias: Optional[ctypes._Pointer] = None,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
         groups: int = 1,
         act_dtype: npt.DTypeLike = np.float16,
-        wt_dtype: npt.DTypeLike = np.float16,
+        n_spatial_dims: int = 2,
     ) -> ctypes._Pointer:
         """Generate a convolution layer.
 
         Args:
             input_node (ctypes._Pointer): layer input node
-            weights_shape (Sequence[int]): weights shape
+            weights_node (ctypes._Pointer): weights node
+            bias (Optional[ctypes._Pointer]): bias node
             strides (Sequence[int]): strides
-            padding_begins (Sequence[int]): padding
-            padding_ends (Sequence[int]): padding
+            padding (Sequence[int]): padding
             dilation (Sequence[int]): dilation
             groups (int): groups
-            bias (bool): enable/disable bias
             act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
-            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+            n_spatial_dims (int): number of spatial dimensions
 
         Returns:
             ctypes._Pointer: output node
         """
-        weights_shape_ptr = np.array(weights_shape, dtype=np.uint32)
+        if isinstance(strides, int):
+            strides = [strides] * n_spatial_dims
+
+        if isinstance(padding, int):
+            padding_begins = [padding] * n_spatial_dims
+            padding_ends = [padding] * n_spatial_dims
+        else:
+            padding_begins = list(padding)
+            padding_ends = list(padding)
+
+        if isinstance(dilation, int):
+            dilation = [dilation] * n_spatial_dims
+
         strides_ptr = np.array(strides, dtype=np.uint32)
         padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
         padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
         dilation_ptr = np.array(dilation, dtype=np.uint32)
 
+        if bias is not None:
+            bias_node = bias
+        else:
+            bias_node = ctypes.cast(ctypes.c_void_p(0), ctypes.POINTER(ctypes.c_char))
+
         return backend_lib.convolution(
             self._mm,
             input_node,
-            weights_shape_ptr.size,
-            weights_shape_ptr,
+            weights_node,
+            bias_node,
             strides_ptr.size,
             strides_ptr,
             padding_begins_ptr.size,
@@ -246,9 +263,7 @@ def convolution(
             dilation_ptr.size,
             dilation_ptr,
             groups,
-            bias,
             self.get_backend_dtype(act_dtype),
-            self.get_backend_dtype(wt_dtype),
         )
 
     @return_tensor
diff --git a/intel_npu_acceleration_library/backend/tensor.py b/intel_npu_acceleration_library/backend/tensor.py
@@ -335,16 +335,19 @@ def reshape(self, *shape: Union[int, Sequence[int]]) -> "Tensor":
             shape = shape[0]  # type: ignore
         return generate_op([self], "reshape", shape)
 
-    def view(self, shape: Sequence[int]) -> "Tensor":
+    def view(self, *shape: Union[Sequence[int], int]) -> "Tensor":
         """
         Return the transpose of the tensor.
 
         Args:
-            shape (Sequence[int]): The new shape of the tensor.
+            shape (Union[Sequence[int], int]): The new shape of the tensor.
 
         Returns:
             Tensor: The transposed tensor.
         """
+        if len(shape) == 1 and isinstance(shape[0], (list, tuple)):
+            shape = shape[0]  # type: ignore
+
         return self.reshape(*shape)
 
     def flatten(self, start_dim=0, end_dim=-1) -> "Tensor":
diff --git a/intel_npu_acceleration_library/nn/conv.py b/intel_npu_acceleration_library/nn/conv.py
@@ -189,7 +189,7 @@ def __init__(
         self.backend_cls = partial(
             Convolution,
             weights_shape=weights.shape,
-            bias=bias is not None,
+            bias=bias,
             strides=strides,
             padding=padding,
             dilation=dilation,
diff --git a/intel_npu_acceleration_library/nn/functional.py b/intel_npu_acceleration_library/nn/functional.py
@@ -928,3 +928,57 @@ def batch_norm(
         result = result + bias.view(1, -1, 1, 1)
 
     return result
+
+
+@implements(torch.nn.functional.conv2d)
+def conv2d(
+    input: Tensor,
+    weight: Union[Tensor, torch.Tensor],
+    bias: Optional[Union[Tensor, torch.Tensor]] = None,
+    stride: int = 1,
+    padding: Union[int, str] = 0,
+    dilation: int = 1,
+    groups: int = 1,
+) -> Tensor:
+    """Generate a convolution layer.
+
+    Args:
+        input (Tensor): layer input node
+        weight (Union[Tensor, torch.Tensor]): weight
+        bias (Union[Tensor, torch.Tensor]): bias
+        stride (int): stride
+        padding (Union[int, str]): padding
+        dilation (int): dilation
+        groups (int): groups
+
+    Raises:
+        ValueError: Padding mode not supported
+
+    Returns:
+        Tensor: output node
+    """
+    if isinstance(padding, str):
+        if padding == "valid":
+            padding = 0
+        elif padding == "same":
+            padding = weight.shape[2] // 2
+        else:
+            raise ValueError(f"Padding mode {padding} not supported")
+
+    if bias is not None:
+        bias = bias.view((1, weight.shape[0], 1, 1))
+
+    if groups > 1:
+        new_shape = [groups, weight.shape[0] // groups] + list(weight.shape[1:])
+        weight = weight.view(new_shape)
+
+    conv = generate_op(
+        [input, weight, bias],
+        "convolution",
+        strides=stride,
+        padding=padding,
+        dilation=dilation,
+        groups=groups,
+    )
+
+    return conv
diff --git a/src/bindings.cpp b/src/bindings.cpp
@@ -425,24 +425,25 @@ intel_npu_acceleration_library_DLL_API ov::op::Op* linear(intel_npu_acceleration
     return mm;
 }
 
-intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(
-        intel_npu_acceleration_library::ModelFactory* factory, ov::op::Op* in0, size_t weight_shape_size,
-        unsigned int* weight_shape_data, size_t strides_size, unsigned int* strides_data, size_t pad_begins_size,
-        unsigned int* pad_begins_data, size_t pad_ends_size, unsigned int* pad_ends_data, size_t dilations_size,
-        unsigned int* dilations_data, size_t groups, bool bias, char* act_dtype, char* wt_dtype) {
+intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(intel_npu_acceleration_library::ModelFactory* factory,
+                                                               ov::op::Op* in0, ov::op::Op* weights, ov::op::Op* bias,
+                                                               size_t strides_size, unsigned int* strides_data,
+                                                               size_t pad_begins_size, unsigned int* pad_begins_data,
+                                                               size_t pad_ends_size, unsigned int* pad_ends_data,
+                                                               size_t dilations_size, unsigned int* dilations_data,
+                                                               size_t groups, char* act_dtype) {
     ov::element::Type_t act_ov_dtype = intel_npu_acceleration_library::dtype_from_string(std::string(act_dtype));
-    ov::element::Type_t wt_ov_dtype = intel_npu_acceleration_library::dtype_from_string(std::string(wt_dtype));
 
     // Create vectors from the input data
-    std::vector<size_t> weight_shape(weight_shape_data, weight_shape_data + weight_shape_size);
     std::vector<size_t> strides(strides_data, strides_data + strides_size);
     std::vector<size_t> pad_begins(pad_begins_data, pad_begins_data + pad_begins_size);
     std::vector<size_t> pad_ends(pad_ends_data, pad_ends_data + pad_ends_size);
     std::vector<size_t> dilations(dilations_data, dilations_data + dilations_size);
 
-    bool quantized = wt_ov_dtype == ov::element::Type_t::i8 || wt_ov_dtype == ov::element::Type_t::i4;
+    auto weight_shape = weights->get_output_shape(0);
+    auto wt_ov_dtype = static_cast<ov::element::Type_t>(weights->get_output_element_type(0));
 
-    auto weights = factory->parameter(weight_shape, wt_ov_dtype);
+    bool quantized = wt_ov_dtype == ov::element::Type_t::i8 || wt_ov_dtype == ov::element::Type_t::i4;
 
     if (quantized) {
         weights = factory->convert_to(weights, act_ov_dtype);
@@ -459,7 +460,6 @@ intel_npu_acceleration_library_DLL_API ov::op::Op* convolution(
     }
 
     if (bias) {
-        auto bias = factory->parameter({1, weight_shape[0], 1, 1}, act_ov_dtype);
         return factory->eltwise_add(mm, bias);
     }
     return mm;
diff --git a/test/python/test_op.py b/test/python/test_op.py
@@ -322,3 +322,52 @@ def test_batch_norm(shape, mean, variance, weight, bias):
     result = model.run(x.numpy())
 
     assert 1 - r2_score(reference.flatten(), result.flatten()) < 0.01
+
+
+@pytest.mark.parametrize("in_channels", [32, 128, 256])
+@pytest.mark.parametrize("out_channels", [32, 128, 256])
+@pytest.mark.parametrize("kernels", [1, 3])
+@pytest.mark.parametrize("dim", [16, 32])
+@pytest.mark.parametrize("bias", [True, False])
+@pytest.mark.parametrize("dtype", [torch.float16])  # NOTE(review): unused below
+@pytest.mark.parametrize("stride", [1, 2])
+@pytest.mark.parametrize("padding", [0, 1, "same", "valid"])
+@pytest.mark.parametrize("groups", [1, -1])  # -1 is replaced by in_channels below
+def test_conv(
+    in_channels, out_channels, kernels, dim, bias, dtype, stride, padding, groups
+):  # Checks conv2d built on an NNFactory graph against torch on the same data
+    torch.manual_seed(42)
+
+    if groups != 1 and in_channels != out_channels:
+        pytest.skip("DW convolutions require in_channels == out_channels")
+
+    if padding == "same" and stride > 1:
+        pytest.skip("padding='same' is not supported for strided convolutions")
+
+    if groups == -1:
+        groups = in_channels
+
+    x = torch.rand((1, in_channels, dim, dim)).to(torch.float16)
+
+    weight = torch.rand((out_channels, in_channels // groups, kernels, kernels)).to(
+        torch.float16
+    )
+    bias = torch.rand((out_channels,)).to(torch.float16) if bias else None
+
+    reference = (
+        torch.nn.functional.conv2d(x, weight, bias, stride, padding, groups=groups)
+        .detach()
+        .numpy()
+    )
+
+    model = NNFactory()
+    par = model.parameter(x.shape, np.float16)  # graph input placeholder
+
+    out = torch.nn.functional.conv2d(par, weight, bias, stride, padding, groups=groups)
+    model.compile(out)
+
+    assert out.shape == list(reference.shape)  # shape is checked before running
+
+    result = model.run(x.numpy())
+
+    assert 1 - r2_score(reference.flatten(), result.flatten()) < 0.01  # r2 > 0.99
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
@@ -99,7 +99,11 @@ def test_model_creation():
 
     assert ff.dim() == 3
 
-    model.compile(ff)
+    gg = ff.view(1, -1, 1, 1)
+
+    assert gg.shape == [1, 32 * 128 * 64, 1, 1]
+
+    model.compile(gg)
 
 
 def test_slice():