This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit fa4c82a

Native convolution and dw convolution (#61)
* Add bindings for convolution
* Add nn classes
* Fix bias shape
* Bugfix
* Fix style
* Support depthwise convolution
* Add new tests
* Add fallback to older method
1 parent bcb1315 commit fa4c82a

File tree

9 files changed: +371 −25 lines changed

include/intel_npu_acceleration_library/nn_factory.h

Lines changed: 11 additions & 1 deletion
@@ -99,11 +99,21 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
      * @param pads_begin convolution padding begin
      * @param pads_ends convolution padding end
      * @param dilations convolution dilations
+     * @param groups convolution groups
      * @return ov::op::Op*
      */
     ov::op::Op* convolution(ov::op::Op* input, ov::op::Op*& weights, std::vector<size_t> strides,
                             std::vector<size_t> pads_begin, std::vector<size_t> pads_ends,
-                            std::vector<size_t> dilations) {
+                            std::vector<size_t> dilations, size_t groups = 1) {
+        if (groups > 1) {
+            auto conv = std::make_shared<ov::opset8::GroupConvolution>(
+                input->output(0), weights->output(0), ov::Strides(strides),
+                ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
+                ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_ends.begin(), pads_ends.end())),
+                ov::Strides(dilations));
+            operations.push_back(conv);
+            return conv.get();
+        }
         auto conv = std::make_shared<ov::opset8::Convolution>(
             input->output(0), weights->output(0), ov::Strides(strides),
             ov::CoordinateDiff(std::vector<std::ptrdiff_t>(pads_begin.begin(), pads_begin.end())),
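Note on the groups > 1 path: ov::opset8::GroupConvolution takes its weights with an extra leading groups dimension, which is why the Python wrapper added later in this commit (nn/conv.py) reshapes torch-style [C_out, C_in/groups, kH, kW] weights before handing them to the backend. A minimal NumPy sketch of that reshape, with illustrative sizes:

    import numpy as np

    # Depthwise case: groups == C_in == C_out == 8, 3x3 kernel
    groups, c_out, k = 8, 8, 3
    w = np.zeros((c_out, c_out // groups, k, k), dtype=np.float16)  # torch layout [8, 1, 3, 3]

    # GroupConvolution layout: [groups, C_out/groups, C_in/groups, kH, kW]
    w_grouped = w.reshape(groups, c_out // groups, *w.shape[1:])
    print(w_grouped.shape)  # (8, 1, 1, 3, 3)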

intel_npu_acceleration_library/backend/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .bindings import lib
66
from .utils import npu_available, get_driver_version, check_npu_and_driver_version
77
from .mlp import MLP
8+
from .convolution import Convolution
89
from .matmul import MatMul
910
from .linear import Linear
1011
from .qmatmul import QMatMul
@@ -22,6 +23,7 @@
2223
"Linear",
2324
"QMatMul",
2425
"QLinear",
26+
"Convolution",
2527
"SDPA",
2628
"run_matmul",
2729
"run_factory",

intel_npu_acceleration_library/backend/base.py

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@ def adapt_weight(w: np.ndarray) -> np.ndarray:
     elif len(w.shape) == 2:
         return w, w.shape
     else:
-        w_adapted = w.flatten().reshape((1, -1))
+        w_adapted = w.reshape((1, -1))
         return w_adapted, w_adapted.shape

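The adapt_weight change above drops a redundant copy: ndarray.flatten() always allocates, while reshape((1, -1)) produces the same (1, N) result and can return a view. A quick illustrative check:

    import numpy as np

    w = np.arange(24, dtype=np.float16).reshape(2, 3, 4)  # an N-D weight tensor
    a = w.flatten().reshape((1, -1))  # old path: flatten() copies first
    b = w.reshape((1, -1))            # new path: no intermediate copy
    assert a.shape == b.shape == (1, 24)
    assert np.array_equal(a, b)
    assert np.shares_memory(b, w) and not np.shares_memory(a, w)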
intel_npu_acceleration_library/backend/bindings.py

Lines changed: 20 additions & 0 deletions
@@ -126,6 +126,26 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.linear.restype = handler

+    lib.convolution.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        ctypes.c_bool,
+        ctypes.c_char_p,
+        ctypes.c_char_p,
+    ]
+    lib.convolution.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
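The new signature lines up one c_int size plus one c_u32_array pointer per shape vector, matching the call factory.py makes below. A comment-only sketch of the positional layout (argument names are illustrative, not from the bindings):

    # lib.convolution(
    #     model_factory,                        # handler
    #     input_node,                           # handler
    #     weights_shape.size, weights_shape,    # c_int, c_u32_array
    #     strides.size, strides,                # c_int, c_u32_array
    #     padding_begins.size, padding_begins,  # c_int, c_u32_array
    #     padding_ends.size, padding_ends,      # c_int, c_u32_array
    #     dilation.size, dilation,              # c_int, c_u32_array
    #     groups,                               # c_int
    #     bias,                                 # c_bool
    #     act_dtype, wt_dtype,                  # c_char_p dtype strings
    # ) -> handler                              # output node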
intel_npu_acceleration_library/backend/convolution.py

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+#
+# Copyright © 2024 Intel Corporation
+# SPDX-License-Identifier: Apache 2.0
+#
+
+from intel_npu_acceleration_library.backend.factory import NNFactory
+from typing import Sequence, Union
+import numpy as np
+
+
+class Convolution(NNFactory):
+    """Convolution class, computing a convolution with weights prefetching."""
+
+    def __init__(
+        self,
+        input_shape: Sequence[int],
+        weights_shape: Sequence[int],
+        bias: bool = False,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
+        groups: int = 1,
+        profile: bool = False,
+        device: str = "NPU",
+    ):
+        """Initialize the Convolution class.
+
+        Args:
+            input_shape (Sequence[int]): input shape
+            weights_shape (Sequence[int]): weights shape
+            bias (bool): Enable/Disable bias. Defaults to False.
+            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
+            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
+            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
+            groups (int, optional): Groups. Defaults to 1.
+            profile (bool, optional): Enable/Disable profiling. Defaults to False.
+            device (str): Target device, default to "NPU".
+        """
+        super().__init__(profile, device)
+        input = self.parameter(input_shape)
+
+        # Get the number of spatial dimensions
+        n_spatial_dims = len(input_shape) - 2
+
+        if isinstance(strides, int):
+            strides = [strides] * n_spatial_dims
+
+        if isinstance(padding, int):
+            padding_begins = [padding] * n_spatial_dims
+            padding_ends = [padding] * n_spatial_dims
+        else:
+            padding_begins = list(padding)
+            padding_ends = list(padding)
+
+        if isinstance(dilation, int):
+            dilation = [dilation] * n_spatial_dims
+
+        conv = self.convolution(
+            input,
+            weights_shape,
+            bias=bias,
+            strides=strides,
+            padding_begins=padding_begins,
+            padding_ends=padding_ends,
+            dilation=dilation,
+            groups=groups,
+            act_dtype=np.float16,
+            wt_dtype=np.float16,
+        )
+
+        self.compile(conv)
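A minimal usage sketch for the new backend class. The run(...) call here is an assumption: it mirrors the interface the library's other NNFactory-based classes (e.g. MatMul) expose and is not part of this diff:

    import numpy as np
    from intel_npu_acceleration_library.backend import Convolution

    # 3x3 convolution over a [1, 8, 16, 16] fp16 input
    conv = Convolution(
        input_shape=[1, 8, 16, 16],
        weights_shape=[8, 8, 3, 3],
        bias=False,
        strides=1,
        padding=1,
        dilation=1,
        groups=1,
    )

    x = np.random.rand(1, 8, 16, 16).astype(np.float16)
    w = np.random.rand(8, 8, 3, 3).astype(np.float16)
    out = conv.run(x, w)  # assumed run signature, as for MatMul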

intel_npu_acceleration_library/backend/factory.py

Lines changed: 59 additions & 4 deletions
@@ -6,7 +6,7 @@
 from intel_npu_acceleration_library.backend.base import BaseNPUBackendWithPrefetch
 from intel_npu_acceleration_library.backend.ops import get_supported_ops
 from intel_npu_acceleration_library.backend.bindings import lib as backend_lib
-from typing import Optional, Tuple, Any, Union
+from typing import Optional, Tuple, Any, Union, Sequence
 from functools import partial
 import numpy.typing as npt
 import numpy as np
@@ -75,12 +75,12 @@ def get_backend_dtype(self, dtype) -> ctypes.c_char_p:
         return ctypes.c_char_p(str_dtype.encode())

     def parameter(
-        self, shape: Tuple[int, int], dtype: npt.DTypeLike = np.float16
+        self, shape: Sequence[int], dtype: npt.DTypeLike = np.float16
     ) -> ctypes._Pointer:
         """Generate a model input parameter.

         Args:
-            shape (Tuple[int, int]): Parameter shape (only 2D tensors supported atm)
+            shape (Sequence[int]): Parameter shape
            dtype (np.dtype, optional): parameter type np.int8, np.uint8 and np.float16 supported. Defaults to np.float16. Uint8 represents packed i4 dtypes

         Returns:
@@ -92,6 +92,61 @@ def parameter(
             self._mm, shape_ptr.size, shape_ptr, self.get_backend_dtype(dtype)
         )

+    def convolution(
+        self,
+        input_node: ctypes._Pointer,
+        weights_shape: Sequence[int],
+        bias: bool,
+        strides: Sequence[int] = (1, 1),
+        padding_begins: Sequence[int] = (0, 0),
+        padding_ends: Sequence[int] = (0, 0),
+        dilation: Sequence[int] = (1, 1),
+        groups: int = 1,
+        act_dtype: npt.DTypeLike = np.float16,
+        wt_dtype: npt.DTypeLike = np.float16,
+    ) -> ctypes._Pointer:
+        """Generate a convolution layer.
+
+        Args:
+            input_node (ctypes._Pointer): layer input node
+            weights_shape (Sequence[int]): weights shape
+            bias (bool): enable/disable bias
+            strides (Sequence[int]): strides
+            padding_begins (Sequence[int]): padding begin
+            padding_ends (Sequence[int]): padding end
+            dilation (Sequence[int]): dilation
+            groups (int): groups
+            act_dtype (npt.DTypeLike, optional): activation dtype. Defaults to np.float16.
+            wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        weights_shape_ptr = np.array(weights_shape, dtype=np.uint32)
+        strides_ptr = np.array(strides, dtype=np.uint32)
+        padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
+        padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
+        dilation_ptr = np.array(dilation, dtype=np.uint32)
+
+        return backend_lib.convolution(
+            self._mm,
+            input_node,
+            weights_shape_ptr.size,
+            weights_shape_ptr,
+            strides_ptr.size,
+            strides_ptr,
+            padding_begins_ptr.size,
+            padding_begins_ptr,
+            padding_ends_ptr.size,
+            padding_ends_ptr,
+            dilation_ptr.size,
+            dilation_ptr,
+            groups,
+            bias,
+            self.get_backend_dtype(act_dtype),
+            self.get_backend_dtype(wt_dtype),
+        )
+
     def linear(
         self,
         input_node: ctypes._Pointer,
@@ -112,7 +167,7 @@ def linear(
             wt_dtype (npt.DTypeLike, optional): weight dtype. Defaults to np.float16.

         Returns:
-            ctypes._Pointer: _description_
+            ctypes._Pointer: output node
         """
         return backend_lib.linear(
             self._mm,
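The new factory method can also be driven directly when composing a graph by hand; backend/convolution.py above does exactly this. A sketch, assuming only the NNFactory constructor and compile(...) usage shown in this commit:

    import numpy as np
    from intel_npu_acceleration_library.backend.factory import NNFactory

    factory = NNFactory(profile=False, device="NPU")
    x = factory.parameter([1, 8, 16, 16])  # activation input node
    out = factory.convolution(
        x,
        weights_shape=[8, 8, 3, 3],
        bias=False,
        strides=[1, 1],
        padding_begins=[1, 1],
        padding_ends=[1, 1],
        dilation=[1, 1],
        groups=1,
        act_dtype=np.float16,
        wt_dtype=np.float16,
    )
    factory.compile(out)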

intel_npu_acceleration_library/nn/conv.py

Lines changed: 120 additions & 7 deletions
@@ -3,13 +3,17 @@
 # SPDX-License-Identifier: Apache 2.0
 #

-import intel_npu_acceleration_library.nn as nn
+from intel_npu_acceleration_library.backend import run_factory, Convolution
+from intel_npu_acceleration_library.nn import Linear
+from typing import Optional, Sequence, Union
+from functools import partial
 import torch
+import uuid


-class Conv2d(torch.nn.Module):
+class Im2ColConv2d(torch.nn.Module):
     """
-    2D convolutional layer implementation.
+    2D convolutional layer implementation using Im2Col.

     Attrs:
         weight (torch.Tensor): The weight tensor of the layer.
@@ -118,7 +122,7 @@ def forward(self, x) -> torch.Tensor:
         return out

     @staticmethod
-    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Im2ColConv2d":
         """
         Create a Conv2d layer from a torch.nn.Conv2d layer.

@@ -127,11 +131,11 @@ def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
             dtype (torch.dtype, optional): Data type of the layer.

         Returns:
-            Conv2d: The converted Conv2d layer.
+            Im2ColConv2d: The converted Im2ColConv2d layer.
         """
         weight = layer.weight.view(layer.weight.shape[0], -1)
-        matmul = nn.Linear.fromTensor(weight, getattr(layer, "bias", None), dtype)
-        new_layer = Conv2d(
+        matmul = Linear.fromTensor(weight, getattr(layer, "bias", None), dtype)
+        new_layer = Im2ColConv2d(
             matmul,
             layer.in_channels,
             layer.out_channels,
@@ -142,3 +146,112 @@ def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
         )

         return new_layer
+
+
+class Conv2d(torch.nn.Module):
+    """
+    2D convolutional layer implementation.
+
+    Attrs:
+        weight (torch.Tensor): The weight tensor of the layer.
+        bias (torch.Tensor): The bias tensor of the layer.
+    """
+
+    def __init__(
+        self,
+        weights: torch.Tensor,
+        bias: Optional[torch.Tensor] = None,
+        strides: Union[int, Sequence[int]] = 1,
+        padding: Union[int, Sequence[int]] = 0,
+        dilation: Union[int, Sequence[int]] = 1,
+        groups: int = 1,
+    ) -> None:
+        """Initialize a Convolutional layer.
+
+        Args:
+            weights (torch.Tensor): The weight tensor of the layer.
+            bias (Optional[torch.Tensor], optional): The bias tensor of the layer. Defaults to None.
+            strides (Union[int, Sequence[int]], optional): Strides. Defaults to 1.
+            padding (Union[int, Sequence[int]], optional): Padding. Defaults to 0.
+            dilation (Union[int, Sequence[int]], optional): Dilation. Defaults to 1.
+            groups (int, optional): Groups. Defaults to 1.
+        """
+        super().__init__()
+
+        self.op_id = str(uuid.uuid4())
+        if groups > 1:
+            new_shape = [groups, weights.shape[0] // groups] + list(weights.shape[1:])
+            weights = weights.view(*new_shape)
+
+        self.parameters = [weights]
+        if bias is not None:
+            self.parameters.append(bias)
+        self.backend_cls = partial(
+            Convolution,
+            weights_shape=weights.shape,
+            bias=bias is not None,
+            strides=strides,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+
+    @property
+    def weight(self) -> torch.Tensor:
+        """
+        Get the weight tensor of the layer.
+
+        Returns:
+            torch.Tensor: The weight tensor.
+        """
+        return self.parameters[0]
+
+    @property
+    def bias(self) -> torch.Tensor:
+        """
+        Get the bias tensor of the layer.
+
+        Returns:
+            torch.Tensor: The bias tensor.
+        """
+        if len(self.parameters) > 1:
+            return self.parameters[1]
+        return None
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Torch module forward method.
+
+        Args:
+            x (torch.Tensor): Input tensor
+
+        Returns:
+            torch.Tensor: result
+        """
+        return run_factory(x, self.parameters, self.backend_cls, self.op_id)
+
+    @staticmethod
+    def fromTorch(layer, dtype: torch.dtype = torch.float16) -> "Conv2d":
+        """
+        Create a Conv2d layer from a torch.nn.Conv2d layer.
+
+        Args:
+            layer (torch.nn.Conv2d): The torch Conv2d layer.
+            dtype (torch.dtype, optional): Data type of the layer.
+
+        Returns:
+            Conv2d: The converted Conv2d layer.
+        """
+        # In case of unsupported configuration, fall back to Im2ColConv2d
+        if any(dim > 11 for dim in layer.kernel_size):
+            return Im2ColConv2d.fromTorch(layer, dtype)
+
+        new_layer = Conv2d(
+            layer.weight,
+            layer.bias,
+            layer.stride,
+            layer.padding,
+            layer.dilation,
+            layer.groups,
+        )
+
+        return new_layer
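End-to-end sketch of the new path, including the depthwise case that exercises the groups > 1 weight reshape and the kernel-size fallback (standard torch APIs only; actually running forward requires the library and its target device):

    import torch
    from intel_npu_acceleration_library.nn.conv import Conv2d

    # Depthwise convolution: groups == in_channels == out_channels
    torch_layer = torch.nn.Conv2d(8, 8, kernel_size=3, padding=1, groups=8)
    npu_layer = Conv2d.fromTorch(torch_layer)  # native NPU convolution

    x = torch.rand(1, 8, 16, 16)
    y = npu_layer(x)

    # Kernel dims > 11 fall back to the Im2Col implementation
    big = torch.nn.Conv2d(3, 16, kernel_size=13)
    fallback = Conv2d.fromTorch(big)  # returns an Im2ColConv2d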

0 commit comments