
Commit b67bd8b

Better torch integration (#66)
* Initial implementation of the nn.Module patching mechanism
* Start the support for native pytorch functions
* Testing NPU Module in a challenging scenario
* Remove model saving
* Convert module directly from pytorch
* Further additions
* Support reshape using sequence or multiple args
* Add l2 and gelu
* Add additional bindings
* Support both gelu operators
* Add additional methods
* Add adaptive pool and flatten
* Implemented avg pooling
* Adding max pooling
* Add common operations
* Fix CI
* Implementation of batchnorm
* Different sdpa tests
1 parent e4d0d61 commit b67bd8b

13 files changed: +2099 −24 lines

include/intel_npu_acceleration_library/nn_factory.h

Lines changed: 84 additions & 7 deletions
@@ -122,6 +122,75 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
         return conv.get();
     }
 
+    /**
+     * @brief Create a new average pooling operation
+     * @param input pooling input
+     * @param strides pooling strides
+     * @param pads_begin pooling padding begin
+     * @param pads_ends pooling padding end
+     * @param kernel pooling kernel
+     * @param exclude_pad exclude padding from the average calculation
+     * @param rounding_type rounding type
+     * @param auto_pad padding type
+     * @return ov::op::Op*
+     */
+    ov::op::Op* average_pooling(ov::op::Op* input, std::vector<size_t> strides, std::vector<size_t> pads_begin,
+                                std::vector<size_t> pads_ends, std::vector<size_t> kernel, bool exclude_pad = false,
+                                ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR,
+                                ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT) {
+        auto pool = std::make_shared<ov::opset1::AvgPool>(input->output(0), ov::Strides(strides), pads_begin,
+                                                          pads_ends, kernel, exclude_pad, rounding_type, auto_pad);
+        operations.push_back(pool);
+        return pool.get();
+    }
+
+    /**
+     * @brief Create a new adaptive average pooling operation
+     * @param input pooling input
+     * @param output_shape output shape
+     * @return ov::op::Op*
+     */
+    ov::op::Op* adaptive_average_pool(ov::op::Op* input, ov::op::Op* output_shape) {
+        auto pool = std::make_shared<ov::opset8::AdaptiveAvgPool>(input->output(0), output_shape->output(0));
+        operations.push_back(pool);
+        return pool.get();
+    }
+
+    /**
+     * @brief Create a new max pooling operation
+     * @param input pooling input
+     * @param strides pooling strides
+     * @param pads_begin pooling padding begin
+     * @param pads_ends pooling padding end
+     * @param kernel pooling kernel
+     * @param exclude_pad exclude padding from the max calculation
+     * @param rounding_type rounding type
+     * @param auto_pad padding type
+     * @return ov::op::Op*
+     */
+    ov::op::Op* max_pooling(ov::op::Op* input, std::vector<size_t> strides, std::vector<size_t> pads_begin,
+                            std::vector<size_t> pads_ends, std::vector<size_t> kernel,
+                            ov::op::RoundingType rounding_type = ov::op::RoundingType::FLOOR,
+                            ov::op::PadType auto_pad = ov::op::PadType::EXPLICIT) {
+        auto pool = std::make_shared<ov::opset1::MaxPool>(input->output(0), ov::Strides(strides), pads_begin,
+                                                          pads_ends, kernel, rounding_type, auto_pad);
+        operations.push_back(pool);
+        return pool.get();
+    }
+
+    /**
+     * @brief Create a new adaptive max pooling operation
+     * @param input pooling input
+     * @param output_shape output shape
+     * @return ov::op::Op*
+     */
+    ov::op::Op* adaptive_max_pool(ov::op::Op* input, ov::op::Op* output_shape) {
+        auto pool = std::make_shared<ov::opset8::AdaptiveMaxPool>(input->output(0), output_shape->output(0),
+                                                                  ov::element::i64);
+        operations.push_back(pool);
+        return pool.get();
+    }
+
     /**
      * @brief Create a new gather operation
      *
@@ -374,8 +443,8 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
      * @param input operation's input node
      * @return ov::op::Op*
      */
-    ov::op::Op* gelu(ov::op::Op* input) {
-        auto gelu = std::make_shared<ov::opset7::Gelu>(input->output(0), ov::op::GeluApproximationMode::TANH);
+    ov::op::Op* gelu(ov::op::Op* input, ov::op::GeluApproximationMode mode) {
+        auto gelu = std::make_shared<ov::opset7::Gelu>(input->output(0), mode);
         operations.push_back(gelu);
         return gelu.get();
     }
@@ -695,11 +764,19 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
      */
     ov::op::Op* scaled_dot_product_attention(ov::op::Op* query, ov::op::Op* key, ov::op::Op* value,
                                              ov::op::Op* attn_mask, bool is_causal) {
-        auto sdpa = std::make_shared<ov::opset13::ScaledDotProductAttention>(
-            query->output(0), key->output(0), value->output(0), attn_mask->output(0), is_causal);
-
-        operations.push_back(sdpa);
-        return sdpa.get();
+        if (attn_mask == nullptr) {
+            auto sdpa = std::make_shared<ov::opset13::ScaledDotProductAttention>(query->output(0), key->output(0),
+                                                                                 value->output(0), is_causal);
+
+            operations.push_back(sdpa);
+            return sdpa.get();
+        } else {
+            auto sdpa = std::make_shared<ov::opset13::ScaledDotProductAttention>(
+                query->output(0), key->output(0), value->output(0), attn_mask->output(0), is_causal);
+
+            operations.push_back(sdpa);
+            return sdpa.get();
+        }
     }
 
     /**
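The two branches above correspond to the masked and mask-free forms of scaled dot-product attention: the new overload simply omits the attn_mask input when the caller passes none. As a reference for the semantics (not this library's code), a minimal PyTorch sketch of what both variants compute:

import math
import torch

def sdpa_reference(q, k, v, attn_mask=None, is_causal=False):
    # softmax(Q K^T / sqrt(d) [+ mask]) V
    scores = q @ k.transpose(-2, -1) / math.sqrt(q.size(-1))
    if is_causal:
        causal = torch.ones(q.size(-2), k.size(-2), dtype=torch.bool).tril()
        scores = scores.masked_fill(~causal, float("-inf"))
    if attn_mask is not None:
        scores = scores + attn_mask  # additive mask, as in torch's SDPA
    return torch.softmax(scores, dim=-1) @ v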

intel_npu_acceleration_library/backend/bindings.py

Lines changed: 33 additions & 0 deletions
@@ -180,6 +180,39 @@ def init_network_factory(lib: ctypes.CDLL):
     ]
     lib.convolution.restype = handler
 
+    lib.avg_pooling.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_bool,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+    lib.avg_pooling.restype = handler
+
+    lib.max_pooling.argtypes = [
+        handler,
+        handler,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        c_u32_array,
+        ctypes.c_int,
+        ctypes.c_int,
+    ]
+    lib.max_pooling.restype = handler
+
     for op in get_supported_ops():
         fn = getattr(lib, op.name)
         fn.argtypes = [handler] * (op.inputs + 1) + list(op.parameters)
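The signatures alternate between ctypes.c_int and c_u32_array because each array crosses the boundary as an explicit (size, pointer) pair: strides, padding begins, padding ends, and kernel each contribute one pair. A minimal sketch of the convention; the real c_u32_array type is defined elsewhere in bindings.py, so the ndpointer definition below is an assumption for illustration:

import ctypes
import numpy as np

# Hypothetical stand-in for the module's c_u32_array type:
c_u32_array = np.ctypeslib.ndpointer(dtype=np.uint32, ndim=1, flags="C_CONTIGUOUS")

strides = np.array([2, 2], dtype=np.uint32)
# A call site then passes the pair (strides.size, strides), filling one
# `ctypes.c_int, c_u32_array` slot in the argtypes lists above.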

intel_npu_acceleration_library/backend/factory.py

Lines changed: 138 additions & 2 deletions
@@ -13,6 +13,7 @@
 import numpy.typing as npt
 import numpy as np
 import ctypes
+import torch
 
 
 F = TypeVar("F", bound=Callable[..., Any])
@@ -165,12 +166,12 @@ def to(self, tensor: ctypes._Pointer, dtype: npt.DTypeLike) -> ctypes._Pointer:
     @return_tensor
     def constant(
         self,
-        data: Union[np.array, Sequence[int], Sequence[float], int, float],
+        data: Union[np.array, Sequence[int], Sequence[float], int, float, torch.Tensor],
     ) -> ctypes._Pointer:
         """Generate a model input constant.
 
         Args:
-            data (Union[np.array, Sequence[int], Sequence[float], int, float]): constant data
+            data (Union[np.array, Sequence[int], Sequence[float], int, float, torch.Tensor]): constant data
 
         Returns:
             ctypes._Pointer: an instance to a constant object
@@ -185,6 +186,8 @@ def constant(
             data = np.array([data], dtype=np.int64)
         elif isinstance(data, float):
            data = np.array([data], dtype=np.float32)
+        elif isinstance(data, torch.Tensor):
+            data = data.detach().numpy()
 
         dst = data.ctypes.data_as(ctypes.c_void_p)
         shape_ptr = np.array(data.shape, dtype=np.uint32)
@@ -395,6 +398,139 @@ def normL2(
         axis_node = self.constant(axis).node  # type: ignore
         return backend_lib.normL2(self._mm, input_node, axis_node, eps)
 
+    @return_tensor
+    def avg_pooling(
+        self,
+        input: ctypes._Pointer,
+        kernel_size: Union[int, Sequence[int]],
+        strides: Optional[Union[int, Sequence[int]]] = None,
+        padding: int = 0,
+        ceil_mode: bool = False,
+        count_include_pad: bool = True,
+        divisor_override: Optional[int] = None,
+        n_spatial_dims: int = 2,
+    ) -> ctypes._Pointer:
+        """Generate an average pooling layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            kernel_size (Sequence[int]): kernel size
+            strides (Sequence[int]): strides
+            padding (int): padding
+            ceil_mode (bool): ceil mode
+            count_include_pad (bool): count include pad
+            divisor_override (int): divisor override
+            n_spatial_dims (int): number of spatial dimensions
+
+        Raises:
+            NotImplementedError: divisor_override is not supported
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if isinstance(kernel_size, int):
+            kernel_size = [kernel_size] * n_spatial_dims
+
+        if strides is None:
+            strides = kernel_size
+        elif isinstance(strides, int):
+            strides = [strides] * n_spatial_dims
+
+        if isinstance(padding, int):
+            padding_begins = [padding] * n_spatial_dims
+            padding_ends = [padding] * n_spatial_dims
+        else:
+            padding_begins = list(padding)
+            padding_ends = list(padding)
+
+        strides_ptr = np.array(strides, dtype=np.uint32)
+        padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
+        padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
+        kernel_size_ptr = np.array(kernel_size, dtype=np.uint32)
+
+        rounding_type = 1 if ceil_mode else 0
+        auto_pad = 0  # Hardcoded to explicit padding
+
+        if divisor_override:
+            raise NotImplementedError("divisor_override is not supported")
+
+        return backend_lib.avg_pooling(
+            self._mm,
+            input,
+            strides_ptr.size,
+            strides_ptr,
+            padding_begins_ptr.size,
+            padding_begins_ptr,
+            padding_ends_ptr.size,
+            padding_ends_ptr,
+            kernel_size_ptr.size,
+            kernel_size_ptr,
+            not count_include_pad,  # exclude_pad
+            rounding_type,  # rounding_type
+            auto_pad,  # auto_pad
+        )
+
+    @return_tensor
+    def max_pooling(
+        self,
+        input: ctypes._Pointer,
+        kernel_size: Union[int, Sequence[int]],
+        strides: Optional[Union[int, Sequence[int]]] = None,
+        padding: int = 0,
+        ceil_mode: bool = False,
+        n_spatial_dims: int = 2,
+    ) -> ctypes._Pointer:
+        """Generate a max pooling layer.
+
+        Args:
+            input (ctypes._Pointer): layer input node
+            kernel_size (Sequence[int]): kernel size
+            strides (Sequence[int]): strides
+            padding (int): padding
+            ceil_mode (bool): ceil mode
+            n_spatial_dims (int): number of spatial dimensions
+
+        Returns:
+            ctypes._Pointer: output node
+        """
+        if isinstance(kernel_size, int):
+            kernel_size = [kernel_size] * n_spatial_dims
+
+        if strides is None:
+            strides = kernel_size
+        elif isinstance(strides, int):
+            strides = [strides] * n_spatial_dims
+
+        if isinstance(padding, int):
+            padding_begins = [padding] * n_spatial_dims
+            padding_ends = [padding] * n_spatial_dims
+        else:
+            padding_begins = list(padding)
+            padding_ends = list(padding)
+
+        strides_ptr = np.array(strides, dtype=np.uint32)
+        padding_begins_ptr = np.array(padding_begins, dtype=np.uint32)
+        padding_ends_ptr = np.array(padding_ends, dtype=np.uint32)
+        kernel_size_ptr = np.array(kernel_size, dtype=np.uint32)
+
+        rounding_type = 1 if ceil_mode else 0
+        auto_pad = 0  # Hardcoded to explicit padding
+
+        return backend_lib.max_pooling(
+            self._mm,
+            input,
+            strides_ptr.size,
+            strides_ptr,
+            padding_begins_ptr.size,
+            padding_begins_ptr,
+            padding_ends_ptr.size,
+            padding_ends_ptr,
+            kernel_size_ptr.size,
+            kernel_size_ptr,
+            rounding_type,  # rounding_type
+            auto_pad,  # auto_pad
+        )
+
     def get_output_tensor_shape(self):
         """Get output tensor shape.
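These entry points mirror torch's pooling defaults: an int kernel_size or strides is broadcast over n_spatial_dims, and strides falls back to kernel_size, exactly as in torch.nn.AvgPool2d and torch.nn.MaxPool2d. A minimal usage sketch, assuming these methods land on the NNFactory class in factory.py and follow its usual parameter/compile/run flow (shapes are illustrative and the exact compile/run calls may differ by library version):

import numpy as np
from intel_npu_acceleration_library.backend import NNFactory

factory = NNFactory()
x = factory.parameter((1, 64, 32, 32))      # NCHW input node
y = factory.avg_pooling(x, kernel_size=2)   # strides default to kernel_size
factory.compile()                           # assumed: build the graph for the NPU
out = factory.run(np.random.rand(1, 64, 32, 32).astype(np.float16))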

intel_npu_acceleration_library/backend/ops.py

Lines changed: 8 additions & 0 deletions
@@ -52,6 +52,7 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="floor_act", inputs=1),
         SupportedOp(name="grn", inputs=1, parameters=[ctypes.c_float]),
         SupportedOp(name="gelu", inputs=1),
+        SupportedOp(name="gelu_erf", inputs=1),
         SupportedOp(name="log_act", inputs=1),
         SupportedOp(name="negative", inputs=1),
         SupportedOp(name="relu", inputs=1),
@@ -79,6 +80,11 @@ def get_supported_ops() -> List[SupportedOp]:
             inputs=4,
             parameters=[ctypes.c_bool],
         ),
+        SupportedOp(
+            name="scaled_dot_product_attention_simple",
+            inputs=3,
+            parameters=[ctypes.c_bool],
+        ),
         SupportedOp(
             name="normL2",
             inputs=2,
@@ -93,5 +99,7 @@ def get_supported_ops() -> List[SupportedOp]:
         SupportedOp(name="transpose", inputs=2),
         SupportedOp(name="squeeze", inputs=1),
         SupportedOp(name="unsqueeze", inputs=2),
+        SupportedOp(name="adaptive_avg_pool", inputs=2),
+        SupportedOp(name="adaptive_max_pool", inputs=2),
     ]
     return supported_ops
