This repository was archived by the owner on Apr 24, 2025. It is now read-only.

Commit 74e2360

Add c++ examples (#86)
1 parent 0ac709b commit 74e2360

File tree: 3 files changed (+28, -5 lines)


docs/source/index.rst

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ Site map
 
 .. toctree::
    developer.md
-   adding_operation.md
+   adding_operations.md
    :maxdepth: 1
    :caption: Developements guide:
 
examples/cpp/main.cpp

Lines changed: 11 additions & 4 deletions
@@ -17,12 +17,15 @@ int main() {
     // create parameter
     auto input = factory->parameter({batch, inC}, ov::element::f16);
     auto weights = factory->parameter({outC, inC}, ov::element::f16);
+    auto bias = factory->parameter({1, outC}, ov::element::f16);
 
     // create matmul
     auto matmul = factory->matmul(input, weights);
+    auto matmul_bias = factory->eltwise_add(matmul, bias);
+    factory->result(matmul_bias);
 
     // Compile the model
-    factory->compile(matmul);
+    factory->compile();
 
     // Save OV model
     std::cout << "Saving model to matmul.xml" << std::endl;
@@ -31,14 +34,17 @@ int main() {
     // Here you can create float16 buffers and run inference by using
     half_ptr input_buffer = new uint16_t[batch * inC];
     half_ptr weights_buffer = new uint16_t[outC * inC];
+    half_ptr bias_buffer = new uint16_t[outC];
     half_ptr output_buffer = new uint16_t[batch * outC];
 
-    memset(input_buffer, 0, 128 * 256 * sizeof(uint16_t));
-    memset(weights_buffer, 0, 128 * 256 * sizeof(uint16_t));
-    memset(output_buffer, 0, 128 * 512 * sizeof(uint16_t));
+    memset(input_buffer, 0, batch * inC * sizeof(uint16_t));
+    memset(weights_buffer, 0, outC * inC * sizeof(uint16_t));
+    memset(output_buffer, 0, batch * outC * sizeof(uint16_t));
+    memset(bias_buffer, 0, outC * sizeof(uint16_t));
 
     factory->setInputTensor(input_buffer, 0);
     factory->setInputTensor(weights_buffer, 1);
+    factory->setInputTensor(bias_buffer, 2);
     factory->setOutputTensor(output_buffer, 0);
 
     // Run inference
@@ -49,6 +55,7 @@ int main() {
 
     delete[] input_buffer;
     delete[] weights_buffer;
+    delete[] bias_buffer;
     delete[] output_buffer;
     return 0;
 }
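
The hunks above show only the changed region of examples/cpp/main.cpp. For reference, a minimal end-to-end sketch assembled from them follows. The include path, the half_ptr alias, the tensor sizes, the ModelFactory construction, and the run() call are not visible in this diff and are filled in as assumptions, so treat the sketch as illustrative rather than as the exact file content.

// Sketch only: lines outside the diff hunks above are assumptions, not the exact file content.
#include "intel_npu_acceleration_library/nn_factory.h"  // assumed include path
#include <cstdint>
#include <cstring>
#include <iostream>
#include <memory>

using namespace intel_npu_acceleration_library;  // assumed namespace
typedef uint16_t* half_ptr;                      // assumed float16 buffer alias used by the example

int main() {
    const size_t batch = 128, inC = 256, outC = 512;  // assumed sizes, consistent with the old memset literals

    auto factory = std::make_shared<ModelFactory>("NPU");  // assumed constructor signature

    // create parameters: activations, weights and the new bias
    auto input = factory->parameter({batch, inC}, ov::element::f16);
    auto weights = factory->parameter({outC, inC}, ov::element::f16);
    auto bias = factory->parameter({1, outC}, ov::element::f16);

    // matmul followed by a bias add, then mark the output and compile
    auto matmul = factory->matmul(input, weights);
    auto matmul_bias = factory->eltwise_add(matmul, bias);
    factory->result(matmul_bias);
    factory->compile();

    // allocate float16 (uint16_t) buffers sized from the shapes above
    half_ptr input_buffer = new uint16_t[batch * inC];
    half_ptr weights_buffer = new uint16_t[outC * inC];
    half_ptr bias_buffer = new uint16_t[outC];
    half_ptr output_buffer = new uint16_t[batch * outC];
    memset(input_buffer, 0, batch * inC * sizeof(uint16_t));
    memset(weights_buffer, 0, outC * inC * sizeof(uint16_t));
    memset(bias_buffer, 0, outC * sizeof(uint16_t));
    memset(output_buffer, 0, batch * outC * sizeof(uint16_t));

    // bind buffers in the same order the parameters were created
    factory->setInputTensor(input_buffer, 0);
    factory->setInputTensor(weights_buffer, 1);
    factory->setInputTensor(bias_buffer, 2);
    factory->setOutputTensor(output_buffer, 0);

    factory->run();  // assumed inference entry point

    delete[] input_buffer;
    delete[] weights_buffer;
    delete[] bias_buffer;
    delete[] output_buffer;
    return 0;
}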

include/intel_npu_acceleration_library/nn_factory.h

Lines changed: 16 additions & 0 deletions
@@ -90,6 +90,22 @@ class ModelFactory : public intel_npu_acceleration_library::OVInferenceModel {
         return matmul.get();
     }
 
+    /**
+     * @brief Create a new linear operation
+     *
+     * @param input matmul lhs input
+     * @param weights matmul rhs input, a.k.a. weights
+     * @param bias matmul bias input
+     * @return ov::op::Op*
+     */
+    ov::op::Op* linear(ov::op::Op* input, ov::op::Op* weights, ov::op::Op* bias) {
+        auto mm_op = matmul(input, weights);
+        if (bias != nullptr) {
+            return eltwise_add(mm_op, bias);
+        }
+        return mm_op;
+    }
+
     /**
      * @brief Create a new convolution operation
      *
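
The new linear helper wraps the matmul plus optional bias add used in the example above. As a rough sketch (not part of this commit's example code), the graph-building lines of examples/cpp/main.cpp could be written with it as follows:

// Sketch: build the same matmul + bias graph through the new helper.
auto out = factory->linear(input, weights, bias);  // same as eltwise_add(matmul(input, weights), bias)
factory->result(out);
factory->compile();

Since the helper checks the bias pointer, calling factory->linear(input, weights, nullptr) would return the bare matmul node instead.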
