-
Notifications
You must be signed in to change notification settings - Fork 20
Expand file tree
/
Copy pathleaky_relu.cc
More file actions
48 lines (36 loc) · 1.47 KB
/
leaky_relu.cc
File metadata and controls
48 lines (36 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// SPDX-FileCopyrightText: Copyright (C) 2025 Advanced Micro Devices, Inc. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#include "../aie_kernel_utils.h"
#include <aie_api/aie.hpp>
#include <stdint.h>
using namespace aie;
/// @brief Element-wise leaky ReLU over a bfloat16 buffer, 32 lanes per step.
///
/// Computes c[i] = max(a[i], alpha * a[i]). This matches the usual leaky ReLU
/// definition (x when x > 0, alpha * x otherwise) only while 0 <= alpha <= 1;
/// for alpha outside that range the max picks the other branch.
///
/// @param a            Input buffer. Declared restrict, so it must not alias
///                     @p c.
/// @param c            Output buffer; one result is written per input element.
/// @param vector_size  Number of elements to process. The loop advances in
///                     whole 32-element vectors, so a count that is not a
///                     multiple of 32 is effectively rounded up and the last
///                     vector reads/writes past the count.
/// @param alpha        Slope multiplied into the input before taking the max.
///
/// NOTE(review): AIE_LOOP_MIN_ITERATION_COUNT(32) presumably promises the
/// compiler at least 32 loop iterations (i.e. vector_size >= 1024) -- confirm
/// against aie_kernel_utils.h and the callers.
void leaky_relu_vectorized_bf16(bfloat16 *restrict a,
                                bfloat16 *restrict c,
                                const int32_t vector_size,
                                const bfloat16 alpha)
{
    // Elements consumed and produced by each loop iteration.
    constexpr int kLanes = 32;

    // event0()/event1() bracket the kernel body.
    // NOTE(review): presumably trace/profiling markers -- confirm.
    event0();

    auto it_in = aie::begin_restrict_vector<kLanes>(a);
    auto it_out = aie::begin_restrict_vector<kLanes>(c);

    // Splat alpha across all lanes once, outside the loop.
    const aie::vector<bfloat16, kLanes> alpha_vec = aie::broadcast<bfloat16, kLanes>(alpha);

    // The loop-hint macros must stay directly in front of the for statement.
    AIE_PREPARE_FOR_PIPELINING
    AIE_LOOP_MIN_ITERATION_COUNT(32)
    for (int i = 0; i < vector_size; i += kLanes) {
        const aie::vector<bfloat16, kLanes> input = *it_in++;

        // max(x, alpha * x): keeps x where it is the larger value, otherwise
        // the scaled value.
        const aie::vector<bfloat16, kLanes> alpha_times_input = aie::mul(input, alpha_vec);
        *it_out++ = aie::max(input, alpha_times_input);
    }

    event1();
}
extern "C" {

/// C-linkage entry point for the bfloat16 leaky ReLU kernel.
///
/// Passes every argument through unchanged to leaky_relu_vectorized_bf16;
/// no work is done here beyond the call.
///
/// @param input       Source buffer (restrict: must not alias @p output).
/// @param output      Destination buffer.
/// @param input_size  Element count handed to the vectorized kernel.
/// @param alpha       Slope forwarded to the vectorized kernel.
void leaky_relu_bf16(bfloat16 *restrict input,
                     bfloat16 *restrict output,
                     int input_size,
                     bfloat16 alpha)
{
    leaky_relu_vectorized_bf16(input, output, input_size, alpha);
}

} // extern "C"