Skip to content

Commit

Permalink
NMSIS/NN: fix Benchmark for NN v6.0
Browse files Browse the repository at this point in the history
  • Loading branch information
shuzhuo committed Sep 20, 2024
1 parent 51ff559 commit d6b63d5
Show file tree
Hide file tree
Showing 65 changed files with 843 additions and 441 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ int main()
kernel1x1_riscv_convolve_1x1_s8();
kernel1x1_riscv_convolve_1x1_s4_fast();
kernel1x1_riscv_convolve_1x1_s8_fast();
int16xint8_riscv_convolve_fast_s16();
basic_riscv_convolve_s4();
basic_riscv_convolve_s8();
int16xint8_riscv_convolve_s16();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ void conv_1_x_n_1_riscv_convolve_s8(void)
input_dims.c = CONV_1_X_N_1_IN_CH;
filter_dims.w = CONV_1_X_N_1_FILTER_X;
filter_dims.h = CONV_1_X_N_1_FILTER_Y;
filter_dims.c = CONV_1_X_N_1_IN_CH;
output_dims.w = CONV_1_X_N_1_OUTPUT_W;
output_dims.h = CONV_1_X_N_1_OUTPUT_H;
output_dims.c = CONV_1_X_N_1_OUT_CH;
Expand All @@ -72,7 +73,7 @@ void conv_1_x_n_1_riscv_convolve_s8(void)
generate_rand_s8(conv_1_x_n_1_input, CONV_1_X_N_1_INPUT_SIZE);
generate_rand_s8(conv_1_x_n_1_weights, CONV_1_X_N_1_KERNEL_SIZE);

int32_t buf_size = riscv_convolve_1_x_n_s8_get_buffer_size(&input_dims, &filter_dims);
int32_t buf_size = riscv_convolve_1_x_n_s8_get_buffer_size(&conv_params, &input_dims, &filter_dims, &output_dims);
ctx.buf = malloc(buf_size);
ctx.size = 0;
BENCH_START(riscv_convolve_1_x_n_s8);
Expand Down
128 changes: 0 additions & 128 deletions NMSIS/NN/Benchmark/ConvolutionFunctions/test_riscv_convolve_fast_s16.c

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void int16xint8_riscv_convolve_s16(void)
nmsis_nn_dims bias_dims;
nmsis_nn_dims output_dims;

const int64_t *bias_data = int16xint8_biases;
const nmsis_nn_bias_data bias_data = {int16xint8_biases, false};
const int8_t *kernel_data = int16xint8_weights;
const int16_t *input_data = int16xint8_input;
const int16_t *output_ref = int16xint8_output_ref;
Expand All @@ -51,6 +51,7 @@ void int16xint8_riscv_convolve_s16(void)
input_dims.c = INT16XINT8_IN_CH;
filter_dims.w = INT16XINT8_FILTER_X;
filter_dims.h = INT16XINT8_FILTER_Y;
filter_dims.c = INT16XINT8_IN_CH;
output_dims.w = INT16XINT8_OUTPUT_W;
output_dims.h = INT16XINT8_OUTPUT_H;
output_dims.c = INT16XINT8_OUT_CH;
Expand Down Expand Up @@ -84,7 +85,7 @@ void int16xint8_riscv_convolve_s16(void)
&filter_dims,
kernel_data,
&bias_dims,
bias_data,
&bias_data,
&output_dims,
output);
BENCH_END(riscv_convolve_s16);
Expand All @@ -109,7 +110,7 @@ void int16xint8_riscv_convolve_s16(void)
&filter_dims,
kernel_data,
&bias_dims,
bias_data,
&bias_data,
&output_dims,
output);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ void basic_riscv_convolve_s8(void)
input_dims.c = BASIC_IN_CH;
filter_dims.w = BASIC_FILTER_X;
filter_dims.h = BASIC_FILTER_Y;
filter_dims.c = BASIC_IN_CH;
output_dims.w = BASIC_OUTPUT_W;
output_dims.h = BASIC_OUTPUT_H;
output_dims.c = BASIC_OUT_CH;
Expand Down
3 changes: 2 additions & 1 deletion NMSIS/NN/Benchmark/LSTMFunctions/riscv_nn_benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ int main()
{
printf("Start LSTMFunctions benchmark\n");

lstm_1_riscv_lstm_unidirectional_s16_s8();
lstm_1_riscv_lstm_unidirectional_s8();
lstm_1_riscv_lstm_unidirectional_s16();

printf("All tests are passed.\n");
printf("Finish LSTMFunctions benchmark\n");
Expand Down
183 changes: 183 additions & 0 deletions NMSIS/NN/Benchmark/LSTMFunctions/test_riscv_lstm_unidirectional_s16.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
/*
* SPDX-FileCopyrightText: Copyright 2022-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <stdbool.h>
#include <stdlib.h>

#include <riscv_nnfunctions.h>


#include "TestData/lstm_1_s16/test_data.h"
#include "Utils/validate.h"
#include "nmsis_bench.h"

BENCH_DECLARE_VAR();

// Number of int16_t elements in each scratch buffer below.
// Update the buffer size if adding a unit test with larger buffer.
// The replacement list and every operand are parenthesized so the macro keeps
// its value when used inside a larger expression (e.g. next to '/' or '%') or
// when one of the test-data macros is itself an unparenthesized expression.
#define LARGEST_BUFFER_SIZE \
    ((lstm_1_s16_hidden_size) * (lstm_1_s16_batch_size) * (lstm_1_s16_time_steps))

// Scratch storage handed to the LSTM kernel through nmsis_nn_lstm_context
// (assigned to temp1, temp2 and cell_state respectively by the benchmark).
static int16_t buffer1[LARGEST_BUFFER_SIZE];
static int16_t buffer2[LARGEST_BUFFER_SIZE];
static int16_t buffer3[LARGEST_BUFFER_SIZE];

/**
 * Benchmark riscv_lstm_unidirectional_s16() on the lstm_1_s16 test vector.
 *
 * Steps performed:
 *   1. Fill eight per-gate kernel-sum arrays with riscv_vector_sum_s8_s64():
 *      four from the input weights (input zero point + gate bias) and four
 *      from the hidden weights (negated output zero point, no bias).
 *   2. Build the four gate descriptors and the LSTM parameter struct.
 *   3. Call the kernel between BENCH_START / BENCH_END and assert that the
 *      returned status equals RISCV_NMSIS_NN_SUCCESS.
 *
 * Only the return status is checked; the comparison of `output` against the
 * reference data is currently disabled (see the end of the function).
 */
void lstm_1_riscv_lstm_unidirectional_s16(void)
{
    int16_t output[lstm_1_s16_batch_size * lstm_1_s16_time_steps * lstm_1_s16_hidden_size] = {0};
    const riscv_nmsis_nn_status expected = RISCV_NMSIS_NN_SUCCESS;
    const int16_t *output_ref = &lstm_1_s16_output[0];
    const int32_t output_ref_size = lstm_1_s16_batch_size * lstm_1_s16_time_steps * lstm_1_s16_hidden_size;

    // The reference data is only consumed by the disabled validation below.
    // Keep the locals so the check can be re-enabled, but mark them as
    // intentionally unused to avoid -Wunused-variable diagnostics.
    (void)output_ref;
    (void)output_ref_size;

    // Kernel sums derived from the input (data) weights, one array per gate.
    int64_t input_data_kernel_sum[lstm_1_s16_hidden_size];
    int64_t forget_data_kernel_sum[lstm_1_s16_hidden_size];
    int64_t cell_data_kernel_sum[lstm_1_s16_hidden_size];
    int64_t output_data_kernel_sum[lstm_1_s16_hidden_size];

    // Kernel sums derived from the hidden (recurrent) weights, one array per gate.
    int64_t input_hidden_kernel_sum[lstm_1_s16_hidden_size];
    int64_t forget_hidden_kernel_sum[lstm_1_s16_hidden_size];
    int64_t cell_hidden_kernel_sum[lstm_1_s16_hidden_size];
    int64_t output_hidden_kernel_sum[lstm_1_s16_hidden_size];

    // Input-weight path: pass the input zero point and the gate's bias vector.
    riscv_vector_sum_s8_s64(&input_data_kernel_sum[0],
                            lstm_1_s16_input_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_input_gate_input_weights[0],
                            lstm_1_s16_input_zero_point,
                            &lstm_1_s16_input_gate_bias[0]);
    riscv_vector_sum_s8_s64(&forget_data_kernel_sum[0],
                            lstm_1_s16_input_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_forget_gate_input_weights[0],
                            lstm_1_s16_input_zero_point,
                            &lstm_1_s16_forget_gate_bias[0]);
    riscv_vector_sum_s8_s64(&cell_data_kernel_sum[0],
                            lstm_1_s16_input_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_cell_gate_input_weights[0],
                            lstm_1_s16_input_zero_point,
                            &lstm_1_s16_cell_gate_bias[0]);
    riscv_vector_sum_s8_s64(&output_data_kernel_sum[0],
                            lstm_1_s16_input_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_output_gate_input_weights[0],
                            lstm_1_s16_input_zero_point,
                            &lstm_1_s16_output_gate_bias[0]);

    // Hidden-weight path: the output zero point is negated and no bias is
    // supplied (NULL), unlike the input-weight path above.
    riscv_vector_sum_s8_s64(&input_hidden_kernel_sum[0],
                            lstm_1_s16_hidden_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_input_gate_hidden_weights[0],
                            -lstm_1_s16_output_zero_point,
                            NULL);
    riscv_vector_sum_s8_s64(&forget_hidden_kernel_sum[0],
                            lstm_1_s16_hidden_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_forget_gate_hidden_weights[0],
                            -lstm_1_s16_output_zero_point,
                            NULL);
    riscv_vector_sum_s8_s64(&cell_hidden_kernel_sum[0],
                            lstm_1_s16_hidden_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_cell_gate_hidden_weights[0],
                            -lstm_1_s16_output_zero_point,
                            NULL);
    riscv_vector_sum_s8_s64(&output_hidden_kernel_sum[0],
                            lstm_1_s16_hidden_size,
                            lstm_1_s16_hidden_size,
                            &lstm_1_s16_output_gate_hidden_weights[0],
                            -lstm_1_s16_output_zero_point,
                            NULL);

    // The gate and parameter structs below use positional initializers, so the
    // value order must match the field order of nmsis_nn_lstm_gate and
    // nmsis_nn_lstm_params as declared in the NN headers (not visible here).

    // INPUT GATE
    const nmsis_nn_lstm_gate gate_input = {lstm_1_s16_input_gate_input_multiplier,
                                           lstm_1_s16_input_gate_input_shift,
                                           &lstm_1_s16_input_gate_input_weights[0],
                                           &input_data_kernel_sum[0],
                                           lstm_1_s16_input_gate_hidden_multiplier,
                                           lstm_1_s16_input_gate_hidden_shift,
                                           &lstm_1_s16_input_gate_hidden_weights[0],
                                           &input_hidden_kernel_sum[0],
                                           &lstm_1_s16_input_gate_bias[0],
                                           RISCV_SIGMOID};

    // FORGET GATE
    const nmsis_nn_lstm_gate gate_forget = {lstm_1_s16_forget_gate_input_multiplier,
                                            lstm_1_s16_forget_gate_input_shift,
                                            &lstm_1_s16_forget_gate_input_weights[0],
                                            &forget_data_kernel_sum[0],
                                            lstm_1_s16_forget_gate_hidden_multiplier,
                                            lstm_1_s16_forget_gate_hidden_shift,
                                            &lstm_1_s16_forget_gate_hidden_weights[0],
                                            &forget_hidden_kernel_sum[0],
                                            &lstm_1_s16_forget_gate_bias[0],
                                            RISCV_SIGMOID};

    // CELL GATE (the only gate configured with RISCV_TANH)
    const nmsis_nn_lstm_gate gate_cell = {lstm_1_s16_cell_gate_input_multiplier,
                                          lstm_1_s16_cell_gate_input_shift,
                                          &lstm_1_s16_cell_gate_input_weights[0],
                                          &cell_data_kernel_sum[0],
                                          lstm_1_s16_cell_gate_hidden_multiplier,
                                          lstm_1_s16_cell_gate_hidden_shift,
                                          &lstm_1_s16_cell_gate_hidden_weights[0],
                                          &cell_hidden_kernel_sum[0],
                                          &lstm_1_s16_cell_gate_bias[0],
                                          RISCV_TANH};

    // OUTPUT GATE
    const nmsis_nn_lstm_gate gate_output = {lstm_1_s16_output_gate_input_multiplier,
                                            lstm_1_s16_output_gate_input_shift,
                                            &lstm_1_s16_output_gate_input_weights[0],
                                            &output_data_kernel_sum[0],
                                            lstm_1_s16_output_gate_hidden_multiplier,
                                            lstm_1_s16_output_gate_hidden_shift,
                                            &lstm_1_s16_output_gate_hidden_weights[0],
                                            &output_hidden_kernel_sum[0],
                                            &lstm_1_s16_output_gate_bias[0],
                                            RISCV_SIGMOID};

    // LSTM DATA
    // Note the gate order here is forget, input, cell, output.
    const nmsis_nn_lstm_params params = {lstm_1_s16_time_major,
                                         lstm_1_s16_batch_size,
                                         lstm_1_s16_time_steps,
                                         lstm_1_s16_input_size,
                                         lstm_1_s16_hidden_size,
                                         lstm_1_s16_input_zero_point,
                                         lstm_1_s16_forget_to_cell_multiplier,
                                         lstm_1_s16_forget_to_cell_shift,
                                         lstm_1_s16_input_to_cell_multiplier,
                                         lstm_1_s16_input_to_cell_shift,
                                         lstm_1_s16_cell_clip,
                                         lstm_1_s16_cell_scale_power,
                                         lstm_1_s16_output_multiplier,
                                         lstm_1_s16_output_shift,
                                         lstm_1_s16_output_zero_point,
                                         gate_forget,
                                         gate_input,
                                         gate_cell,
                                         gate_output};

    // Hand the file-scope scratch buffers to the kernel.
    nmsis_nn_lstm_context buffers;
    buffers.temp1 = buffer1;
    buffers.temp2 = buffer2;
    buffers.cell_state = buffer3;

    // Only the kernel call itself sits between BENCH_START and BENCH_END.
    BENCH_START(riscv_lstm_unidirectional_s16);
    riscv_nmsis_nn_status result = riscv_lstm_unidirectional_s16(lstm_1_s16_input, output, &params, &buffers);
    BENCH_END(riscv_lstm_unidirectional_s16);

    TEST_ASSERT_EQUAL(expected, result);
    // NOTE(review): comparison against the reference output is disabled in this
    // benchmark; the reason is not recorded here - confirm before re-enabling.
    // TEST_ASSERT_TRUE(validate_s16(output, output_ref, output_ref_size));
}
Loading

0 comments on commit d6b63d5

Please sign in to comment.