Commit 533ee30
SHL: version 2.9.0
chenf committed Jan 23, 2024
1 parent eb19642 commit 533ee30
Showing 196 changed files with 14,834 additions and 1,775 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -17,7 +17,6 @@ example/*.elf
openvx_build
e907_build
rvv_build
-rvv_nodot_build
rvm_build
c906_static_build
c906_so_build
17 changes: 3 additions & 14 deletions CMakeLists.txt
@@ -17,6 +17,7 @@ if (CONFIG_CUSTOM_SOURCE_SELECT)
else()
set(CONFIG_USE_SHL_DEBUG ON)
set(CONFIG_SHL_LAYER_BENCHMARK ON)
+set(CONFIG_SHL_TRACE ON)
endif()

file (STRINGS "version" SHL_VERSION)
@@ -47,18 +48,6 @@ if(CONFIG_BUILD_RISCV_RVV)
install(TARGETS rvv_static DESTINATION lib)
endif()

-if(CONFIG_BUILD_RISCV_RVV_NODOT)
-# build rvv a without xtheadvdot extension
-include(cmake/rules.cmake)
-LIST(APPEND RVV_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS})
-add_library(rvv_static STATIC ${RVV_LST})
-SET_TARGET_PROPERTIES(rvv_static PROPERTIES OUTPUT_NAME "shl_rvv_nodot")
-set(RVV_BUILD_FLAGS -ffp-contract=off -march=rv64gcv_zfh_xtheadc -mabi=lp64d -DSHL_BUILD_RVV -DSHL_BUILD_REF -DSHL_BUILD_GREF)
-target_compile_options(rvv_static PRIVATE ${RVV_BUILD_FLAGS})
-
-install(TARGETS rvv_static DESTINATION lib)
-endif()
-
if(CONFIG_BUILD_RISCV_C906)
# build c906 lib
set(CONFIG_GRAPH_REFERENCE_TVMGEN ON)
@@ -102,8 +91,8 @@ if(CONFIG_BUILD_RISCV_C920)

set(SHL_LIB_TARGET "c920_lib")
set(SHL_LIB_NAME shl_c920)
-LIST(APPEND SHL_BUILD_SRC_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS} ${C920_SRCS})
-set(SHL_BUILD_C_FLAGS -ffp-contract=off -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d -DSHL_BUILD_C920 -DSHL_BUILD_REF -DSHL_BUILD_GREF -DSHL_BUILD_RVV)
+LIST(APPEND SHL_BUILD_SRC_LST ${NN2_SRCS} ${REF_SRCS} ${GREF_SRCS} ${THEAD_RVV_SRCS} ${C920_SRCS} ${LLM_SRCS})
+set(SHL_BUILD_C_FLAGS -ffp-contract=off -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d -DSHL_BUILD_C920 -DSHL_BUILD_REF -DSHL_BUILD_GREF -DSHL_BUILD_RVV -fopenmp)
include(cmake/target_build.cmake)
target_include_directories(${SHL_LIB_TARGET} PRIVATE module/dlpack/include/)
endif()
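Note: the C920 target now pulls in ${LLM_SRCS} and compiles with -fopenmp. As a point of reference, a minimal C sketch of the loop-level parallelism that flag enables (illustrative only; not code from this commit — without -fopenmp the pragma is simply ignored):

/* Illustrative only: -fopenmp lets a kernel split independent rows
 * across cores with a single pragma. */
void scale_rows(float *dst, const float *src, float alpha, int M, int N)
{
#pragma omp parallel for
    for (int i = 0; i < M; i++) {
        for (int j = 0; j < N; j++) {
            dst[i * N + j] = alpha * src[i * N + j];
        }
    }
}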
3 changes: 0 additions & 3 deletions Makefile
@@ -10,9 +10,6 @@ nn2_e907_elf:
nn2_rvv:
mkdir -p rvv_build; cd rvv_build; cmake ../ -DCONFIG_BUILD_RISCV_RVV=ON -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/rvv/; make -j${USE_CORE}; make install; cd -

-nn2_rvv_nodot:
-	mkdir -p rvv_nodot_build; cd rvv_nodot_build; cmake ../ -DCONFIG_BUILD_RISCV_RVV_NODOT=ON -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/rvv_nodot/; make -j${USE_CORE}; make install; cd -
-
nn2_c906:
mkdir -p c906_static_build; cd c906_static_build; cmake ../ -DCONFIG_BUILD_RISCV_C906=ON -DCONFIG_SHL_BUILD_STATIC=ON -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR}/c906/; make -j${USE_CORE}; make install; cd -

9 changes: 9 additions & 0 deletions cmake/c906_elf.cmake
@@ -359,9 +359,12 @@ set(CONFIG_THEAD_RVV_DECONVOLUTION_FP16 ON)
set(CONFIG_THEAD_RVV_DIV_FP32 ON)
set(CONFIG_THEAD_RVV_DIV_FP16 ON)
set(CONFIG_THEAD_RVV_DIV_INT8 ON)
+set(CONFIG_THEAD_RVV_EMBEDDING_INT32 ON)
set(CONFIG_THEAD_RVV_ERF_FP32 ON)
set(CONFIG_THEAD_RVV_ERF_FP16 ON)
set(CONFIG_THEAD_RVV_ERF_INT8 ON)
+set(CONFIG_THEAD_RVV_EXPAND_DIMS_FP32 ON)
+set(CONFIG_THEAD_RVV_EXPAND_DIMS_FP16 ON)
set(CONFIG_THEAD_RVV_FULLYCONNECTED_FP32 ON)
set(CONFIG_THEAD_RVV_FULLYCONNECTED_FP16 ON)
set(CONFIG_THEAD_RVV_FULLYCONNECTED_INT8 ON)
@@ -383,6 +386,7 @@ set(CONFIG_THEAD_RVV_LAYER_NORM_INT8 ON)
set(CONFIG_THEAD_RVV_LEAKY_RELU_FP32 ON)
set(CONFIG_THEAD_RVV_LEAKY_RELU_FP16 ON)
set(CONFIG_THEAD_RVV_LEAKY_RELU_INT8 ON)
+set(CONFIG_THEAD_RVV_LLM_POS_FP16 ON)
set(CONFIG_THEAD_RVV_MATMUL_FP32 ON)
set(CONFIG_THEAD_RVV_MATMUL_FP16 ON)
set(CONFIG_THEAD_RVV_MATMUL_INT8 ON)
@@ -411,6 +415,10 @@ set(CONFIG_THEAD_RVV_RESHAPE_INT8 ON)
set(CONFIG_THEAD_RVV_RMS_NORM_FP32 ON)
set(CONFIG_THEAD_RVV_RMS_NORM_FP16 ON)
set(CONFIG_THEAD_RVV_RMS_NORM_INT8 ON)
+set(CONFIG_THEAD_RVV_ROPE_FP32 ON)
+set(CONFIG_THEAD_RVV_ROPE_FP16 ON)
+set(CONFIG_THEAD_RVV_SCALED_DOT_PRODUCT_ATTENTION_FP32 ON)
+set(CONFIG_THEAD_RVV_SCALED_DOT_PRODUCT_ATTENTION_FP16 ON)
set(CONFIG_THEAD_RVV_SIGMOID_FP32 ON)
set(CONFIG_THEAD_RVV_SIGMOID_FP16 ON)
set(CONFIG_THEAD_RVV_SIGMOID_INT8 ON)
@@ -487,3 +495,4 @@ set(CONFIG_C906_SUB_FP32 ON)
set(CONFIG_C906_SUB_FP16 ON)
set(CONFIG_USE_SHL_DEBUG ON)
set(CONFIG_SHL_LAYER_BENCHMARK ON)
+set(CONFIG_SHL_TRACE ON)
3 changes: 2 additions & 1 deletion cmake/e907.cmake
@@ -336,4 +336,5 @@ set(CONFIG_E907_OPT_MUL ON)
set(CONFIG_E907_OPT_SUM ON)
set(CONFIG_E907_OPT_SOFTMAX ON)
set(CONFIG_USE_SHL_DEBUG ON)
-set(CONFIG_SHL_LAYER_BENCHMARK ON)
+set(CONFIG_SHL_LAYER_BENCHMARK ON)
+set(CONFIG_SHL_TRACE ON)
19 changes: 14 additions & 5 deletions cmake/rules.cmake
@@ -1,11 +1,10 @@
if (NOT CONFIG_USE_COMPILER_PATH)

# riscv linux compiler
-if (CONFIG_BUILD_RISCV_RVV OR CONFIG_BUILD_RISCV_RVV_NODOT OR
-CONFIG_BUILD_RISCV_C906 OR CONFIG_BUILD_RISCV_RVM OR
-CONFIG_BUILD_RISCV_C908 OR CONFIG_BUILD_RISCV_C920 OR
-CONFIG_BUILD_RISCV_C920V2 OR CONFIG_BUILD_RISCV_PNNA OR
-CONFIG_BUILD_TH1520)
+if (CONFIG_BUILD_RISCV_RVV OR CONFIG_BUILD_RISCV_C906 OR
+CONFIG_BUILD_RISCV_RVM OR CONFIG_BUILD_RISCV_C908 OR
+CONFIG_BUILD_RISCV_C920 OR CONFIG_BUILD_RISCV_C920V2 OR
+CONFIG_BUILD_RISCV_PNNA OR CONFIG_BUILD_TH1520)
set(CMAKE_C_COMPILER riscv64-unknown-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER riscv64-unknown-linux-gnu-g++)
set(CMAKE_ASM_COMPILER riscv64-unknown-linux-gnu-gcc)
@@ -30,6 +29,11 @@ if(CONFIG_USE_EXPORT_MODEL)
add_definitions(-D SHL_EXPORT_MODEL)
endif()

+# SHL disable xtheadvdot extension
+if(CONFIG_DISABLE_VDOT_EXTENSION)
+add_definitions(-D SHL_DISABLE_VDOT)
+endif()
+
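For reference, a minimal sketch of how a SHL_DISABLE_VDOT compile definition would typically be consumed in C sources; the kernel names below are hypothetical, not from this commit:

#include <stdint.h>

/* Hypothetical kernel entry points; real SHL kernels differ. */
void gemm_int8_vdot(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n);
void gemm_int8_rvv(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n);

void gemm_int8(int32_t *dst, const int8_t *sa, const int8_t *sb, int m, int k, int n)
{
#ifdef SHL_DISABLE_VDOT
    /* fallback: plain RVV multiply-accumulate path */
    gemm_int8_rvv(dst, sa, sb, m, k, n);
#else
    /* fast path using the xtheadvdot dot-product extension */
    gemm_int8_vdot(dst, sa, sb, m, k, n);
#endif
}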
if (CONFIG_BUILD_ANDROID_TH1520)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_ANDROID -Wno-deprecated-non-prototype")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_ANDROID")
@@ -66,6 +70,11 @@ if(CONFIG_SHL_LAYER_BENCHMARK)
message(STATUS "Print the execution time of each layer - ON")
endif()

+if(CONFIG_SHL_TRACE)
+add_definitions(-DSHL_TRACE)
+message(STATUS "Generate trace data - ON")
+endif()
+
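Likewise, a minimal sketch of how an SHL_TRACE definition can gate trace output at compile time; the SHL_TRACE_LOG macro below is hypothetical, not SHL's actual trace API:

#include <stdio.h>

/* Hypothetical macro: compiled out entirely unless -DSHL_TRACE is set. */
#ifdef SHL_TRACE
#define SHL_TRACE_LOG(fmt, ...) fprintf(stderr, "[shl-trace] " fmt "\n", ##__VA_ARGS__)
#else
#define SHL_TRACE_LOG(fmt, ...) ((void)0)
#endif

void run_layer(const char *name)
{
    SHL_TRACE_LOG("enter layer %s", name);
    /* ... kernel work ... */
    SHL_TRACE_LOG("leave layer %s", name);
}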
if(CONFIG_GRAPH_REFERENCE_TVMGEN)
add_definitions(-DGRAPH_REFERENCE_TVMGEN)
LIST(APPEND GREF_SRCS source/tvm_gen/utils.c source/tvm_gen/setup.c)
122 changes: 122 additions & 0 deletions include/backend/c906/perf.h
@@ -0,0 +1,122 @@
/*
* Copyright (C) 2016-2023 T-Head Semiconductor Co., Ltd. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef INCLUDE_SHL_C906_PERF_H_
#define INCLUDE_SHL_C906_PERF_H_

#include "csi_nn.h"
#include "shl_utils.h"

int shl_c906_conv2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *kernel, struct csinn_tensor *bias,
struct csinn_conv2d_params *params, struct csinn_perf_info *perf_info);

int shl_c906_depthwise_conv2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *kernel, struct csinn_tensor *bias,
struct csinn_conv2d_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_conv1d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *kernel, struct csinn_tensor *bias,
struct csinn_conv1d_params *params, struct csinn_perf_info *perf_info);

int shl_c906_depthwise_conv1d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *kernel, struct csinn_tensor *bias,
struct csinn_conv1d_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_fullyconnected_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *weights, struct csinn_tensor *bias,
struct csinn_fc_params *params, struct csinn_perf_info *perf_info);

int shl_c906_maxpool2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_pool_params *params, struct csinn_perf_info *perf_info);

int shl_c906_avgpool2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_pool_params *params, struct csinn_perf_info *perf_info);

int shl_c906_div_perf(struct csinn_tensor *input0, struct csinn_tensor *input1,
struct csinn_tensor *output, struct csinn_diso_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_abs_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_siso_params *params, struct csinn_perf_info *perf_info);

int shl_c906_add_perf(struct csinn_tensor *input0, struct csinn_tensor *input1,
struct csinn_tensor *output, struct csinn_diso_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_clip_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_clip_params *params, struct csinn_perf_info *perf_info);

int shl_c906_concat_perf(struct csinn_tensor **input, struct csinn_tensor *output,
struct csinn_clip_params *params, struct csinn_perf_info *perf_info);

int shl_c906_global_avgpool2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_pool_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_global_maxpool2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_pool_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_leaky_relu_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_relu_params *params, struct csinn_perf_info *perf_info);

int shl_c906_lrn_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_lrn_params *params, struct csinn_perf_info *perf_info);

int shl_c906_matmul_perf(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
struct csinn_tensor *output, struct csinn_matmul_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_minimum_perf(struct csinn_tensor *input0, struct csinn_tensor *input1,
struct csinn_tensor *output, struct csinn_diso_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_mul_perf(struct csinn_tensor *input0, struct csinn_tensor *input1,
struct csinn_tensor *output, struct csinn_diso_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_prelu_perf(struct csinn_tensor *input, struct csinn_tensor *alpha,
struct csinn_tensor *output, struct csinn_prelu_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_relu_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_relu_params *params, struct csinn_perf_info *perf_info);

int shl_c906_relu1_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_relu_params *params, struct csinn_perf_info *perf_info);

int shl_c906_relu6_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_relu_params *params, struct csinn_perf_info *perf_info);

int shl_c906_split_perf(struct csinn_tensor *input, struct csinn_tensor **output,
struct csinn_split_params *params, struct csinn_perf_info *perf_info);

int shl_c906_sub_perf(struct csinn_tensor *input0, struct csinn_tensor *input1,
struct csinn_tensor *output, struct csinn_diso_params *params,
struct csinn_perf_info *perf_info);

int shl_c906_reshape_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_reshape_params *params, struct csinn_perf_info *perf_info);

int shl_c906_reduce_sum_perf(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_reduce_params *params, struct csinn_perf_info *perf_info);

#endif // INCLUDE_SHL_C906_PERF_H_
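These declarations all follow one pattern: each takes the op's usual tensors and params plus an out-parameter struct csinn_perf_info. A hedged usage sketch — query_conv2d_perf is a made-up helper, csinn_alloc_params is the existing CSI-NN allocator, and the fields of csinn_perf_info (new in this release) are assumed to be defined by the included headers, so the struct is only passed through:

#include "csi_nn.h"
#include "backend/c906/perf.h"

/* Sketch only: query per-op perf info for a conv2d on C906. */
int query_conv2d_perf(struct csinn_tensor *input, struct csinn_tensor *output,
                      struct csinn_tensor *kernel, struct csinn_tensor *bias)
{
    struct csinn_conv2d_params *params =
        csinn_alloc_params(sizeof(struct csinn_conv2d_params), NULL);
    struct csinn_perf_info perf_info = {0};

    return shl_c906_conv2d_perf(input, output, kernel, bias, params, &perf_info);
}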
14 changes: 0 additions & 14 deletions include/backend/c908/c908.h
@@ -45,20 +45,6 @@ int shl_c908_depthwise_conv2d_init_int8(struct csinn_tensor *input, struct csinn
struct csinn_tensor *kernel, struct csinn_tensor *bias,
struct csinn_conv2d_params *params);

-int shl_c908_avgpool2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-int shl_c908_avgpool2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-int shl_c908_avgpool2d_init_int8(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-
-int shl_c908_maxpool2d_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-int shl_c908_maxpool2d_init_fp16(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-int shl_c908_maxpool2d_init_int8(struct csinn_tensor *input, struct csinn_tensor *output,
-struct csinn_pool_params *params);
-
int shl_c908_fullyconnected_init_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *weights, struct csinn_tensor *bias,
struct csinn_fc_params *params);
48 changes: 41 additions & 7 deletions include/backend/c920/c920.h
@@ -96,12 +96,35 @@ void shl_c920_gemm_block_8xpack2n_fp16(__fp16 *dst, const __fp16 *sa, const __fp
__fp16 *bias, int m, int k, int n, const int M_BLK,
const int K_BLK, const int N_BLK);

-/************************************ fullyconnected **********************************/
+/************************************* gemm a0b1 *************************************/
+void shl_c920_gemm_a0b1_8xpack2n_fp32(float *dst, const float *sa, const float *sb, float *bias,
+int M, int K, int N);
+void shl_c920_gemm_a0b1_8xpack2n_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb, __fp16 *bias,
+int M, int K, int N);
+
+void shl_c920_gemm_a0nb1r_8xpack2n_fp32(float *dst, const float *sa, const float *sb, float *bias,
+int M, int K, int N);
+void shl_c920_gemm_a0nb1n_dot_fp32_q8(float *dst, const float *sa, const int8_t *sb, float *bias,
+int M, int K, int N, const __fp16 *scale);
+void shl_c920_gemm_a0nb1n_dot_fp32_q4(float *dst, const float *sa, const int8_t *sb, float *bias,
+int M, int K, int N, const __fp16 *scale);
+
+void shl_c920_gemm_a0nb1r_8xpack2n_fp16(__fp16 *dst, const __fp16 *sa, const __fp16 *sb,
+__fp16 *bias, int M, int K, int N);
+void shl_c920_gemm_a0nb1n_dot_fp16_q8(__fp16 *dst, const __fp16 *sa, const int8_t *sb, __fp16 *bias,
+int M, int K, int N, const __fp16 *scale);
+void shl_c920_gemm_a0nb1n_dot_fp16_q4(__fp16 *dst, const __fp16 *sa, const int8_t *sb, __fp16 *bias,
+int M, int K, int N, const __fp16 *scale);
+
+void shl_c920_gemm_a0nb1_dot_fp16_q8_rearrange(__fp16 *dst, const __fp16 *sa, const int8_t *sb,
+__fp16 *bias, int M, int K, int N,
+const __fp16 *scale);
+
+void shl_c920_gemm_a0nb1_dot_fp16_q4_rearrange(__fp16 *dst, const __fp16 *sa, const int8_t *sb,
+__fp16 *bias, int M, int K, int N,
+const __fp16 *scale);
+
+/************************************ fullyconnected **********************************/
int shl_c920_fullyconnected_gemm_fp32(struct csinn_tensor *input, struct csinn_tensor *output,
struct csinn_tensor *weights, struct csinn_tensor *bias,
struct csinn_fc_params *params);
@@ -110,12 +133,23 @@ int shl_c920_fullyconnected_gemm_fp16(struct csinn_t
struct csinn_fc_params *params);

/*************************************** matmul ***************************************/
-int shl_c920_matmul_fp32(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
-struct csinn_tensor *output, struct csinn_matmul_params *params);
-int shl_c920_matmul_fp16(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
-struct csinn_tensor *output, struct csinn_matmul_params *params);
-int shl_c920_matmul_fp16_w_int8(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
-struct csinn_tensor *output, struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b0_fp32(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output, struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b1_fp32(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output, struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b1_fp32_block_quant(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output,
+struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b0_fp16(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output, struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b0_fp16_w_int8(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output,
+struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b1_fp16(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output, struct csinn_matmul_params *params);
+int shl_c920_matmul_a0b1_fp16_block_quant(struct csinn_tensor *mat0, struct csinn_tensor *mat1,
+struct csinn_tensor *output,
+struct csinn_matmul_params *params);

void shl_c920_u8_to_f32(const uint8_t *input, float *output, int32_t offset, float *scale,
uint32_t length);
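The header above gives only signatures for the new GEMM entry points; the a0/b1 layout semantics are not documented here. A minimal calling sketch under the assumption that sa is a plain M x K row-major matrix ("a0") and sb has already been reordered into the packed "b1" layout (gemm_example is a made-up driver, not SHL API):

#include <stdlib.h>
#include "backend/c920/c920.h"

/* Sketch: drive the new fp32 a0b1 GEMM with a zero bias. */
void gemm_example(const float *sa, const float *sb_packed, int M, int K, int N)
{
    float *dst = malloc(sizeof(float) * M * N);
    float *bias = calloc(N, sizeof(float)); /* zero bias */

    shl_c920_gemm_a0b1_8xpack2n_fp32(dst, sa, sb_packed, bias, M, K, N);

    free(bias);
    free(dst);
}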
[196 changed files in this commit; remaining file diffs not shown.]