From ef36d79b7eaa114797d968f262e410197f6ac3da Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 30 Aug 2018 17:33:43 +0800
Subject: [PATCH] implement the missing dequantize image on armv7, prefer
 neon-optimized 3-dim dequantize, fix #547

---
 src/layer/arm/convolutiondepthwise_arm.cpp |  2 +-
 src/layer/arm/dequantize_arm.cpp           | 37 ++++++++++++++++++++++
 src/layer/convolutiondepthwise.cpp         | 14 ++++----
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp
index b8964130177..a814c83c514 100644
--- a/src/layer/arm/convolutiondepthwise_arm.cpp
+++ b/src/layer/arm/convolutiondepthwise_arm.cpp
@@ -220,7 +220,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g = top_blob.channel(g);
+            Mat top_blob_g = top_blob.channel_range(g, 1);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
 
diff --git a/src/layer/arm/dequantize_arm.cpp b/src/layer/arm/dequantize_arm.cpp
index 1309469268c..3e3f278b2ee 100644
--- a/src/layer/arm/dequantize_arm.cpp
+++ b/src/layer/arm/dequantize_arm.cpp
@@ -53,6 +53,43 @@ int Dequantize_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) con
         }
     }
 
+    if (dims == 2)
+    {
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+
+        if (bias_term)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i=0; i<h; i++)
+            {
+                const int* intptr = bottom_top_blob.row<const int>(i);
+                float* ptr = bottom_top_blob.row(i);
+
+                float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];
+
+                for (int j=0; j<w; j++)
+                {
+                    ptr[j] = intptr[j] * scale + bias;
+                }
+            }
+        }
+        else
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i=0; i<h; i++)
+            {
+                const int* intptr = bottom_top_blob.row<const int>(i);
+                float* ptr = bottom_top_blob.row(i);
+
+                for (int j=0; j<w; j++)
+                {
+                    ptr[j] = intptr[j] * scale;
+                }
+            }
+        }
+    }
+
     if (dims == 3)
     {
         int w = bottom_top_blob.w;
diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -152,8 +152,8 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
             ncnn::Option opt;
             opt.blob_allocator = int8_weight_data.allocator;
 
-            ncnn::Mat weight_data_g(weight_data_size_g, (void*)((const float*)weight_data + weight_data_size_g * g));
-            ncnn::Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g));
+            const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
+            Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
             op->forward(weight_data_g, int8_weight_data_g, opt);
 
             delete op;
@@ -181,7 +181,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
             dequantize_ops[g]->load_param(pd);
 
             ncnn::Mat weights[1];
-            weights[0] = Mat(1, (void*)((const float*)bias_data + g));
+            weights[0] = bias_data.range(g, 1);
 
             dequantize_ops[g]->load_model(ModelBinFromMatArray(weights));
         }
@@ -229,8 +229,8 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = bottom_blob_int8.allocator;
 
-            const Mat bottom_blob_g(w, h, channels_g, (void*)((const float*)bottom_blob.channel(channels_g * g)));
-            Mat bottom_blob_int8_g(w, h, channels_g, (void*)((signed char*)bottom_blob_int8.channel(channels_g * g)));
+            const Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
+            Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
             quantize_ops[g]->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
         }
     }
@@ -329,7 +329,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g = top_blob.channel(g);
+            Mat top_blob_g = top_blob.channel_range(g, 1);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
     }
@@ -391,7 +391,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g(outw, outh, num_output_g, (void*)((signed int*)top_blob.channel(g * num_output_g)));
+            Mat top_blob_g = top_blob.channel_range(num_output_g * g, num_output_g);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
     }