From ef36d79b7eaa114797d968f262e410197f6ac3da Mon Sep 17 00:00:00 2001
From: nihuini <nihuini@tencent.com>
Date: Thu, 30 Aug 2018 17:33:43 +0800
Subject: [PATCH] implement the missing dequantize image on armv7, prefer
 neon-optimized 3-dim dequantize, fix #547

---
 src/layer/arm/convolutiondepthwise_arm.cpp |  2 +-
 src/layer/arm/dequantize_arm.cpp           | 37 ++++++++++++++++++++++
 src/layer/convolutiondepthwise.cpp         | 14 ++++----
 3 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/layer/arm/convolutiondepthwise_arm.cpp b/src/layer/arm/convolutiondepthwise_arm.cpp
index b8964130177..a814c83c514 100644
--- a/src/layer/arm/convolutiondepthwise_arm.cpp
+++ b/src/layer/arm/convolutiondepthwise_arm.cpp
@@ -220,7 +220,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, con
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g = top_blob.channel(g);
+            Mat top_blob_g = top_blob.channel_range(g, 1);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
 
diff --git a/src/layer/arm/dequantize_arm.cpp b/src/layer/arm/dequantize_arm.cpp
index 1309469268c..3e3f278b2ee 100644
--- a/src/layer/arm/dequantize_arm.cpp
+++ b/src/layer/arm/dequantize_arm.cpp
@@ -53,6 +53,43 @@ int Dequantize_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) con
         }
     }
 
+    if (dims == 2)
+    {
+        int w = bottom_top_blob.w;
+        int h = bottom_top_blob.h;
+
+        if (bias_term)
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i=0; i<h; i++)
+            {
+                const int* intptr = bottom_top_blob.row<const int>(i);
+                float* ptr = bottom_top_blob.row(i);
+
+                float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];
+
+                for (int j=0; j<w; j++)
+                {
+                    ptr[j] = intptr[j] * scale + bias;
+                }
+            }
+        }
+        else
+        {
+            #pragma omp parallel for num_threads(opt.num_threads)
+            for (int i=0; i<h; i++)
+            {
+                const int* intptr = bottom_top_blob.row<const int>(i);
+                float* ptr = bottom_top_blob.row(i);
+
+                for (int j=0; j<w; j++)
+                {
+                    ptr[j] = intptr[j] * scale;
+                }
+            }
+        }
+    }
+
     if (dims == 3)
     {
         int w = bottom_top_blob.w;
diff --git a/src/layer/convolutiondepthwise.cpp b/src/layer/convolutiondepthwise.cpp
--- a/src/layer/convolutiondepthwise.cpp
+++ b/src/layer/convolutiondepthwise.cpp
@@ -152,8 +152,8 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
             ncnn::Option opt;
             opt.blob_allocator = int8_weight_data.allocator;
 
-            ncnn::Mat weight_data_g(weight_data_size_g, (void*)((const float*)weight_data + weight_data_size_g * g));
-            ncnn::Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g));
+            const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
+            Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
             op->forward(weight_data_g, int8_weight_data_g, opt);
 
             delete op;
@@ -181,7 +181,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
             dequantize_ops[g]->load_param(pd);
 
             ncnn::Mat weights[1];
-            weights[0] = Mat(1, (void*)((const float*)bias_data + g));
+            weights[0] = bias_data.range(g, 1);
 
             dequantize_ops[g]->load_model(ModelBinFromMatArray(weights));
         }
@@ -229,8 +229,8 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = bottom_blob_int8.allocator;
 
-            const Mat bottom_blob_g(w, h, channels_g, (void*)((const float*)bottom_blob.channel(channels_g * g)));
-            Mat bottom_blob_int8_g(w, h, channels_g, (void*)((signed char*)bottom_blob_int8.channel(channels_g * g)));
+            const Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
+            Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
             quantize_ops[g]->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
         }
     }
@@ -329,7 +329,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g = top_blob.channel(g);
+            Mat top_blob_g = top_blob.channel_range(g, 1);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
     }
@@ -391,7 +391,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const O
             opt_g.num_threads = 1;
             opt_g.blob_allocator = top_blob.allocator;
 
-            Mat top_blob_g(outw, outh, num_output_g, (void*)((signed int*)top_blob.channel(g * num_output_g)));
+            Mat top_blob_g = top_blob.channel_range(num_output_g * g, num_output_g);
             dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
         }
     }