
Commit ef36d79

implement the missing dequantize image on armv7, prefer neon-optimized 3-dim dequantize, fix Tencent#547

nihui committed Aug 30, 2018
1 parent 19ad4cf commit ef36d79
Showing 3 changed files with 45 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/layer/arm/convolutiondepthwise_arm.cpp
@@ -220,7 +220,7 @@ int ConvolutionDepthWise_arm::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

- Mat top_blob_g = top_blob.channel(g);
+ Mat top_blob_g = top_blob.channel_range(g, 1);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}

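Why the one-line change above matters: Mat::channel(g) returns a 2-dim (w x h) view of the group's output, so Dequantize_arm would need a 2-dim path (which, per the commit message, was missing on armv7 until this commit), whereas Mat::channel_range(g, 1) returns a 3-dim (w x h x 1) view and therefore takes the NEON-optimized dims == 3 branch. A minimal sketch of the distinction, assuming ncnn's Mat/Layer API as used in this diff (helper name is hypothetical, header layout assumed for this ncnn version):

// Sketch only, not part of the commit.
#include "layer.h"   // ncnn::Layer, ncnn::Option (header layout assumed)
#include "mat.h"     // ncnn::Mat

static int dequantize_group(ncnn::Layer* dequantize_op, ncnn::Mat& top_blob,
                            int g, const ncnn::Option& opt_g)
{
    // channel(g) would give a 2-dim w x h view -> plain scalar dequantize path.
    // channel_range(g, 1) gives a 3-dim w x h x 1 view (no copy) -> dims == 3
    // NEON path in Dequantize_arm::forward_inplace().
    ncnn::Mat top_blob_g = top_blob.channel_range(g, 1);
    return dequantize_op->forward_inplace(top_blob_g, opt_g);
}
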
37 changes: 37 additions & 0 deletions src/layer/arm/dequantize_arm.cpp
@@ -53,6 +53,43 @@ int Dequantize_arm::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
}
}

+ if (dims == 2)
+ {
+     int w = bottom_top_blob.w;
+     int h = bottom_top_blob.h;
+
+     if (bias_term)
+     {
+         #pragma omp parallel for num_threads(opt.num_threads)
+         for (int i=0; i<h; i++)
+         {
+             const int* intptr = bottom_top_blob.row<const int>(i);
+             float* ptr = bottom_top_blob.row(i);
+
+             float bias = bias_data_size > 1 ? bias_data[i] : bias_data[0];
+
+             for (int j=0; j<w; j++)
+             {
+                 ptr[j] = intptr[j] * scale + bias;
+             }
+         }
+     }
+     else
+     {
+         #pragma omp parallel for num_threads(opt.num_threads)
+         for (int i=0; i<h; i++)
+         {
+             const int* intptr = bottom_top_blob.row<const int>(i);
+             float* ptr = bottom_top_blob.row(i);
+
+             for (int j=0; j<w; j++)
+             {
+                 ptr[j] = intptr[j] * scale;
+             }
+         }
+     }
+ }
+
if (dims == 3)
{
int w = bottom_top_blob.w;
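
The block added above fills in the previously missing 2-dim case, so int32 image blobs are no longer left un-dequantized on armv7. The arithmetic is a per-element affine map with an optional per-row bias; a standalone reference loop (hypothetical helper, not part of ncnn) that mirrors it:

// Hypothetical reference for the dims == 2 branch above:
// out[i][j] = in[i][j] * scale, plus bias[i] (or bias[0]) when a bias term is set.
static void dequantize_2d_ref(const int* in, float* out, int w, int h,
                              float scale, const float* bias, int bias_data_size)
{
    for (int i = 0; i < h; i++)
    {
        const float b = bias ? (bias_data_size > 1 ? bias[i] : bias[0]) : 0.f;
        for (int j = 0; j < w; j++)
        {
            out[i * w + j] = in[i * w + j] * scale + b;
        }
    }
}

In the layer itself the operation is in place: the int32 inputs and float outputs share the same 4-byte-per-element storage, which is why the same row is read through row<const int>() and written through row().
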
14 changes: 7 additions & 7 deletions src/layer/convolutiondepthwise.cpp
@@ -142,8 +142,8 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
ncnn::Option opt = ncnn::get_default_option();
opt.blob_allocator = int8_weight_data.allocator;

- const Mat weight_data_g(weight_data_size_g, (void*)((float*)weight_data + weight_data_size_g * g), (size_t)4u, weight_data.allocator);
- Mat int8_weight_data_g(weight_data_size_g, (void*)((signed char*)int8_weight_data + weight_data_size_g * g), (size_t)1u, int8_weight_data.allocator);
+ const Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
+ Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
op->forward(weight_data_g, int8_weight_data_g, opt);

delete op;
@@ -181,7 +181,7 @@ int ConvolutionDepthWise::load_model(const ModelBin& mb)
dequantize_ops[g]->load_param(pd);

ncnn::Mat weights[1];
- weights[0] = Mat(1, (void*)((const float*)bias_data + g));
+ weights[0] = bias_data.range(g, 1);

dequantize_ops[g]->load_model(ModelBinFromMatArray(weights));
}
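
Both load_model() changes above swap hand-rolled pointer-offset Mat constructors for Mat::range(offset, n), which (as this diff relies on) returns a 1-dim view sharing the blob's storage and allocator rather than making a copy. A minimal sketch of the per-group weight quantization pattern from the @@ -142,8 hunk, with hypothetical helper and parameter names:

// Sketch only: per-group 1-dim views over the packed weight blobs. Writing
// through int8_weight_data_g modifies the shared int8_weight_data storage.
#include "layer.h"   // ncnn::Layer, ncnn::Option (header layout assumed)
#include "mat.h"     // ncnn::Mat

static int quantize_group_weights(ncnn::Layer* quantize_op,
                                  const ncnn::Mat& weight_data,
                                  ncnn::Mat& int8_weight_data,
                                  int weight_data_size_g, int g,
                                  const ncnn::Option& opt)
{
    const ncnn::Mat weight_data_g = weight_data.range(weight_data_size_g * g, weight_data_size_g);
    ncnn::Mat int8_weight_data_g = int8_weight_data.range(weight_data_size_g * g, weight_data_size_g);
    return quantize_op->forward(weight_data_g, int8_weight_data_g, opt);
}
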
@@ -229,8 +229,8 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
opt_g.num_threads = 1;
opt_g.blob_allocator = bottom_blob_int8.allocator;

- const Mat bottom_blob_g(w, h, channels_g, (void*)((const float*)bottom_blob.channel(channels_g * g)));
- Mat bottom_blob_int8_g(w, h, channels_g, (void*)((signed char*)bottom_blob_int8.channel(channels_g * g)));
+ const Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
+ Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
quantize_ops[g]->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
}
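
The same view idea covers the per-group input quantization above: channel_range(channels_g * g, channels_g) carries the source blob's own element size (4 bytes for the float input, 1 byte for the int8 blob), cstep and allocator, whereas the replaced Mat(w, h, c, data) constructor defaults to a 4-byte element size, a mismatch for the 1-byte int8 blob that the view avoids. A sketch with hypothetical helper and parameter names:

// Sketch only: per-group views for the int8 quantize step, mirroring the
// @@ -229,8 hunk above.
#include "layer.h"   // ncnn::Layer, ncnn::Option (header layout assumed)
#include "mat.h"     // ncnn::Mat

static int quantize_group_input(ncnn::Layer* quantize_op,
                                const ncnn::Mat& bottom_blob,   // float, dims == 3
                                ncnn::Mat& bottom_blob_int8,    // int8, dims == 3
                                int channels_g, int g,
                                const ncnn::Option& opt_g)
{
    // Views keep elemsize/cstep/allocator of their source blobs; no copies.
    const ncnn::Mat bottom_blob_g = bottom_blob.channel_range(channels_g * g, channels_g);
    ncnn::Mat bottom_blob_int8_g = bottom_blob_int8.channel_range(channels_g * g, channels_g);
    return quantize_op->forward(bottom_blob_g, bottom_blob_int8_g, opt_g);
}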

@@ -329,7 +329,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

- Mat top_blob_g = top_blob.channel(g);
+ Mat top_blob_g = top_blob.channel_range(g, 1);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}
}
@@ -391,7 +391,7 @@ int ConvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
opt_g.num_threads = 1;
opt_g.blob_allocator = top_blob.allocator;

- Mat top_blob_g(outw, outh, num_output_g, (void*)((signed int*)top_blob.channel(g * num_output_g)));
+ Mat top_blob_g = top_blob.channel_range(num_output_g * g, num_output_g);
dequantize_ops[g]->forward_inplace(top_blob_g, opt_g);
}
}
