Skip to content

Commit

Permalink
Update cuDNN layers to use v2 API
Browse files Browse the repository at this point in the history
  • Loading branch information
NV-slayton committed Mar 4, 2015
1 parent 4fba3da commit f73ed4d
Show file tree
Hide file tree
Showing 16 changed files with 162 additions and 71 deletions.
4 changes: 2 additions & 2 deletions include/caffe/common_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,8 @@ class CuDNNSoftmaxLayer : public SoftmaxLayer<Dtype> {

bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensor4dDescriptor_t bottom_desc_;
cudnnTensor4dDescriptor_t top_desc_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
};
#endif

Expand Down
12 changes: 6 additions & 6 deletions include/caffe/neuron_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,8 @@ class CuDNNReLULayer : public ReLULayer<Dtype> {

bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensor4dDescriptor_t bottom_desc_;
cudnnTensor4dDescriptor_t top_desc_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
};
#endif

Expand Down Expand Up @@ -516,8 +516,8 @@ class CuDNNSigmoidLayer : public SigmoidLayer<Dtype> {

bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensor4dDescriptor_t bottom_desc_;
cudnnTensor4dDescriptor_t top_desc_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
};
#endif

Expand Down Expand Up @@ -601,8 +601,8 @@ class CuDNNTanHLayer : public TanHLayer<Dtype> {

bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensor4dDescriptor_t bottom_desc_;
cudnnTensor4dDescriptor_t top_desc_;
cudnnTensorDescriptor_t bottom_desc_;
cudnnTensorDescriptor_t top_desc_;
};
#endif

Expand Down
18 changes: 9 additions & 9 deletions include/caffe/util/cudnn.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,20 +57,20 @@ template<> class dataType<double> {
};

template <typename Dtype>
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensor4dDescriptor(desc));
inline void createTensor4dDesc(cudnnTensorDescriptor_t* desc) {
CUDNN_CHECK(cudnnCreateTensorDescriptor(desc));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w,
int stride_n, int stride_c, int stride_h, int stride_w) {
CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
n, c, h, w, stride_n, stride_c, stride_h, stride_w));
}

template <typename Dtype>
inline void setTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
inline void setTensor4dDesc(cudnnTensorDescriptor_t* desc,
int n, int c, int h, int w) {
const int stride_w = 1;
const int stride_h = w * stride_w;
Expand All @@ -84,7 +84,7 @@ template <typename Dtype>
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
int n, int c, int h, int w) {
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
CUDNN_CHECK(cudnnSetFilterDescriptor(*desc, dataType<Dtype>::type,
CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType<Dtype>::type,
n, c, h, w));
}

Expand All @@ -95,9 +95,9 @@ inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv) {

template <typename Dtype>
inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
cudnnTensor4dDescriptor_t bottom, cudnnFilterDescriptor_t filter,
cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
int pad_h, int pad_w, int stride_h, int stride_w) {
CUDNN_CHECK(cudnnSetConvolutionDescriptor(*conv, bottom, filter,
CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
}

Expand All @@ -110,13 +110,13 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
*mode = CUDNN_POOLING_MAX;
break;
case PoolingParameter_PoolMethod_AVE:
*mode = CUDNN_POOLING_AVERAGE;
*mode = CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}
CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w,
CUDNN_CHECK(cudnnSetPooling2dDescriptor(*conv, *mode, h, w, 0, 0,
stride_h, stride_w));
}

Expand Down
9 changes: 6 additions & 3 deletions include/caffe/vision_layers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,14 @@ class CuDNNConvolutionLayer : public ConvolutionLayer<Dtype> {
bool handles_setup_;
cudnnHandle_t* handle_;
cudaStream_t* stream_;
vector<cudnnTensor4dDescriptor_t> bottom_descs_, top_descs_;
cudnnTensor4dDescriptor_t bias_desc_;
vector<cudnnTensorDescriptor_t> bottom_descs_, top_descs_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
vector<cudnnConvolutionDescriptor_t> conv_descs_;
int bottom_offset_, top_offset_, weight_offset_, bias_offset_;

size_t workspaceSizeInBytes;
void *workspace;
};
#endif

Expand Down Expand Up @@ -445,7 +448,7 @@ class CuDNNPoolingLayer : public PoolingLayer<Dtype> {

bool handles_setup_;
cudnnHandle_t handle_;
cudnnTensor4dDescriptor_t bottom_desc_, top_desc_;
cudnnTensorDescriptor_t bottom_desc_, top_desc_;
cudnnPoolingDescriptor_t pooling_desc_;
cudnnPoolingMode_t mode_;
};
Expand Down
15 changes: 10 additions & 5 deletions src/caffe/layers/cudnn_conv_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(
stream_ = new cudaStream_t[this->group_ * CUDNN_STREAMS_PER_GROUP];
handle_ = new cudnnHandle_t[this->group_ * CUDNN_STREAMS_PER_GROUP];

workspace = NULL;
workspaceSizeInBytes = (size_t)0;

for (int g = 0; g < this->group_ * CUDNN_STREAMS_PER_GROUP; g++) {
CUDA_CHECK(cudaStreamCreate(&stream_[g]));
CUDNN_CHECK(cudnnCreate(&handle_[g]));
Expand All @@ -43,10 +46,10 @@ void CuDNNConvolutionLayer<Dtype>::LayerSetUp(

// Create tensor descriptor(s) for data and corresponding convolution(s).
for (int i = 0; i < bottom.size(); i++) {
cudnnTensor4dDescriptor_t bottom_desc;
cudnnTensorDescriptor_t bottom_desc;
cudnn::createTensor4dDesc<Dtype>(&bottom_desc);
bottom_descs_.push_back(bottom_desc);
cudnnTensor4dDescriptor_t top_desc;
cudnnTensorDescriptor_t top_desc;
cudnn::createTensor4dDesc<Dtype>(&top_desc);
top_descs_.push_back(top_desc);
cudnnConvolutionDescriptor_t conv_desc;
Expand Down Expand Up @@ -104,12 +107,12 @@ CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
if (!handles_setup_) { return; }

for (int i = 0; i < bottom_descs_.size(); i++) {
cudnnDestroyTensor4dDescriptor(bottom_descs_[i]);
cudnnDestroyTensor4dDescriptor(top_descs_[i]);
cudnnDestroyTensorDescriptor(bottom_descs_[i]);
cudnnDestroyTensorDescriptor(top_descs_[i]);
cudnnDestroyConvolutionDescriptor(conv_descs_[i]);
}
if (this->bias_term_) {
cudnnDestroyTensor4dDescriptor(bias_desc_);
cudnnDestroyTensorDescriptor(bias_desc_);
}
cudnnDestroyFilterDescriptor(filter_desc_);

Expand All @@ -118,6 +121,8 @@ CuDNNConvolutionLayer<Dtype>::~CuDNNConvolutionLayer() {
cudnnDestroy(handle_[g]);
}

if (workspace) cudaFree(workspace);

delete [] stream_;
delete [] handle_;
}
Expand Down
75 changes: 59 additions & 16 deletions src/caffe/layers/cudnn_conv_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,57 @@ void CuDNNConvolutionLayer<Dtype>::Forward_gpu(

// Forward through cuDNN in parallel over groups.
for (int g = 0; g < this->group_; g++) {
const Dtype alpha = 1.0;
const Dtype beta = 0.0;

cudnnConvolutionFwdAlgo_t algo;

CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[g],
bottom_descs_[i],
filter_desc_,
conv_descs_[i],
top_descs_[i],
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
0, // memoryLimitInBytes,
&algo));

size_t workspaceSizeInBytes_temp = 0;

CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[g],
bottom_descs_[i],
filter_desc_,
conv_descs_[i],
top_descs_[i],
algo,
&workspaceSizeInBytes_temp));

if (workspaceSizeInBytes_temp > workspaceSizeInBytes) {
workspaceSizeInBytes = workspaceSizeInBytes_temp;
// free the existing workspace and allocate a new (larger) one
if (this->workspace != NULL) {
cudaFree(this->workspace);
}
cudaMalloc(&(this->workspace), workspaceSizeInBytes);
CUDA_POST_KERNEL_CHECK;
}

// Filters.
CUDNN_CHECK(cudnnConvolutionForward(handle_[g],
CUDNN_CHECK(cudnnConvolutionForward(handle_[g], (void *)(&alpha),
bottom_descs_[i], bottom_data + bottom_offset_ * g,
filter_desc_, weight + weight_offset_ * g,
conv_descs_[i],
top_descs_[i], top_data + top_offset_ * g,
CUDNN_RESULT_NO_ACCUMULATE));
algo, workspace, workspaceSizeInBytes,
(void *)(&beta),
top_descs_[i], top_data + top_offset_ * g) );

// Bias.
if (this->bias_term_) {
const Dtype* bias_data = this->blobs_[1]->gpu_data();
Dtype alpha = 1.;
CUDNN_CHECK(cudnnAddTensor4d(handle_[g], CUDNN_ADD_SAME_C, &alpha,
Dtype alpha = 1.0;
Dtype beta = 1.0;
CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, (void *)(&alpha),
bias_desc_, bias_data + bias_offset_ * g,
(void *)(&beta),
top_descs_[i], top_data + top_offset_ * g));
}
}
Expand Down Expand Up @@ -66,36 +103,42 @@ void CuDNNConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
// Backward through cuDNN in parallel over groups and gradients.
for (int g = 0; g < this->group_; g++) {
// Gradient w.r.t. bias.

if (this->bias_term_ && this->param_propagate_down_[1]) {
Dtype alpha = 1.0;
Dtype beta = 1.0;
CUDNN_CHECK(cudnnConvolutionBackwardBias(handle_[0*this->group_ + g],
(void *)(&alpha),
top_descs_[i], top_diff + top_offset_ * g,
bias_desc_, bias_diff + bias_offset_ * g,
CUDNN_RESULT_ACCUMULATE));
(void *)(&beta),
bias_desc_, bias_diff + bias_offset_ * g) );
}

// Gradient w.r.t. weights.
if (this->param_propagate_down_[0]) {
Dtype alpha = 1.0;
Dtype beta = 1.0;
const Dtype* bottom_data = bottom[i]->gpu_data();
CUDNN_CHECK(cudnnConvolutionBackwardFilter(handle_[1*this->group_ + g],
(void *)(&alpha),
bottom_descs_[i], bottom_data + bottom_offset_ * g,
top_descs_[i], top_diff + top_offset_ * g,
conv_descs_[i],
filter_desc_, weight_diff + weight_offset_ * g,
CUDNN_RESULT_ACCUMULATE));
conv_descs_[i], (void *)(&beta),
filter_desc_, weight_diff + weight_offset_ * g) );
}

// Gradient w.r.t. bottom data.
if (propagate_down[i]) {
if (weight == NULL) {
weight = this->blobs_[0]->gpu_data();
}
Dtype alpha = 1.0;
Dtype beta = 0.0;

Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
CUDNN_CHECK(cudnnConvolutionBackwardData(handle_[2*this->group_ + g],
(void *)(&alpha),
filter_desc_, weight + weight_offset_ * g,
top_descs_[i], top_diff + top_offset_ * g,
conv_descs_[i],
bottom_descs_[i], bottom_diff + bottom_offset_ * g,
CUDNN_RESULT_NO_ACCUMULATE));
conv_descs_[i], (void *)(&beta),
bottom_descs_[i], bottom_diff + bottom_offset_ * g) );
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/caffe/layers/cudnn_pooling_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ CuDNNPoolingLayer<Dtype>::~CuDNNPoolingLayer() {
// Check that handles have been setup before destroying.
if (!handles_setup_) { return; }

cudnnDestroyTensor4dDescriptor(bottom_desc_);
cudnnDestroyTensor4dDescriptor(top_desc_);
cudnnDestroyTensorDescriptor(bottom_desc_);
cudnnDestroyTensorDescriptor(top_desc_);
cudnnDestroyPoolingDescriptor(pooling_desc_);
cudnnDestroy(handle_);
}
Expand Down
16 changes: 12 additions & 4 deletions src/caffe/layers/cudnn_pooling_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ void CuDNNPoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();
CUDNN_CHECK(cudnnPoolingForward(handle_, pooling_desc_,
bottom_desc_, bottom_data, top_desc_, top_data));

Dtype alpha = 1.0;
Dtype beta = 0.0;

CUDNN_CHECK(cudnnPoolingForward(handle_, pooling_desc_, (void *)(&alpha),
bottom_desc_, bottom_data, (void *)(&beta), top_desc_, top_data));
}

template <typename Dtype>
Expand All @@ -28,9 +32,13 @@ void CuDNNPoolingLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_data = top[0]->gpu_data();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
CUDNN_CHECK(cudnnPoolingBackward(handle_, pooling_desc_,

Dtype alpha = 1.0;
Dtype beta = 0.0;

CUDNN_CHECK(cudnnPoolingBackward(handle_, pooling_desc_, (void *)(&alpha),
top_desc_, top_data, top_desc_, top_diff,
bottom_desc_, bottom_data, bottom_desc_, bottom_diff));
bottom_desc_, bottom_data, (void *)(&beta), bottom_desc_, bottom_diff));
}

INSTANTIATE_LAYER_GPU_FUNCS(CuDNNPoolingLayer);
Expand Down
4 changes: 2 additions & 2 deletions src/caffe/layers/cudnn_relu_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ CuDNNReLULayer<Dtype>::~CuDNNReLULayer() {
// Check that handles have been setup before destroying.
if (!handles_setup_) { return; }

cudnnDestroyTensor4dDescriptor(this->bottom_desc_);
cudnnDestroyTensor4dDescriptor(this->top_desc_);
cudnnDestroyTensorDescriptor(this->bottom_desc_);
cudnnDestroyTensorDescriptor(this->top_desc_);
cudnnDestroy(this->handle_);
}

Expand Down
16 changes: 12 additions & 4 deletions src/caffe/layers/cudnn_relu_layer.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@ void CuDNNReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,

const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* top_data = top[0]->mutable_gpu_data();

Dtype alpha = 1.0;
Dtype beta = 0.0;

CUDNN_CHECK(cudnnActivationForward(this->handle_,
CUDNN_ACTIVATION_RELU,
this->bottom_desc_, bottom_data, this->top_desc_, top_data));
CUDNN_ACTIVATION_RELU, (void *)(&alpha),
this->bottom_desc_, bottom_data, (void *)(&beta), this->top_desc_, top_data));
}

template <typename Dtype>
Expand All @@ -39,10 +43,14 @@ void CuDNNReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const Dtype* top_diff = top[0]->gpu_diff();
const Dtype* bottom_data = bottom[0]->gpu_data();
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();

Dtype alpha = 1.0;
Dtype beta = 0.0;

CUDNN_CHECK(cudnnActivationBackward(this->handle_,
CUDNN_ACTIVATION_RELU,
CUDNN_ACTIVATION_RELU, (void *)(&alpha),
this->top_desc_, top_data, this->top_desc_, top_diff,
this->bottom_desc_, bottom_data, this->bottom_desc_, bottom_diff));
this->bottom_desc_, bottom_data, (void *)(&beta), this->bottom_desc_, bottom_diff));
}

INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer);
Expand Down
4 changes: 2 additions & 2 deletions src/caffe/layers/cudnn_sigmoid_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ CuDNNSigmoidLayer<Dtype>::~CuDNNSigmoidLayer() {
// Check that handles have been setup before destroying.
if (!handles_setup_) { return; }

cudnnDestroyTensor4dDescriptor(this->bottom_desc_);
cudnnDestroyTensor4dDescriptor(this->top_desc_);
cudnnDestroyTensorDescriptor(this->bottom_desc_);
cudnnDestroyTensorDescriptor(this->top_desc_);
cudnnDestroy(this->handle_);
}

Expand Down
Loading

0 comments on commit f73ed4d

Please sign in to comment.