Remove all CUBLAS related thing... #6929

Status: Open. Wants to merge 4 commits into base: master.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -32,7 +32,7 @@ caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ON
caffe_option(USE_NCCL "Build Caffe with NCCL library support" OFF)
caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON)
caffe_option(BUILD_python "Build Python wrapper" ON)
set(python_version "2" CACHE STRING "Specify which Python version to use")
set(python_version "3" CACHE STRING "Specify which Python version to use")
caffe_option(BUILD_matlab "Build Matlab wrapper" OFF IF UNIX OR APPLE)
caffe_option(BUILD_docs "Build documentation" ON IF UNIX OR APPLE)
caffe_option(BUILD_python_layer "Build the Caffe Python layer" ON)
3 changes: 2 additions & 1 deletion Makefile
@@ -175,7 +175,8 @@ INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
ifneq ($(CPU_ONLY), 1)
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
-LIBRARIES := cudart cublas curand
+LIBRARIES := cudart curand
+#LIBRARIES := cudart cublas curand
endif

LIBRARIES += glog gflags protobuf boost_system boost_filesystem m
14 changes: 7 additions & 7 deletions include/caffe/util/device_alternate.hpp
@@ -52,12 +52,12 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
} while (0)

-#define CUBLAS_CHECK(condition) \
-  do { \
-    cublasStatus_t status = condition; \
-    CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
-      << caffe::cublasGetErrorString(status); \
-  } while (0)
+//#define CUBLAS_CHECK(condition) \
+//  do { \
+//    cublasStatus_t status = condition; \
+//    CHECK_EQ(status, CUBLAS_STATUS_SUCCESS) << " " \
+//      << caffe::cublasGetErrorString(status); \
+//  } while (0)

#define CURAND_CHECK(condition) \
do { \
@@ -78,7 +78,7 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
namespace caffe {

// CUDA: library error reporting.
-const char* cublasGetErrorString(cublasStatus_t error);
+//const char* cublasGetErrorString(cublasStatus_t error);
const char* curandGetErrorString(curandStatus_t error);

// CUDA: use 512 threads per block
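With CUBLAS_CHECK commented out, any call site this PR misses becomes a hard compile error, which is arguably the safest failure mode. If a softer transition were wanted, a no-op shim would keep stray references compiling without pulling in cuBLAS; a minimal sketch (hypothetical, not part of this diff):

```cpp
// Hypothetical transitional shim (not in this PR). The macro parameter is
// unused, so the tokens passed to CUBLAS_CHECK(...) are discarded during
// preprocessing and no cuBLAS headers or symbols are required.
#define CUBLAS_CHECK(condition) do { } while (0)
```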
2 changes: 1 addition & 1 deletion scripts/travis/install-deps.sh
@@ -100,7 +100,7 @@ if $WITH_CUDA ; then
apt-get install -y --no-install-recommends \
cuda-core-$CUDA_PKG_VERSION \
cuda-cudart-dev-$CUDA_PKG_VERSION \
-cuda-cublas-dev-$CUDA_PKG_VERSION \
cuda-curand-dev-$CUDA_PKG_VERSION
+# cuda-cublas-dev dropped; a '#' inside the backslash-continued package list would comment out the rest of the apt-get command
# manually create CUDA symlink
ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda
17 changes: 10 additions & 7 deletions src/caffe/common.cpp
@@ -105,14 +105,15 @@ void* Caffe::RNG::generator() {
#else // Normal GPU + CPU Caffe.

Caffe::Caffe()
-    : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
+    //: cublas_handle_(NULL), curand_generator_(NULL), random_generator_(),
+    : curand_generator_(NULL), random_generator_(),
mode_(Caffe::CPU),
solver_count_(1), solver_rank_(0), multiprocess_(false) {
// Try to create a cublas handler, and report an error if failed (but we will
// keep the program running as one might just want to run CPU code).
-  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
-    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
-  }
+  //if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
+  //  LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
+  //}
// Try to create a curand handler.
if (curandCreateGenerator(&curand_generator_, CURAND_RNG_PSEUDO_DEFAULT)
!= CURAND_STATUS_SUCCESS ||
@@ -123,7 +124,7 @@ Caffe::Caffe()
}

Caffe::~Caffe() {
-  if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
+  //if (cublas_handle_) CUBLAS_CHECK(cublasDestroy(cublas_handle_));
if (curand_generator_) {
CURAND_CHECK(curandDestroyGenerator(curand_generator_));
}
@@ -156,11 +157,11 @@ void Caffe::SetDevice(const int device_id) {
// The call to cudaSetDevice must come before any calls to Get, which
// may perform initialization using the GPU.
CUDA_CHECK(cudaSetDevice(device_id));
-  if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
+  //if (Get().cublas_handle_) CUBLAS_CHECK(cublasDestroy(Get().cublas_handle_));
if (Get().curand_generator_) {
CURAND_CHECK(curandDestroyGenerator(Get().curand_generator_));
}
-  CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
+  //CUBLAS_CHECK(cublasCreate(&Get().cublas_handle_));
CURAND_CHECK(curandCreateGenerator(&Get().curand_generator_,
CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(Get().curand_generator_,
@@ -258,6 +259,7 @@ void* Caffe::RNG::generator() {
return static_cast<void*>(generator_->rng());
}

+/*
const char* cublasGetErrorString(cublasStatus_t error) {
switch (error) {
case CUBLAS_STATUS_SUCCESS:
@@ -287,6 +289,7 @@ const char* cublasGetErrorString(cublasStatus_t error) {
}
return "Unknown cublas status";
}
+*/

const char* curandGetErrorString(curandStatus_t error) {
switch (error) {
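Two things worth noting in this file. First, if include/caffe/common.hpp still declares the cublas_handle_ member and the Caffe::cublas_handle() accessor (neither appears in this diff), the member is now left uninitialized and dead. Second, an alternative to comment blocks is gating on a build flag, so the handle code can be re-enabled from the build system; a sketch, assuming a hypothetical USE_CUBLAS define that this PR does not wire up:

```cpp
// Sketch only: guard the cuBLAS handle behind a hypothetical USE_CUBLAS
// macro instead of commenting it out line by line.
#ifdef USE_CUBLAS
  if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
    LOG(ERROR) << "Cannot create Cublas handle. Cublas won't be available.";
  }
#endif
```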
2 changes: 1 addition & 1 deletion src/caffe/layers/window_data_layer.cpp
@@ -290,7 +290,7 @@ void WindowDataLayer<Dtype>::load_batch(Batch<Dtype>* batch) {
image_database_cache_[window[WindowDataLayer<Dtype>::IMAGE_INDEX]];
cv_img = DecodeDatumToCVMat(image_cached.second, true);
} else {
-      cv_img = cv::imread(image.first, CV_LOAD_IMAGE_COLOR);
+      cv_img = cv::imread(image.first, cv::IMREAD_COLOR);
if (!cv_img.data) {
LOG(ERROR) << "Could not open or find file " << image.first;
return;
8 changes: 4 additions & 4 deletions src/caffe/util/io.cpp
@@ -73,8 +73,8 @@ void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
cv::Mat ReadImageToCVMat(const string& filename,
const int height, const int width, const bool is_color) {
cv::Mat cv_img;
-  int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
-    CV_LOAD_IMAGE_GRAYSCALE);
+  int cv_read_flag = (is_color ? cv::IMREAD_COLOR :
+    cv::IMREAD_GRAYSCALE);
cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag);
if (!cv_img_origin.data) {
LOG(ERROR) << "Could not open or find file " << filename;
@@ -179,8 +179,8 @@ cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) {
CHECK(datum.encoded()) << "Datum not encoded";
const string& data = datum.data();
std::vector<char> vec_data(data.c_str(), data.c_str() + data.size());
-  int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
-    CV_LOAD_IMAGE_GRAYSCALE);
+  int cv_read_flag = (is_color ? cv::IMREAD_COLOR :
+    cv::IMREAD_GRAYSCALE);
cv_img = cv::imdecode(vec_data, cv_read_flag);
if (!cv_img.data) {
LOG(ERROR) << "Could not decode datum ";
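The cv::IMREAD_* changes in this file and in window_data_layer.cpp are independent of cuBLAS: they swap the removed C-API constants (CV_LOAD_IMAGE_COLOR / CV_LOAD_IMAGE_GRAYSCALE) for the cv::ImreadModes names that OpenCV 3 and 4 expect. If builds against old OpenCV 2.x releases still matter, a version guard is the usual pattern; a sketch, assuming the standard CV_MAJOR_VERSION macro from opencv2/core/version.hpp (the flag constants themselves come from the highgui/imgcodecs header io.cpp already includes):

```cpp
// Sketch: pick whichever imread-flag spelling the OpenCV headers provide.
// CV_MAJOR_VERSION is 2 on OpenCV 2.x and 3/4 on later releases.
#if CV_MAJOR_VERSION >= 3
static const int kImreadColor = cv::IMREAD_COLOR;
static const int kImreadGray = cv::IMREAD_GRAYSCALE;
#else
static const int kImreadColor = CV_LOAD_IMAGE_COLOR;
static const int kImreadGray = CV_LOAD_IMAGE_GRAYSCALE;
#endif
```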
60 changes: 30 additions & 30 deletions src/caffe/util/math_functions.cu
@@ -22,8 +22,8 @@ void caffe_gpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
(TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
(TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
-  CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
-      N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
+  // CUBLAS_CHECK(cublasSgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
+  //     N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}

template <>
@@ -38,8 +38,8 @@ void caffe_gpu_gemm<double>(const CBLAS_TRANSPOSE TransA,
(TransA == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
cublasOperation_t cuTransB =
(TransB == CblasNoTrans) ? CUBLAS_OP_N : CUBLAS_OP_T;
-  CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
-      N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
+  // CUBLAS_CHECK(cublasDgemm(Caffe::cublas_handle(), cuTransB, cuTransA,
+  //     N, M, K, &alpha, B, ldb, A, lda, &beta, C, N));
}
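Note that commenting out the cublasSgemm/cublasDgemm calls leaves caffe_gpu_gemm returning without ever writing C, so every GPU layer that multiplies matrices will silently produce garbage; cuTransA/cuTransB are also now computed but unused. For reference, a minimal, unoptimized plain-CUDA sketch (not part of this PR) of what the deleted call computed for the no-transpose case, C = alpha*A*B + beta*C with row-major A (M x K), B (K x N), C (M x N):

```cpp
// Hypothetical naive fallback kernel; one thread per element of C.
// Handles only CblasNoTrans/CblasNoTrans and is far slower than cuBLAS.
template <typename Dtype>
__global__ void naive_gemm_kernel(const int M, const int N, const int K,
    const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta,
    Dtype* C) {
  const int row = blockIdx.y * blockDim.y + threadIdx.y;
  const int col = blockIdx.x * blockDim.x + threadIdx.x;
  if (row < M && col < N) {
    Dtype acc = 0;
    for (int k = 0; k < K; ++k) {
      acc += A[row * K + k] * B[k * N + col];
    }
    C[row * N + col] = alpha * acc + beta * C[row * N + col];
  }
}
```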

template <>
@@ -48,30 +48,30 @@ void caffe_gpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
const float beta, float* y) {
cublasOperation_t cuTransA =
(TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
-  CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
-      A, N, x, 1, &beta, y, 1));
+  // CUBLAS_CHECK(cublasSgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
+  //     A, N, x, 1, &beta, y, 1));
}

template <>
void caffe_gpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
const int N, const double alpha, const double* A, const double* x,
const double beta, double* y) {
-  cublasOperation_t cuTransA =
-      (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
-  CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
-      A, N, x, 1, &beta, y, 1));
+  // cublasOperation_t cuTransA =
+  //     (TransA == CblasNoTrans) ? CUBLAS_OP_T : CUBLAS_OP_N;
+  // CUBLAS_CHECK(cublasDgemv(Caffe::cublas_handle(), cuTransA, N, M, &alpha,
+  //     A, N, x, 1, &beta, y, 1));
}

template <>
void caffe_gpu_axpy<float>(const int N, const float alpha, const float* X,
float* Y) {
-  CUBLAS_CHECK(cublasSaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
+  // CUBLAS_CHECK(cublasSaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
}

template <>
void caffe_gpu_axpy<double>(const int N, const double alpha, const double* X,
double* Y) {
-  CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
+  // CUBLAS_CHECK(cublasDaxpy(Caffe::cublas_handle(), N, &alpha, X, 1, Y, 1));
}
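caffe_gpu_axpy is likewise reduced to a no-op. A sketch of a cuBLAS-free replacement (Y = alpha*X + Y) using the CUDA_KERNEL_LOOP / CAFFE_GET_BLOCKS / CAFFE_CUDA_NUM_THREADS helpers Caffe already defines in device_alternate.hpp:

```cpp
// Sketch: elementwise y[i] += alpha * x[i], grid-stride loop.
template <typename Dtype>
__global__ void axpy_kernel(const int n, const Dtype alpha,
    const Dtype* x, Dtype* y) {
  CUDA_KERNEL_LOOP(i, n) {
    y[i] += alpha * x[i];
  }
}

// Possible call from caffe_gpu_axpy<float>:
//   axpy_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
//       N, alpha, X, Y);
```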

void caffe_gpu_memcpy(const size_t N, const void* X, void* Y) {
@@ -82,32 +82,32 @@ void caffe_gpu_memcpy(const size_t N, const void* X, void* Y) {

template <>
void caffe_gpu_scal<float>(const int N, const float alpha, float *X) {
-  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
+  // CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}

template <>
void caffe_gpu_scal<double>(const int N, const double alpha, double *X) {
-  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
+  // CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
}

template <>
void caffe_gpu_scal<float>(const int N, const float alpha, float* X,
cudaStream_t str) {
cudaStream_t initial_stream;
-  CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream));
-  CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str));
-  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
-  CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream));
+  // CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream));
+  // CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str));
+  // CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1));
+  // CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream));
}

template <>
void caffe_gpu_scal<double>(const int N, const double alpha, double* X,
cudaStream_t str) {
cudaStream_t initial_stream;
-  CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream));
-  CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str));
-  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
-  CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream));
+  // CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream));
+  // CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str));
+  // CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1));
+  // CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream));
}

template <>
@@ -127,37 +127,37 @@ void caffe_gpu_axpby<double>(const int N, const double alpha, const double* X,
template <>
void caffe_gpu_dot<float>(const int n, const float* x, const float* y,
float* out) {
-  CUBLAS_CHECK(cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
+  // CUBLAS_CHECK(cublasSdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}

template <>
void caffe_gpu_dot<double>(const int n, const double* x, const double* y,
double * out) {
-  CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
+  // CUBLAS_CHECK(cublasDdot(Caffe::cublas_handle(), n, x, 1, y, 1, out));
}
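The dot products (and the asum calls below) are the hardest ones to replace, because they are reductions and because the cuBLAS versions wrote their scalar result to a host pointer. A float-only sketch of the reduction itself, assuming a 256-thread block and a device-side accumulator that the caller zeroes beforehand and copies back afterwards (a different contract from the commented-out cublasSdot):

```cpp
// Sketch: per-block shared-memory tree reduction, combined via atomicAdd.
// Launch with exactly 256 threads per block; 'result' is device memory
// pre-set to 0. An asum kernel would be identical with fabsf(x[i]) summed.
__global__ void dot_kernel(const int n, const float* x, const float* y,
    float* result) {
  __shared__ float cache[256];
  const int tid = threadIdx.x;
  float sum = 0.f;
  for (int i = blockIdx.x * blockDim.x + tid; i < n;
       i += blockDim.x * gridDim.x) {
    sum += x[i] * y[i];
  }
  cache[tid] = sum;
  __syncthreads();
  for (int s = blockDim.x / 2; s > 0; s >>= 1) {
    if (tid < s) cache[tid] += cache[tid + s];
    __syncthreads();
  }
  if (tid == 0) atomicAdd(result, cache[0]);
}
```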

template <>
void caffe_gpu_asum<float>(const int n, const float* x, float* y) {
-  CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y));
+  // CUBLAS_CHECK(cublasSasum(Caffe::cublas_handle(), n, x, 1, y));
}

template <>
void caffe_gpu_asum<double>(const int n, const double* x, double* y) {
-  CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
+  // CUBLAS_CHECK(cublasDasum(Caffe::cublas_handle(), n, x, 1, y));
}

template <>
void caffe_gpu_scale<float>(const int n, const float alpha, const float *x,
float* y) {
-  CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1));
-  CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1));
+  // CUBLAS_CHECK(cublasScopy(Caffe::cublas_handle(), n, x, 1, y, 1));
+  // CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), n, &alpha, y, 1));
}

template <>
void caffe_gpu_scale<double>(const int n, const double alpha, const double *x,
double* y) {
-  CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1));
-  CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1));
+  // CUBLAS_CHECK(cublasDcopy(Caffe::cublas_handle(), n, x, 1, y, 1));
+  // CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), n, &alpha, y, 1));
}
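caffe_gpu_scale originally did a cublasScopy followed by a cublasSscal; without cuBLAS the two can be fused into one elementwise kernel. A sketch, again reusing Caffe's CUDA_KERNEL_LOOP helper:

```cpp
// Sketch: y[i] = alpha * x[i], replacing the copy-then-scale pair.
template <typename Dtype>
__global__ void scale_kernel(const int n, const Dtype alpha,
    const Dtype* x, Dtype* y) {
  CUDA_KERNEL_LOOP(i, n) {
    y[i] = alpha * x[i];
  }
}
```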

template <typename Dtype>