Skip to content

Commit

Permalink
Merge pull request #410 from drnikolaev/caffe-0.16
Browse files Browse the repository at this point in the history
August release: fixes and optimizations
  • Loading branch information
drnikolaev authored Sep 3, 2017
2 parents 5e106ce + e47f3a3 commit f579901
Show file tree
Hide file tree
Showing 44 changed files with 2,838 additions and 878 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
# mex may invoke an older gcc that is too liberal with -Wuninitialized
MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized -std=c++11
LINKFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)

USE_PKG_CONFIG ?= 0
Expand Down
9 changes: 4 additions & 5 deletions include/caffe/blob.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,8 @@ class Blob {
diff_tensor_->current_memory(on_gpu);
}

size_t cpu_memory_data_use() const;
size_t cpu_memory_diff_use() const;
size_t cpu_memory_data_use(bool own_only = false) const;
size_t cpu_memory_diff_use(bool own_only = false) const;

/**
* @brief Creates an instance of a Blob with given Dtype.
Expand Down Expand Up @@ -470,8 +470,8 @@ class Blob {
}

#ifndef CPU_ONLY
size_t gpu_memory_data_use() const;
size_t gpu_memory_diff_use() const;
size_t gpu_memory_data_use(bool own_only = false) const;
size_t gpu_memory_diff_use(bool own_only = false) const;

void set_gpu_data(void* data) {
CHECK_NOTNULL(data);
Expand Down Expand Up @@ -536,7 +536,6 @@ class Blob {
shared_ptr<SyncedMemory> shape_data_;
vector<int> shape_;
int count_;
std::mutex reshape_mutex_;
Type last_data_type_, last_diff_type_; // in case of move

bool is_current_data_valid() const {
Expand Down
42 changes: 34 additions & 8 deletions include/caffe/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,10 +544,10 @@ class Caffe {
DISABLE_COPY_MOVE_AND_ASSIGN(Properties);
};

static Properties props_;

static Properties& props() {
std::lock_guard<std::mutex> lock(props_mutex_);
static Properties props;
return props;
return props_;
}
};

Expand Down Expand Up @@ -609,15 +609,28 @@ class Flag {
}
};

class MutexVec {
  static constexpr size_t TOP_ORDINAL = 128;
  std::vector<std::shared_ptr<std::mutex>> v_;

 public:
  // Pre-creates one mutex per likely device ordinal. Each slot must get its
  // OWN mutex: the previous fill-construction v_(TOP_ORDINAL, make_shared<..>())
  // copied a single shared_ptr into every slot, silently collapsing
  // per-device locking into one global lock.
  MutexVec() {
    v_.reserve(TOP_ORDINAL);
    for (size_t i = 0; i < TOP_ORDINAL; ++i) {
      v_.emplace_back(std::make_shared<std::mutex>());
    }
  }

  // Returns the mutex for device ordinal 'dev', growing the vector on demand.
  // The previous resize(v_.size(), ...) kept the size unchanged, so any
  // dev >= current size spun here forever; append fresh mutexes instead.
  // NOTE(review): growth itself is not synchronized — presumably each
  // ordinal is first touched single-threaded; confirm call sites.
  std::mutex& operator[] (size_t dev) {
    while (v_.size() <= dev) {
      v_.emplace_back(std::make_shared<std::mutex>());
    }
    return *v_[dev];
  }
};

template <typename M>
class ThreadSafeMap {
public:
ThreadSafeMap() {
// Binds this map to an externally owned mutex and allocates the underlying
// container. The reference is stored, so 'm' must outlive this object.
// NOTE(review): the map is not yet visible to other threads during
// construction, so the lock presumably serializes against other maps
// sharing the same mutex — confirm against callers.
explicit ThreadSafeMap(std::mutex& m) : m_(m) {
std::lock_guard<std::mutex> lock(m_);
map_.reset(new M());
}
~ThreadSafeMap() {}
~ThreadSafeMap() = default;

using iterator = typename M::iterator;
using const_iterator = typename M::const_iterator;
Expand All @@ -634,6 +647,11 @@ class ThreadSafeMap {
std::lock_guard<std::mutex> lock(m_);
return map_->insert(entry);
}
// Thread-safe emplace: constructs the element in the underlying map while
// holding the shared mutex.
template<class... Args>
std::pair<iterator, bool> emplace(Args&&... args) {
  std::lock_guard<std::mutex> lock(m_);
  // std::forward preserves the caller's value categories; the original
  // passed 'args...' as lvalues, forcing copies of rvalue arguments and
  // rejecting move-only ones.
  return map_->emplace(std::forward<Args>(args)...);
}
mapped_type& operator[](const key_type& key) {
std::lock_guard<std::mutex> lock(m_);
return (*map_)[key];
Expand Down Expand Up @@ -705,11 +723,9 @@ class ThreadSafeMap {

private:
std::unique_ptr<M> map_;
static std::mutex m_;
std::mutex& m_;
};

template <typename M>
std::mutex ThreadSafeMap<M>::m_;

///> the biggest number n which is not greater than val and divisible by 2^power
template<int power, typename T>
Expand Down Expand Up @@ -874,6 +890,16 @@ void atomic_minimum(std::atomic<Dtype>& min_val, Dtype const& new_val) noexcept
!min_val.compare_exchange_weak(prev_val, new_val)) {}
}

// Converts a byte count to gigabytes, rounded to two decimal places
// (e.g. 1e9 bytes -> 1.00F).
template <typename Dtype>
float gb_round2(Dtype val) {
  // Scale to hundredths of a GB first, round in double precision,
  // then shift the decimal point back.
  const double hundredths_of_gb = std::round(val * 1.e-7);
  return hundredths_of_gb * 0.01F;
}

// Rounds a value to two decimal places (e.g. 1.2345 -> 1.23F).
template <typename Dtype>
float f_round2(Dtype val) {
  // Work in "hundredths": round the scaled value, then scale back down.
  const auto hundredths = std::round(100.F * val);
  return 0.01F * hundredths;
}

} // namespace caffe

#endif // CAFFE_COMMON_HPP_
14 changes: 4 additions & 10 deletions include/caffe/data_transformer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,17 @@ class DataTransformer {
* The destination Datum that will store transformed data of a fixed
* shape. Suitable for other transformations.
*/
shared_ptr<Datum> VariableSizedTransforms(shared_ptr<Datum> old_datum);
void VariableSizedTransforms(Datum* datum);

bool var_sized_image_random_resize_enabled() const;
vector<int> var_sized_image_random_resize_shape(const vector<int>& prev_shape) const;
cv::Mat& var_sized_image_random_resize(cv::Mat& img);
void var_sized_image_random_resize(cv::Mat& img);
bool var_sized_image_random_crop_enabled() const;
vector<int> var_sized_image_random_crop_shape(const vector<int>& prev_shape) const;
cv::Mat& var_sized_image_random_crop(const cv::Mat& img);
void var_sized_image_random_crop(cv::Mat& img);
bool var_sized_image_center_crop_enabled() const;
vector<int> var_sized_image_center_crop_shape(const vector<int>& prev_shape) const;
cv::Mat& var_sized_image_center_crop(const cv::Mat& img);
void var_sized_image_center_crop(cv::Mat& img);
#endif

/**
Expand Down Expand Up @@ -240,12 +240,6 @@ class DataTransformer {
#ifndef CPU_ONLY
GPUMemory::Workspace mean_values_gpu_;
#endif
#ifdef USE_OPENCV
cv::Mat varsz_orig_img_;
cv::Mat varsz_rand_resize_img_;
cv::Mat varsz_rand_crop_img_;
cv::Mat varsz_center_crop_img_;
#endif
};

} // namespace caffe
Expand Down
1 change: 0 additions & 1 deletion include/caffe/layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ class LayerBase {
// Iteration counter maintained by Solver
int iter() const;
int relative_iter() const;
int iterations_sized() const;

void set_solver_rank(size_t solver_rank) {
solver_rank_ = solver_rank;
Expand Down
3 changes: 3 additions & 0 deletions include/caffe/layers/base_data_layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ class BasePrefetchingDataLayer : public BaseDataLayer<Ftype, Btype>, public Inte
// Returns the index of the queue a given thread should use.
// Default is the identity mapping (thread i -> queue i); virtual so
// subclasses can remap or share queues between threads.
virtual size_t queue_id(size_t thread_id) const {
return thread_id;
}
// Read-only accessor for the auto_mode_ member.
// NOTE(review): presumably reports whether the layer auto-tunes its
// prefetch configuration — confirm against the layer implementation.
virtual bool auto_mode() const {
return auto_mode_;
}

size_t batch_id(int thread_id) {
size_t id = batch_ids_[thread_id];
Expand Down
Loading

0 comments on commit f579901

Please sign in to comment.