Skip to content

Commit

Permalink
switch to hash maps for sparse vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris Dyer committed Jun 19, 2012
1 parent 6f2c793 commit 5cd58c1
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 17 deletions.
3 changes: 2 additions & 1 deletion utils/dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@

class Dict {
typedef
HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
//HASH_MAP<std::string, WordID, boost::hash<std::string> > Map;
HASH_MAP<std::string, WordID> Map;
public:
Dict() : b0_("<bad0>") {
HASH_MAP_EMPTY(d_,"<bad1>");
Expand Down
30 changes: 16 additions & 14 deletions utils/fast_sparse_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ class FastSparseVector {
}
const bool local_;
PairIntT<T>* local_it_;
typename std::map<unsigned, T>::iterator remote_it_;
typename SPARSE_HASH_MAP<unsigned, T>::iterator remote_it_;
std::pair<const unsigned, T>& operator*() const {
if (local_)
return *reinterpret_cast<std::pair<const unsigned, T>*>(local_it_);
Expand Down Expand Up @@ -142,7 +142,7 @@ class FastSparseVector {
}
const bool local_;
const PairIntT<T>* local_it_;
typename std::map<unsigned, T>::const_iterator remote_it_;
typename SPARSE_HASH_MAP<unsigned, T>::const_iterator remote_it_;
const std::pair<const unsigned, T>& operator*() const {
if (local_)
return *reinterpret_cast<const std::pair<const unsigned, T>*>(local_it_);
Expand Down Expand Up @@ -181,7 +181,7 @@ class FastSparseVector {
}
// Copy constructor. Bitwise-copies the whole object from `other` (including
// the data_ union and the is_remote_ flag), then, if the copied state is in
// remote mode, replaces the copied map pointer with a newly allocated copy of
// that map so the two vectors do not point at the same map object.
FastSparseVector(const FastSparseVector& other) {
std::memcpy(this, &other, sizeof(FastSparseVector));
// NOTE(review): the next two lines are the same statement with two different
// map types (std::map vs. SPARSE_HASH_MAP) — presumably the removed and added
// sides of this commit's diff shown without +/- markers; confirm which one is
// live in the actual header.
if (is_remote_) data_.rbmap = new std::map<unsigned, T>(*data_.rbmap);
if (is_remote_) data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(*data_.rbmap);
}
FastSparseVector(std::pair<unsigned, T>* first, std::pair<unsigned, T>* last) {
const ptrdiff_t n = last - first;
Expand All @@ -191,7 +191,7 @@ class FastSparseVector {
std::memcpy(data_.local, first, sizeof(std::pair<unsigned, T>) * n);
} else {
is_remote_ = true;
data_.rbmap = new std::map<unsigned, T>(first, last);
data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(first, last);
}
}
void erase(int k) {
Expand All @@ -213,7 +213,7 @@ class FastSparseVector {
clear();
std::memcpy(this, &other, sizeof(FastSparseVector));
if (is_remote_)
data_.rbmap = new std::map<unsigned, T>(*data_.rbmap);
data_.rbmap = new SPARSE_HASH_MAP<unsigned, T>(*data_.rbmap);
return *this;
}
T const& get_singleton() const {
Expand All @@ -237,7 +237,7 @@ class FastSparseVector {
}
inline T value(unsigned k) const {
if (is_remote_) {
typename std::map<unsigned, T>::const_iterator it = data_.rbmap->find(k);
typename SPARSE_HASH_MAP<unsigned, T>::const_iterator it = data_.rbmap->find(k);
if (it != data_.rbmap->end()) return it->second;
} else {
for (unsigned i = 0; i < local_size_; ++i) {
Expand Down Expand Up @@ -322,8 +322,8 @@ class FastSparseVector {
}
inline FastSparseVector& operator*=(const T& scalar) {
if (is_remote_) {
const typename std::map<unsigned, T>::iterator end = data_.rbmap->end();
for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
const typename SPARSE_HASH_MAP<unsigned, T>::iterator end = data_.rbmap->end();
for (typename SPARSE_HASH_MAP<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
it->second *= scalar;
} else {
for (int i = 0; i < local_size_; ++i)
Expand All @@ -333,8 +333,8 @@ class FastSparseVector {
}
inline FastSparseVector& operator/=(const T& scalar) {
if (is_remote_) {
const typename std::map<unsigned, T>::iterator end = data_.rbmap->end();
for (typename std::map<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
const typename SPARSE_HASH_MAP<unsigned, T>::iterator end = data_.rbmap->end();
for (typename SPARSE_HASH_MAP<unsigned, T>::iterator it = data_.rbmap->begin(); it != end; ++it)
it->second /= scalar;
} else {
for (int i = 0; i < local_size_; ++i)
Expand Down Expand Up @@ -431,25 +431,27 @@ class FastSparseVector {
// Flips the storage mode of this vector: if the entries currently live in the
// heap-allocated map (is_remote_ == true) they are copied into the inline
// data_.local array; otherwise a new map is allocated and filled from
// data_.local. is_remote_ is updated to match.
//
// NOTE(review): several statements below appear twice, once with std::map and
// once with SPARSE_HASH_MAP — presumably the removed and added sides of this
// commit's diff shown without +/- markers; confirm against the real header.
void swap_local_rbmap() {
if (is_remote_) { // data is in rbmap, move to local
assert(data_.rbmap->size() < LOCAL_MAX);
// Keep the map pointer in a local variable first: data_.local and
// data_.rbmap are members of the same union, so the writes to data_.local
// in the loop below overwrite the stored pointer.
// NOTE(review): `m` is not deleted anywhere in this function after its
// contents are copied out, and the stored pointer is overwritten — this
// looks like a leak of the map; confirm no caller keeps the pointer.
const std::map<unsigned, T>* m = data_.rbmap;
const SPARSE_HASH_MAP<unsigned, T>* m = data_.rbmap;
local_size_ = m->size();
int i = 0;
for (typename std::map<unsigned, T>::const_iterator it = m->begin();
for (typename SPARSE_HASH_MAP<unsigned, T>::const_iterator it = m->begin();
it != m->end(); ++it) {
data_.local[i] = *it;
++i;
}
is_remote_ = false;
} else { // data is local, move to rbmap
// Build the map from the first local_size_ inline entries before assigning
// data_.rbmap, since that assignment overwrites the inline storage.
std::map<unsigned, T>* m = new std::map<unsigned, T>(&data_.local[0], &data_.local[local_size_]);
// The SPARSE_HASH_MAP form reinterprets the PairIntT<T> entries as
// std::pair<unsigned, T> and passes local_size_ * 1.5 + 1 as a third
// argument — presumably an initial size hint for the hash table; confirm
// against the constructor of whichever type SPARSE_HASH_MAP expands to.
SPARSE_HASH_MAP<unsigned, T>* m = new SPARSE_HASH_MAP<unsigned, T>(
reinterpret_cast<std::pair<unsigned, T>*>(&data_.local[0]),
reinterpret_cast<std::pair<unsigned, T>*>(&data_.local[local_size_]), local_size_ * 1.5 + 1);
data_.rbmap = m;
is_remote_ = true;
}
}

// Element storage. The two representations overlap in memory (union):
//   local - inline array with room for LOCAL_MAX entries
//   rbmap - pointer to a map allocated with `new`
// The code visible in this file picks between them by testing is_remote_.
// NOTE(review): rbmap is declared twice below with different map types —
// presumably the removed (std::map) and added (SPARSE_HASH_MAP) sides of this
// commit's diff; only one can exist in the real header.
union {
PairIntT<T> local[LOCAL_MAX];
std::map<unsigned, T>* rbmap;
SPARSE_HASH_MAP<unsigned, T>* rbmap;
} data_;
unsigned char local_size_;
bool is_remote_;
Expand Down
7 changes: 5 additions & 2 deletions utils/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,18 @@
#endif

#ifdef HAVE_SPARSEHASH
# include <google/dense_hash_map>
# include <google/dense_hash_set>
# include <sparsehash/dense_hash_map>
# include <sparsehash/dense_hash_set>
# include <sparsehash/sparse_hash_map>
# define SPARSE_HASH_MAP google::sparse_hash_map
# define HASH_MAP google::dense_hash_map
# define HASH_SET google::dense_hash_set
# define HASH_MAP_RESERVED(h,empty,deleted) do { h.set_empty_key(empty); h.set_deleted_key(deleted); } while(0)
# define HASH_MAP_EMPTY(h,empty) do { h.set_empty_key(empty); } while(0)
#else
# include <tr1/unordered_map>
# include <tr1/unordered_set>
# define SPARSE_HASH_MAP std::tr1::unordered_map
# define HASH_MAP std::tr1::unordered_map
# define HASH_SET std::tr1::unordered_set
# define HASH_MAP_RESERVED(h,empty,deleted)
Expand Down

0 comments on commit 5cd58c1

Please sign in to comment.