Merge branch 'fix-GAR-BufferChannel-junzhang' into 'main'
Fix Grouped AllReduce problem of DP tables

See merge request dl/hugectr/hugectr!1455
minseokl committed Aug 28, 2023
2 parents 3b6f182 + 7a140d7 commit 0c8c125
Showing 5 changed files with 57 additions and 52 deletions.
9 changes: 2 additions & 7 deletions HugeCTR/embedding/common.cpp
@@ -575,13 +575,8 @@ AllreduceWgradInitializer &AllreduceWgradInitializer::init_data(
   core23::TensorParams wgrads_params = core23::TensorParams().device(device);
   int alignment_num = 32;
   if (grouped) {
-    if (wgrad->attr.type.type() == core23::ScalarType::Float) {
-      wgrads_params.alignment(alignment_num).buffer_channel(buffer_channel);
-    } else if (wgrad->attr.type.type() == core23::ScalarType::Half) {
-      wgrads_params.alignment(alignment_num).buffer_channel(buffer_channel);
-    } else {
-      HCTR_OWN_THROW(HugeCTR::Error_t::WrongInput, "Embedding wgrad type set wrong can't support!");
-    }
+    // out-of-place modifications
+    wgrads_params = wgrads_params.alignment(alignment_num).buffer_channel(buffer_channel);
   }
   wgrad->data = core23::Tensor(wgrads_params.shape({max_buffer_size}).data_type(wgrad->attr.type));
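Why this fixes anything: the deleted Float/Half branches were identical, and both discarded their own result. Judging from the new "out-of-place modifications" comment, TensorParams::alignment() and buffer_channel() return a modified copy rather than mutating the object, so the old code never actually attached the wgrad tensor to the grouped buffer channel. A minimal sketch of the pitfall, using a hypothetical Params class rather than the real core23::TensorParams:

#include <cstdint>
#include <iostream>

// Builder-style params whose setters are out-of-place: they return a
// modified copy and leave *this untouched (hypothetical stand-in).
class Params {
 public:
  [[nodiscard]] Params alignment(int64_t a) const {
    Params p = *this;  // copy first ...
    p.alignment_ = a;  // ... then modify the copy
    return p;
  }
  int64_t alignment_value() const { return alignment_; }

 private:
  int64_t alignment_ = 0;
};

int main() {
  Params p;
  p.alignment(32);                           // bug: returned copy is discarded
  std::cout << p.alignment_value() << '\n';  // prints 0
  p = p.alignment(32);                       // fix: assign the copy back
  std::cout << p.alignment_value() << '\n';  // prints 32
}

Marking such setters [[nodiscard]], as the sketch does, turns the buggy call into a compiler diagnostic.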
5 changes: 3 additions & 2 deletions HugeCTR/src/network_buffer_channels.cpp
@@ -22,12 +22,13 @@ namespace HugeCTR {

 namespace {

-std::unordered_map<NetworkBufferChannelType, std::string> g_type_to_name = {
+static std::unordered_map<NetworkBufferChannelType, std::string> g_type_to_name = {
     {NetworkBufferChannelType::Blobs, "BLOBS"}, {NetworkBufferChannelType::Weight, "WEIGHT"},
     {NetworkBufferChannelType::WeightHalf, "WH"}, {NetworkBufferChannelType::Wgrad, "WG"},
     {NetworkBufferChannelType::WgradHalf, "WGH"}, {NetworkBufferChannelType::OptState, "OPT"},
 };
-}
+
+}  // namespace
 std::string SetNetworkBufferChannel(NetworkBufferChannelType type, const std::string& new_name) {
   if (g_type_to_name.find(type) == g_type_to_name.end()) {
     HCTR_OWN_THROW(Error_t::WrongInput, "There is no such BufferChannel type");
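Two notes on this hunk. First, the added static is redundant: an anonymous namespace already gives g_type_to_name internal linkage (though it is harmless). Second, judging from how it is consumed in add_dense_layer.cpp below, SetNetworkBufferChannel swaps in a new channel name and returns the previous one so the caller can restore it later. A minimal sketch of that contract, with a simplified enum and map standing in for the real ones:

#include <stdexcept>
#include <string>
#include <unordered_map>

enum class ChannelType { Blobs, Weight };

static std::unordered_map<ChannelType, std::string> g_names = {
    {ChannelType::Blobs, "BLOBS"}, {ChannelType::Weight, "WEIGHT"}};

// Set a new name for the channel and hand back the old one so the
// caller can undo the override (mirrors how SetNetworkBufferChannel is used).
std::string SetChannelName(ChannelType type, const std::string& new_name) {
  auto it = g_names.find(type);
  if (it == g_names.end()) {
    throw std::invalid_argument("no such channel type");
  }
  std::string old_name = it->second;
  it->second = new_name;
  return old_name;
}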
27 changes: 17 additions & 10 deletions HugeCTR/src/pybind/add_dense_layer.cpp
@@ -1042,21 +1042,28 @@ void Model::add_dense_layers(std::vector<DenseLayer>& dense_layers) {
       }
     }
   };
-  // use default buffer channel for eval
-  add_dense_layers_op(false);
-  std::unordered_map<NetworkBufferChannelType, std::string> new_channel = {
-      {NetworkBufferChannelType::Blobs, "TRAIN_BLOBS"},
-      {NetworkBufferChannelType::WeightHalf, "TRAIN_WEIGHT_HALF"},
-      {NetworkBufferChannelType::Weight, "TRAIN_WEIGHT"},
-      {NetworkBufferChannelType::Wgrad, "TRAIN_WGRAD"},
-      {NetworkBufferChannelType::WgradHalf, "TRAIN_WGRAD_HALF"},
+  std::unordered_map<NetworkBufferChannelType, std::string> original_channel;
+  const std::unordered_map<NetworkBufferChannelType, std::string> new_channel = {
+      {NetworkBufferChannelType::Blobs, "EVAL_BLOBS"},
+      {NetworkBufferChannelType::WeightHalf, "EVAL_WEIGHT_HALF"},
+      {NetworkBufferChannelType::Weight, "EVAL_WEIGHT"},
+      {NetworkBufferChannelType::Wgrad, "EVAL_WGRAD"},
+      {NetworkBufferChannelType::WgradHalf, "EVAL_WGRAD_HALF"},
   };

-  // set bufferchannel for train layer
-  // freeze the train BufferChannel because the ExchangeWgrad needs it
+  //! Embeddings and Train layers should use default channels;
+  //! set new buffer channel for eval layers
+  for (auto it = new_channel.begin(); it != new_channel.end(); it++) {
+    auto original = SetNetworkBufferChannel(it->first, it->second);
+    original_channel.emplace(std::make_pair(it->first, original));
+  }
+  add_dense_layers_op(false);
+
+  //! Restore the channel
+  for (auto it = original_channel.begin(); it != original_channel.end(); it++) {
+    SetNetworkBufferChannel(it->first, it->second);
+  }
+
   add_dense_layers_op(true);
 }
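The ordering change is the heart of the fix. Before, the eval network was built under the default buffer channels and the train network under renamed TRAIN_* channels; now the eval network is built under renamed EVAL_* channels, saved and restored around the call, and the train network keeps the defaults. Per the commit title and the new comments, the point appears to be that the grouped-allreduce wgrad of data-parallel embedding tables (see the common.cpp hunk above) must land in the same default buffer channel as the dense train wgrad, which ExchangeWgrad depends on. The save/override/restore idiom could also be packaged as an RAII guard; a hedged sketch, reusing the hypothetical ChannelType/SetChannelName stand-ins from the previous sketch (not HugeCTR API):

#include <string>

// Override a channel name for the lifetime of the guard; restore on exit.
class ScopedChannel {
 public:
  ScopedChannel(ChannelType type, const std::string& new_name)
      : type_(type), old_name_(SetChannelName(type, new_name)) {}
  ~ScopedChannel() { SetChannelName(type_, old_name_); }
  ScopedChannel(const ScopedChannel&) = delete;
  ScopedChannel& operator=(const ScopedChannel&) = delete;

 private:
  ChannelType type_;
  std::string old_name_;
};

void build_eval_network() {
  ScopedChannel guard(ChannelType::Blobs, "EVAL_BLOBS");
  // ... tensors allocated here see the EVAL_BLOBS channel ...
}  // original channel name restored here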
66 changes: 34 additions & 32 deletions test/utest/core23_layer_test/multi_cross_layer_test.cpp
@@ -26,22 +26,6 @@
 #include <vector>

 using namespace HugeCTR;
-namespace {
-__half operator*(const __half& lhs, const __half& rhs) {
-  return __float2half(__half2float(lhs) * __half2float(rhs));
-}
-__half operator+(const __half& lhs, const __half& rhs) {
-  return __float2half(__half2float(lhs) + __half2float(rhs));
-}
-__half operator*(const __half& lhs, const float& rhs) {
-  return __float2half(__half2float(lhs) * (rhs));
-}
-__half operator*(const float& lhs, const __half& rhs) { return rhs * lhs; }
-__half operator+(const __half& lhs, const float& rhs) {
-  return __float2half(__half2float(lhs) + (rhs));
-}
-__half operator+(const float& lhs, const __half& rhs) { return rhs + lhs; }
-}  // namespace

 template <typename T>
 class MultiCrossLayerTest {

@@ -170,7 +154,9 @@ class MultiCrossLayerTest {
       out[j] = 0.0f;
       for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[j] = out[j] + T(in_m[k] * in_v[i]);
+        out[j] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(out[j]) +
+                                                TypeConvert<float, T>::convert(in_m[k]) *
+                                                TypeConvert<float, T>::convert(in_v[i]));
       }
     }
   }

@@ -194,9 +180,11 @@ class MultiCrossLayerTest {
         float acc = 0.f;
         for (size_t k = 0; k < rowB; k++) {
           // column of A is rowA
-          acc = acc + (A[r * rowB + k] * B[c * rowB + k]);
+          acc = acc + TypeConvert<float, T>::convert(A[r * rowB + k]) *
+                          TypeConvert<float, T>::convert(B[c * rowB + k]);
         }
-        C[r * colB + c] = ((C[r * colB + c]) * beta + (acc));
+        C[r * colB + c] = TypeConvert<T, float>::convert(
+            TypeConvert<float, T>::convert(C[r * colB + c]) * beta + acc);
       }
     }
   } else if (transA) {

@@ -206,9 +194,11 @@ class MultiCrossLayerTest {
         float acc = 0.f;
         for (size_t k = 0; k < rowB; k++) {
           // column of A is rowA
-          acc = acc + (A[k * rowA + r] * B[k * colB + c]);
+          acc = acc + TypeConvert<float, T>::convert(A[k * rowA + r]) *
+                          TypeConvert<float, T>::convert(B[k * colB + c]);
         }
-        C[r * colB + c] = ((C[r * colB + c]) * beta + (acc));
+        C[r * colB + c] = TypeConvert<T, float>::convert(
+            TypeConvert<float, T>::convert(C[r * colB + c]) * beta + (acc));
       }
     }
   } else {

@@ -217,9 +207,11 @@ class MultiCrossLayerTest {
         float acc = 0.f;
         for (size_t k = 0; k < rowB; k++) {
           // column of A is rowB
-          acc = acc + (A[r * rowB + k] * B[k * colB + c]);
+          acc = acc + TypeConvert<float, T>::convert(A[r * rowB + k]) *
+                          TypeConvert<float, T>::convert(B[k * colB + c]);
         }
-        C[r * colB + c] = ((C[r * colB + c]) * beta + (acc));
+        C[r * colB + c] = TypeConvert<T, float>::convert(
+            TypeConvert<float, T>::convert(C[r * colB + c]) * beta + (acc));
       }
     }
   }

@@ -234,7 +226,8 @@ class MultiCrossLayerTest {
   void matrix_matrix_elementwise_dot(T* C, const T* A, const T* B, size_t w, size_t batchsize) {
     for (size_t r = 0; r < batchsize; r++) {
       for (size_t c = 0; c < w; c++) {
-        C[r * w + c] = A[r * w + c] * B[r * w + c];
+        C[r * w + c] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(A[r * w + c]) *
+                                                      TypeConvert<float, T>::convert(B[r * w + c]));
       }
     }

@@ -248,7 +241,8 @@ class MultiCrossLayerTest {
                                      std::ofstream& ofs) {
     for (size_t r = 0; r < batchsize; r++) {
       for (size_t c = 0; c < w; c++) {
-        C[r * w + c] = A[r * w + c] * B[r * w + c];
+        C[r * w + c] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(A[r * w + c]) *
+                                                      TypeConvert<float, T>::convert(B[r * w + c]));
         ofs << "C(" << r << "," << c << ") is " << A[r * w + c] << " * " << B[r * w + c] << "="
             << C[r * w + c] << std::endl;
       }

@@ -259,7 +253,8 @@ class MultiCrossLayerTest {
     for (size_t j = 0; j < h; j++) {
       for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[k] = in_m[k] * in_v[j];
+        out[k] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(in_m[k]) *
+                                                TypeConvert<float, T>::convert(in_v[j]));
       }
     }
   }

@@ -268,7 +263,8 @@ class MultiCrossLayerTest {
     for (size_t j = 0; j < h; j++) {
       for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[k] = in_m_1[k] + in_m_2[k];
+        out[k] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(in_m_1[k]) +
+                                                TypeConvert<float, T>::convert(in_m_2[k]));
       }
     }
   }

@@ -277,7 +273,8 @@ class MultiCrossLayerTest {
     for (size_t j = 0; j < h; j++) {
      for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[k] = in_m[k] + in_v[i];
+        out[k] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(in_m[k]) +
+                                                TypeConvert<float, T>::convert(in_v[i]));
       }
     }
   }

@@ -287,7 +284,9 @@ class MultiCrossLayerTest {
       out[j] = 0.0f;
       for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[j] = out[j] + T(in_m_1[k] * in_m_2[k]);
+        out[j] = TypeConvert<T, float>::convert(TypeConvert<float, T>::convert(out[j]) +
+                                                (TypeConvert<float, T>::convert(in_m_1[k]) *
+                                                 TypeConvert<float, T>::convert(in_m_2[k])));
       }
     }
   }

@@ -297,7 +296,9 @@ class MultiCrossLayerTest {
       out[i] = 0.0f;
       for (size_t j = 0; j < h; j++) {
         size_t k = j * w + i;
-        out[i] = out[i] + T(in_m[k] * in_v[j]);
+        out[i] = TypeConvert<T, float>::convert(
+            TypeConvert<float, T>::convert(out[i]) +
+            (TypeConvert<float, T>::convert(in_m[k]) * TypeConvert<float, T>::convert(in_v[j])));
       }
     }
   }

@@ -307,7 +308,7 @@ class MultiCrossLayerTest {
      out[i] = 0.0f;
      for (size_t j = 0; j < h; j++) {
        size_t k = j * w + i;
-       out[i] = out[i] + in_m[k];
+       out[i] = (TypeConvert<float, T>::convert(out[i]) + TypeConvert<float, T>::convert(in_m[k]));
      }
    }
  }

@@ -316,7 +317,8 @@ class MultiCrossLayerTest {
     for (size_t j = 0; j < h; j++) {
       for (size_t i = 0; i < w; i++) {
         size_t k = j * w + i;
-        out[k] = in_v_1[j] * in_v_2[i];
+        out[k] =
+            (TypeConvert<float, T>::convert(in_v_1[j]) * TypeConvert<float, T>::convert(in_v_2[i]));
       }
     }
   }
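All of the arithmetic hunks in this file follow one pattern: instead of overloading host-side __half operators (deleted in the first hunk, presumably to avoid colliding with overloads that newer CUDA toolkits provide for host code), every value is converted to float with TypeConvert<float, T>::convert, the math is done in float, and the result is converted back with TypeConvert<T, float>::convert. A minimal sketch of such a trait, showing only the specializations these hunks need (the real HugeCTR trait may differ):

#include <cuda_fp16.h>

template <typename To, typename From>
struct TypeConvert;

template <>
struct TypeConvert<float, float> {
  static float convert(float v) { return v; }
};

template <>
struct TypeConvert<float, __half> {
  static float convert(__half v) { return __half2float(v); }
};

template <>
struct TypeConvert<__half, float> {
  static __half convert(float v) { return __float2half(v); }
};

Doing the reference math in float is also numerically safer: __half carries roughly 11 significant bits, so a half-precision accumulator stops growing once increments fall below its ulp, while a float accumulator does not. A small host-side demonstration (assumes a CUDA toolchain for cuda_fp16.h):

#include <cstdio>
#include <cuda_fp16.h>

int main() {
  __half acc_h = __float2half(0.f);
  float acc_f = 0.f;
  for (int i = 0; i < 4096; ++i) {
    acc_h = __float2half(__half2float(acc_h) + 1.f);  // round-trips through half
    acc_f += 1.f;
  }
  std::printf("half accumulator:  %.1f\n", __half2float(acc_h));  // 2048.0
  std::printf("float accumulator: %.1f\n", acc_f);                // 4096.0
}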
@@ -58,7 +58,7 @@ void elementwise_multiply_dgrad_cpu(const T *top_grad, T **dgrad, const T *fprop

   for (size_t i = 0; i < size; i++) {
     for (size_t j = 0; j < num; j++) {
-      if (0 == fprop_output[i]) {
+      if (zero == fprop_output[i]) {
        dgrad[j][i] = zero;
      } else {
        T d_input = dgrad[j][i];
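Same theme as the previous file: with T = __half, comparing against the integer literal 0 depends on host-side conversions and operators that are not guaranteed on every toolchain, so the test now compares against the existing zero of type T. An equivalent, self-contained way to express the check (hypothetical helper, not HugeCTR API):

#include <cuda_fp16.h>

// Compare in float so the check does not rely on host operator== for __half.
inline bool is_zero(__half value) { return __half2float(value) == 0.f; }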
