diff --git a/.gitignore b/.gitignore
index 66b8a9b4a..662a52dc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -455,3 +455,6 @@ dask-worker-space/
 *.pub
 *.rdp
 *_rsa
+
+# Others
+src/main.*.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e47718ec4..40e433dd1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -271,7 +271,7 @@ if(${MM_MALLOC})
 endif()
 
 if(UNIX OR MINGW OR CYGWIN)
-  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 -lstdc++fs -pthread -Wextra -Wall -Wno-ignored-attributes -Wno-unknown-pragmas -Wno-return-type ")
+  SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pthread -Wextra -Wall -Wno-ignored-attributes -Wno-unknown-pragmas -Wno-return-type ")
   if(USE_DEBUG)
     SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
   else()
@@ -338,21 +338,13 @@ file(GLOB SOURCES
     src/objective/*.cpp
     src/network/*.cpp
     src/treelearner/*.cpp
-    src/application/*.cpp
 if(USE_CUDA)
     src/treelearner/*.cu
 endif(USE_CUDA)
 )
 
-# To ease out the debugging
-set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0")
-
-## NOTE: CUSTOM FLAGS
-#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++fs") #NOTE: may only be needed for running on the hdp
-
-#add_executable(lightgbm src/main.cpp ${SOURCES}) # NOTE: this was the original command!
-add_executable(lightgbm src/main.multiple_runs.cpp ${SOURCES}) # NOTE: changed it to this one!
+add_executable(lightgbm src/main.cpp src/application/application.cpp ${SOURCES})
+#add_executable(fairgbm_multiple_runs src/main.multiple_runs.cpp src/application/application.cpp ${SOURCES})
 
 list(APPEND SOURCES "src/c_api.cpp")
 
 # Only build the R part of the library if building for
@@ -365,7 +357,6 @@ if (BUILD_STATIC_LIB)
   add_library(_lightgbm STATIC ${SOURCES})
 else()
   add_library(_lightgbm SHARED ${SOURCES})
-  # add_library(_lightgbm SHARED ${SOURCES} include/LightGBM/utils/constrained.hpp)
 endif(BUILD_STATIC_LIB)
 
 if(MSVC)
diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h
index 98457a4bf..8a2814cb9 100644
--- a/include/LightGBM/config.h
+++ b/include/LightGBM/config.h
@@ -194,7 +194,7 @@ struct Config {
   // check = >0.0
   // desc = learning rate for the constrained boosting
   // desc = it only takes effect when using a constrained objective function
-  double lagrangian_learning_rate = 0.1;
+  double multiplier_learning_rate = 0.1;
 
   // type = multi-double
   // default = None
@@ -204,7 +204,7 @@ struct Config {
   // desc = if not specified, will use 0 weight penalty for all constraints,
   // desc = which is equivalent to using the unconstrained version in the
   // desc = first iteration.
-  std::vector<double> init_lagrangians;
+  std::vector<double> init_lagrange_multipliers;
 
   // default = 31
   // alias = num_leaf, max_leaves, max_leaf
@@ -823,6 +823,7 @@ struct Config {
 
   #pragma endregion
 
+  // type = string
   // desc = used only in ``training`` task
   // desc = output dir of gradients and hessians per iteration
   // desc = **Note**: can be used only in CLI version
@@ -924,7 +925,7 @@ struct Config {
 
   // type = string
   // desc = type of proxy function to use in constraints (hinge, quadratic, cross_entropy)
-  std::string constraint_stepwise_proxy = "quadratic";
+  std::string constraint_stepwise_proxy = "cross_entropy";
 
   // type = string
   // desc = type of proxy function to use as the proxy objective
@@ -966,15 +967,17 @@ struct Config {
   std::string global_constraint_type;
 
   // check = >=0
-  // check = <1.0
+  // check = <=1.0
   // type = double
+  // default = 1.0
   // desc = used only in ``constrained_cross_entropy`` application
   // desc = target rate for the global FPR constraint
   double global_target_fpr = 1.;
 
   // check = >=0
-  // check = <1.0
+  // check = <=1.0
   // type = double
+  // default = 1.0
   // desc = used only in ``constrained_cross_entropy`` application
   // desc = target rate for the global FNR constraint
   double global_target_fnr = 1.;
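Note on the expected layout of `init_lagrange_multipliers`: entry 0 weighs the predictive loss and entries 1..m weigh the m constraint violations, so the vector must hold NumConstraints() + 1 entries. A minimal sketch of the Lagrangian this parameterizes (standalone illustrative code, with hypothetical names; not part of the patch):

    // Sketch: L(theta, lambda) = lambda_0 * loss + sum_i lambda_i * c_i(theta),
    // for inequality constraints c_i(theta) <= 0.
    #include <vector>

    double lagrangian(double predictive_loss,
                      const std::vector<double>& constraint_values,   // c_1 .. c_m
                      const std::vector<double>& multipliers) {       // lambda_0 .. lambda_m
      double value = multipliers[0] * predictive_loss;
      for (size_t i = 0; i < constraint_values.size(); ++i) {
        value += multipliers[i + 1] * constraint_values[i];
      }
      return value;
    }
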
diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h
index ca4183b81..db0b3fb8d 100644
--- a/include/LightGBM/dataset.h
+++ b/include/LightGBM/dataset.h
@@ -37,7 +37,7 @@ class DatasetLoader;
 * 4. Query Weights, auto calculate by weights and query_boundaries(if both of them are existed)
 *    the weight for i-th query is sum(query_boundaries[i] , .., query_boundaries[i+1]) / (query_boundaries[i + 1] - query_boundaries[i+1])
 * 5. Initial score. optional. if existing, the model will boost from this score, otherwise will start from 0.
-* 6. [FairGBM-only] Group, used for training during constrain optimization.
+* 6. [FairGBM-only] Group, used for training during constrained optimization.
 */
 class Metadata {
  public:
@@ -208,8 +208,8 @@ class Metadata {
   * \param idx Index of this record
   * \param value Group constraint value of this record
   */
-  inline void SetGroupConstraintAt(data_size_t idx, data_size_t value) {
-    group_[idx] = static_cast<group_t>(value);
+  inline void SetGroupConstraintAt(data_size_t idx, group_t value) {
+    group_[idx] = value;
   }
 
   /*!
@@ -220,7 +220,7 @@
   /*! \brief Get unique groups in data */
   inline std::vector<group_t> group_values() const {
-    std::vector<group_t> values = group_;  // copy
+    std::vector<group_t> values(group_);
     std::sort(values.begin(), values.end());
     auto last = std::unique(values.begin(), values.end());
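The `group_values()` accessor above uses the standard sort-then-unique idiom. The same pattern as a self-contained sketch (plain `int` stands in for `group_t`):

    #include <algorithm>
    #include <vector>

    std::vector<int> unique_groups(const std::vector<int>& group) {
      std::vector<int> values(group);                          // work on a copy
      std::sort(values.begin(), values.end());                 // std::unique needs sorted input
      auto last = std::unique(values.begin(), values.end());   // collapse adjacent duplicates
      values.erase(last, values.end());                        // drop the leftover tail
      return values;
    }
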
diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h
index 431f59253..17f8002ed 100644
--- a/include/LightGBM/objective_function.h
+++ b/include/LightGBM/objective_function.h
@@ -145,7 +145,6 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     label_ = metadata.label();
     weights_ = metadata.weights();
 
-    // ----------------------------------------------------- START FAIRBGM
     // Store Information about the group
     group_ = metadata.group();
     group_values_ = metadata.group_values();
@@ -154,7 +153,6 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     total_label_positives_ = 0;
     total_label_negatives_ = 0;
     ComputeLabelCounts();
-    // -----------------------------------------------------END FAIRGM Update
 
     CHECK_NOTNULL(label_);
     Common::CheckElementsIntervalClosed(label_, 0.0f, 1.0f, num_data_, GetName());
@@ -197,7 +195,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
   virtual std::vector<double> GetLagrangianGradientsWRTMultipliers(const double *score) const
   {
     if (weights_ != nullptr)
-      throw std::runtime_error("not implemented yet");
+      throw std::logic_error("not implemented yet");  // TODO: https://github.com/feedzai/fairgbm/issues/5
 
     std::vector<double> functions;
     std::unordered_map<group_t, double> group_fpr, group_fnr;
@@ -211,7 +209,8 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     // 1st Lagrange multiplier corresponds to predictive loss function
     double sum_loss = 0.0;
-    // #pragma omp parallel for schedule(static) reduction(+:sum_loss)
+
+// #pragma omp parallel for schedule(static) reduction(+:sum_loss)
     for (data_size_t i = 0; i < num_data_; ++i)
     {
       sum_loss += this->ComputePredictiveLoss(label_[i], score[i]);
@@ -223,7 +222,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     if (IsFPRConstrained())
     {
       ComputeFPR(score, score_threshold_, group_fpr);
-      double max_fpr = findMaxValuePair(group_fpr).second;
+      double max_fpr = Constrained::findMaxValuePair(group_fpr).second;
 
       // Assuming group_values_ is in ascending order
       for (const auto &group : group_values_)
@@ -231,8 +230,10 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
         double fpr_constraint_value = max_fpr - group_fpr[group] - fpr_threshold_;
         functions.push_back(fpr_constraint_value);
 
-#ifdef FAIRGBM_DEBUG
-        std::cout << "DEBUG; true FPR constraint value: c=" << max_fpr << "-" << group_fpr[group] << "=" << fpr_constraint_value << std::endl;
+#ifdef DEBUG
+        Log::Debug(
+                "DEBUG; true FPR constraint value: c = %.3f - %.3f = %.3f\n",
+                max_fpr, group_fpr[group], fpr_constraint_value);
 #endif
       }
     }
@@ -241,7 +242,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     if (IsFNRConstrained())
    {
       ComputeFNR(score, score_threshold_, group_fnr);
-      double max_fnr = findMaxValuePair(group_fnr).second;
+      double max_fnr = Constrained::findMaxValuePair(group_fnr).second;
 
       // Assuming group_values_ is in ascending order
       for (const auto &group : group_values_)
@@ -249,8 +250,10 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
         double fnr_constraint_value = max_fnr - group_fnr[group] - fnr_threshold_;
         functions.push_back(fnr_constraint_value);
 
-#ifdef FAIRGBM_DEBUG
-        std::cout << "DEBUG; true FNR constraint value: c=" << max_fnr << "-" << group_fnr[group] << "=" << fnr_constraint_value << std::endl;
+#ifdef DEBUG
+        Log::Debug(
+                "DEBUG; true FNR constraint value: c = %.3f - %.3f = %.3f\n",
+                max_fnr, group_fnr[group], fnr_constraint_value);
 #endif
       }
     }
@@ -263,8 +266,10 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
 
       functions.push_back(global_fpr_constraint_value);
 
-#ifdef FAIRGBM_DEBUG
-      std::cout << "DEBUG; true global FPR constraint value: c=" << global_fpr << "-" << global_target_fpr_ << "=" << global_fpr_constraint_value << std::endl;
+#ifdef DEBUG
+      Log::Debug(
+              "DEBUG; true global FPR constraint value: c = %.3f - %.3f = %.3f\n",
+              global_fpr, global_target_fpr_, global_fpr_constraint_value);
 #endif
     }
 
@@ -276,8 +281,10 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
 
      functions.push_back(global_fnr_constraint_value);
 
-#ifdef FAIRGBM_DEBUG
-      std::cout << "DEBUG; true global FNR constraint value: c=" << global_fnr << "-" << global_target_fnr_ << "=" << global_fnr_constraint_value << std::endl;
+#ifdef DEBUG
+      Log::Debug(
+              "DEBUG; true global FNR constraint value: c = %.3f - %.3f = %.3f\n",
+              global_fnr, global_target_fnr_, global_fnr_constraint_value);
 #endif
     }
 
@@ -329,7 +336,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
       else
         throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
 
-      max_proxy_fpr = findMaxValuePair(group_fpr);
+      max_proxy_fpr = Constrained::findMaxValuePair(group_fpr);
     }
     if (IsFNRConstrained())
     {
@@ -342,7 +349,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
       else
        throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
 
-      max_proxy_fnr = findMaxValuePair(group_fnr);
+      max_proxy_fnr = Constrained::findMaxValuePair(group_fnr);
     }
 
     /** ---------------------------------------------------------------- *
@@ -350,11 +357,11 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
      * ---------------------------------------------------------------- */
     if (weights_ != nullptr)
     {
-      throw std::runtime_error("not implemented");
+      throw std::logic_error("not implemented yet");  // TODO: https://github.com/feedzai/fairgbm/issues/5
     }
 
     // compute pointwise gradients and hessians with implied unit weights
-    // #pragma omp parallel for schedule(static)  // FIXME: there seems to be weird behavior with this directive
+// #pragma omp parallel for schedule(static)  // TODO: https://github.com/feedzai/fairgbm/issues/6
     for (data_size_t i = 0; i < num_data_; ++i)
     {
       const auto group = group_[i];
@@ -404,13 +411,15 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
           fpr_constraints_gradient_wrt_pred = score[i] <= -proxy_margin_ ? 0. : 1. / group_ln;
 
         // Derivative for BCE-based proxy FPR
-        else if (constraint_stepwise_proxy == "cross_entropy")
-          fpr_constraints_gradient_wrt_pred = (sigmoid(score[i] + xent_horizontal_shift)) / group_ln;
-          // fpr_constraints_gradient_wrt_pred = (sigmoid(score[i]) - label_[i]) / group_ln;  // without margin
+        else if (constraint_stepwise_proxy == "cross_entropy") {
+          fpr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i] + xent_horizontal_shift)) / group_ln;
+//          fpr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / group_ln;  // without margin
+        }
 
         // Loss-function implicitly defined as having a hinge-based derivative (quadratic loss)
-        else if (constraint_stepwise_proxy == "quadratic")
+        else if (constraint_stepwise_proxy == "quadratic") {
           fpr_constraints_gradient_wrt_pred = std::max(0., score[i] + proxy_margin_) / group_ln;
+        }
 
         else
           throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
@@ -464,13 +473,15 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
           fnr_constraints_gradient_wrt_pred = score[i] >= proxy_margin_ ? 0. : -1. / group_lp;
 
         // Derivative for BCE-based proxy FNR
-        else if (constraint_stepwise_proxy == "cross_entropy")
-          fnr_constraints_gradient_wrt_pred = (sigmoid(score[i] - xent_horizontal_shift) - 1) / group_lp;
-          // fnr_constraints_gradient_wrt_pred = (sigmoid(score[i]) - label_[i]) / group_lp;  // without margin
+        else if (constraint_stepwise_proxy == "cross_entropy") {
+          fnr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i] - xent_horizontal_shift) - 1) / group_lp;
+//          fnr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / group_lp;  // without margin
+        }
 
         // Loss-function implicitly defined as having a hinge-based derivative (quadratic loss)
-        else if (constraint_stepwise_proxy == "quadratic")
+        else if (constraint_stepwise_proxy == "quadratic") {
           fnr_constraints_gradient_wrt_pred = std::min(0., score[i] - proxy_margin_) / group_lp;
+        }
 
         else
          throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
@@ -517,17 +528,20 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
       {  // Condition for non-zero gradient
         double global_fpr_constraint_gradient_wrt_pred;
 
         // Gradient for hinge proxy FPR
-        if (constraint_stepwise_proxy == "hinge")
+        if (constraint_stepwise_proxy == "hinge") {
           global_fpr_constraint_gradient_wrt_pred = score[i] >= -proxy_margin_ ? 1. / total_label_negatives_ : 0.;
+        }
 
         // Gradient for BCE proxy FPR
-        else if (constraint_stepwise_proxy == "cross_entropy")
-          global_fpr_constraint_gradient_wrt_pred = (sigmoid(score[i] + xent_horizontal_shift)) / total_label_negatives_;
-          // global_fpr_constraint_gradient_wrt_pred = (sigmoid(score[i]) - label_[i]) / total_label_negatives_;  // without margin
+        else if (constraint_stepwise_proxy == "cross_entropy") {
+          global_fpr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i] + xent_horizontal_shift)) / total_label_negatives_;
+//          global_fpr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / total_label_negatives_;  // without margin
+        }
 
         // Hinge-based gradient (for quadratic proxy FPR)
-        else if (constraint_stepwise_proxy == "quadratic")
+        else if (constraint_stepwise_proxy == "quadratic") {
           global_fpr_constraint_gradient_wrt_pred = std::max(0., score[i] + proxy_margin_) / total_label_negatives_;
+        }
 
         else
           throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
@@ -548,20 +562,24 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
         double global_fnr_constraint_gradient_wrt_pred;
 
         // Gradient for hinge proxy FNR
-        if (constraint_stepwise_proxy == "hinge")
+        if (constraint_stepwise_proxy == "hinge") {
           global_fnr_constraint_gradient_wrt_pred = score[i] >= proxy_margin_ ? 0. : -1. / total_label_positives_;
+        }
 
         // Gradient for BCE proxy FNR
-        else if (constraint_stepwise_proxy == "cross_entropy")
-          global_fnr_constraint_gradient_wrt_pred = (sigmoid(score[i] - xent_horizontal_shift) - 1) / total_label_positives_;
-          // global_fnr_constraint_gradient_wrt_pred = (sigmoid(score[i]) - label_[i]) / total_label_positives_;  // without margin
+        else if (constraint_stepwise_proxy == "cross_entropy") {
+          global_fnr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i] - xent_horizontal_shift) - 1) / total_label_positives_;
+//          global_fnr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / total_label_positives_;  // without margin
+        }
 
         // Hinge-based gradient (for quadratic proxy FNR)
-        else if (constraint_stepwise_proxy == "quadratic")
+        else if (constraint_stepwise_proxy == "quadratic") {
           global_fnr_constraint_gradient_wrt_pred = std::min(0., score[i] - proxy_margin_) / total_label_positives_;
+        }
 
-        else
+        else {
           throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!");
+        }
 
         // Update instance gradient and hessian
         gradients[i] += (score_t)(lagrangian_multipliers[multipliers_base_index] *
@@ -695,7 +713,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     std::unordered_map<group_t, double> false_positives;  // map of group index to the respective hinge-proxy FPs
     std::unordered_map<group_t, int> label_negatives;     // map of group index to the respective number of LNs
 
-    // #pragma omp parallel for schedule(static)  // FIXME
+    // #pragma omp parallel for schedule(static)  // TODO: https://github.com/feedzai/fairgbm/issues/6
     for (data_size_t i = 0; i < num_data_; ++i)
     {
       group_t group = group_[i];
@@ -736,7 +754,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     std::unordered_map<group_t, double> false_positives;  // map of group index to the respective proxy FPs
     std::unordered_map<group_t, int> label_negatives;     // map of group index to the respective number of LNs
 
-    // #pragma omp parallel for schedule(static)  // FIXME
+    // #pragma omp parallel for schedule(static)  // TODO: https://github.com/feedzai/fairgbm/issues/6
    for (data_size_t i = 0; i < num_data_; ++i)
     {
       group_t group = group_[i];
@@ -779,7 +797,7 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
     std::unordered_map<group_t, int> label_negatives;  // map of group index to the respective number of LNs
     double xent_horizontal_shift = log(exp(proxy_margin_) - 1);
 
-    // #pragma omp parallel for schedule(static)
+    // #pragma omp parallel for schedule(static)  // TODO: https://github.com/feedzai/fairgbm/issues/6
     for (data_size_t i = 0; i < num_data_; ++i)
     {
       group_t group = group_[i];
@@ -1093,4 +1111,4 @@ class ConstrainedObjectiveFunction : public ObjectiveFunction
 };
 
 }  // namespace LightGBM
-#endif  // LightGBM_OBJECTIVE_FUNCTION_H_
+#endif  // LightGBM_OBJECTIVE_FUNCTION_H_
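The three `constraint_stepwise_proxy` branches above all compute the derivative of a differentiable stand-in for the step function. A minimal standalone sketch of the FPR-side derivatives (normalization by the group's label-negative count omitted; names are illustrative, not the in-tree API):

    #include <algorithm>
    #include <cmath>
    #include <stdexcept>
    #include <string>

    // Derivative w.r.t. the raw score s of the proxy false-positive indicator,
    // for a label-negative instance, with margin m > 0 and shift h = log(exp(m) - 1).
    double fpr_proxy_derivative(const std::string& proxy, double s, double m) {
      const double h = std::log(std::exp(m) - 1.);
      if (proxy == "hinge")          // d/ds max(0, s + m): a 0/1 step at s = -m
        return s <= -m ? 0. : 1.;
      if (proxy == "cross_entropy")  // d/ds log(1 + exp(s + h)) = sigmoid(s + h)
        return 1. / (1. + std::exp(-(s + h)));
      if (proxy == "quadratic")      // d/ds 0.5 * max(0, s + m)^2 = max(0, s + m)
        return std::max(0., s + m);
      throw std::invalid_argument("constraint_stepwise_proxy=" + proxy + " not implemented!");
    }
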
diff --git a/include/LightGBM/utils/constrained.hpp b/include/LightGBM/utils/constrained.hpp
index fcd6a02e2..5da3bac01 100644
--- a/include/LightGBM/utils/constrained.hpp
+++ b/include/LightGBM/utils/constrained.hpp
@@ -21,67 +21,57 @@
 #include
 
-namespace LightGBM {
+namespace LightGBM::Constrained {
 
-    inline double sigmoid(double x) {
-        return 1. / (1. + std::exp(-x));
-    }
-
-    template<typename Key, typename Value>
-    std::pair<Key, Value> findMaxValuePair(std::unordered_map<Key, Value> const &x)
-    {
-        return *std::max_element(
-                x.begin(), x.end(),
-                [](const std::pair<Key, Value> &p1, const std::pair<Key, Value> &p2) {
-                    return p1.second < p2.second;
-                }
-        );
-    }
-
-    template<typename T, typename Allocator = std::allocator<T>>
-    void write_values(const std::string& dir, const std::string& filename,
-                      std::vector<T, Allocator> values) {
-        struct stat buf;
-
-        std::string filename_path = dir + "/" + filename;
-        bool file_exists = (stat(filename_path.c_str(), &buf) != -1);
-
-        std::stringstream tmp_buf;
-        for (auto e : values) {
-            tmp_buf << e << ",";
-        }
-
-        tmp_buf.seekp(-1, tmp_buf.cur);
-        tmp_buf << std::endl;
-
-        std::ofstream outfile;
-        outfile.open(filename_path, std::ios::out | (file_exists ? std::ios::app : std::ios::trunc));
-        outfile << tmp_buf.str();
-
-        outfile.close();
-    }
-
-    template<typename T>
-    void write_values(const std::string& dir, const std::string& filename, const T* arr, int arr_len) {
-        struct stat buf;
+/**
+ * Standard sigmoid mathematical function.
+ * @param x the input to the function.
+ * @return the sigmoid of the input.
+ */
+inline double sigmoid(double x) {
+  return 1. / (1. + std::exp(-x));
+}
 
-        std::string filename_path = dir + "/" + filename;
-        bool file_exists = (stat(filename_path.c_str(), &buf) != -1);
+/**
+ * Finds the (key, value) pair with highest value.
+ * @tparam Key The type of the Map Key.
+ * @tparam Value The type of the Map Value.
+ * @param x Reference to the map to search over.
+ * @return The pair with highest value V.
+ */
+template<typename Key, typename Value>
+std::pair<Key, Value> findMaxValuePair(std::unordered_map<Key, Value> const &x)
+{
+  return *std::max_element(
+          x.begin(), x.end(),
+          [](const std::pair<Key, Value> &p1, const std::pair<Key, Value> &p2) {
+            return p1.second < p2.second;
+          }
+  );
+}
 
-        std::stringstream tmp_buf;
-        for (int i = 0; i < arr_len; i++) {
-            tmp_buf << arr[i] << ",";
-        }
+/**
+ * Writes the given values to the end of the given file.
+ * @tparam T The type of values in the input vector.
+ * @tparam Allocator The type of allocator in the input vector.
+ * @param dir The directory of the file to write on.
+ * @param filename The name of the file to write on.
+ * @param values A vector of the values to append to the file.
+ */
+template<typename T, typename Allocator = std::allocator<T>>
+void write_values(const std::string& dir, const std::string& filename,
+                  std::vector<T, Allocator> values) {
+  struct stat buf;
 
-        tmp_buf.seekp(-1, tmp_buf.cur);
-        tmp_buf << std::endl;
+  std::string filename_path = dir + "/" + filename;
+  bool file_exists = (stat(filename_path.c_str(), &buf) != -1);
 
-        std::ofstream outfile;
-        outfile.open(filename_path, std::ios::out | (file_exists ? std::ios::app : std::ios::trunc));
-        outfile << tmp_buf.str();
+  std::ofstream outfile;
+  outfile.open(filename_path, std::ios::out | (file_exists ? std::ios::app : std::ios::trunc));
+  outfile << LightGBM::Common::Join(values, ",") << std::endl;
 
-        outfile.close();
-    }
+  outfile.close();
+}
 }
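Hypothetical usage of the two helpers above (the include path, group values, and output directory are assumptions for illustration):

    #include <LightGBM/utils/constrained.hpp>
    #include <iostream>
    #include <unordered_map>
    #include <vector>

    int main() {
      std::unordered_map<int, double> group_fpr = {{0, 0.05}, {1, 0.12}};
      // The group with the highest FPR drives the "max" side of each constraint.
      auto worst = LightGBM::Constrained::findMaxValuePair(group_fpr);
      std::cout << "group " << worst.first << " has FPR " << worst.second << "\n";

      // Appends one CSV row ("0.05,0.12") to /tmp/group_fpr.dat, creating the file if needed.
      std::vector<double> row = {0.05, 0.12};
      LightGBM::Constrained::write_values("/tmp", "group_fpr.dat", row);
      return 0;
    }
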
diff --git a/src/application/application.cpp b/src/application/application.cpp
index 5cc196f91..c0276d8e3 100644
--- a/src/application/application.cpp
+++ b/src/application/application.cpp
@@ -52,7 +52,7 @@ Application::~Application() {
 /**
  * Loads the application config, either as command-line arguments or from a config file.
  * @param argc Number of command-line arguments.
- * @param argv Array of strings containing the command-line arguments. The most common element will be "config=".
+ * @param argv Array of strings containing the command-line arguments. A common element is "config=".
  */
 void Application::LoadParameters(int argc, char** argv) {
   std::unordered_map<std::string, std::string> params;
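A minimal sketch of driving `Application` with a `config=` argument, mirroring the pattern used by the (now deleted) `src/main.multiple_runs.cpp` further down in this patch; the config file name is hypothetical:

    #include <LightGBM/application.h>
    #include <string>

    int main() {
      std::string config_arg = "config=train.conf";  // assumed config file
      char* argv[] = { (char*) "lightgbm", (char*) config_arg.c_str() };
      LightGBM::Application app(2, argv);  // parses argv via LoadParameters
      app.Run();
      return 0;
    }
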
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 55e59b070..7a50d4a2d 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -98,20 +98,20 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective
   // constraint configurations
   is_constrained_ = objective_function->IsConstrained();
-  lagrangian_learning_rate = config_->lagrangian_learning_rate;
+  lagrangian_learning_rate_ = config_->multiplier_learning_rate;
 
-  auto num_constraints = objective_function->NumConstraints();
+  int num_constraints = objective_function->NumConstraints();
 
   // If no lagrange multipliers are specified
   // Set the first multiplier to 1 (corresponds to objective function) and
   // all others to 0.
-  if ((config->init_lagrangians).empty()) {
+  if ((config->init_lagrange_multipliers).empty()) {
     std::vector<double> default_lag_multipliers(num_constraints+1, 0);
     default_lag_multipliers[0] = 1;
 
     lagrangian_multipliers_.push_back(default_lag_multipliers);
   } else {
-    CHECK_EQ(num_constraints+1, config->init_lagrangians.size());
-    lagrangian_multipliers_.push_back(config->init_lagrangians);
+    CHECK_EQ(num_constraints+1, (int) config->init_lagrange_multipliers.size());
+    lagrangian_multipliers_.push_back(config->init_lagrange_multipliers);
   }
   // -- END FairGBM block --
@@ -226,16 +226,16 @@ void GBDT::Boosting() {
   //
   // NOTE: lagrange_multipliers is a vector of vectors - each element represents the multipliers at a given iteration;
-#ifdef FAIRGBM_DEBUG
+#ifdef DEBUG
   // Dump lagrangian multipliers
-  write_values(debugging_output_dir_, "lagrangian_multipliers.dat", lagrangian_multipliers_.back());
+  Constrained::write_values(debugging_output_dir_, "lagrangian_multipliers.dat", lagrangian_multipliers_.back());
 
   // Dump the gradients of the Lagrangian (grads of loss + grads of constraints)
-  write_values<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>(
+  Constrained::write_values<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>(
          debugging_output_dir_, "gradients.lagrangian.dat", gradients_);
 
   // Dump hessians, we don't currently use them though :P
-  write_values<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>(
+  Constrained::write_values<score_t, Common::AlignmentAllocator<score_t, kAlignedSize>>(
          debugging_output_dir_, "hessians.lagrangian.dat", hessians_);
 #endif
 }
@@ -327,16 +327,11 @@ void GBDT::Bagging(int iter) {
 void GBDT::Train(int snapshot_freq, const std::string& model_output_path) {
   Common::FunctionTimer fun_timer("GBDT::Train", global_timer);
-  bool is_finished = false;
+  bool is_finished = false, is_finished_lagrangian = false;
   auto start_time = std::chrono::steady_clock::now();
 
   // -- START FairGBM block --
-  for (int iter = 0;
-       iter < config_->num_iterations && !is_finished;
-       ++iter) {
-
-    // if (iter % 250 == 0)
-    //   std::cout << "Iter i=" << iter << std::endl;
+  for (int iter = 0; iter < config_->num_iterations && !is_finished; ++iter) {
 
     // Descent step!
     is_finished = TrainOneIter(nullptr, nullptr);
@@ -573,9 +568,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
   return false;
 }
 
-// TODO
-// - implement early stopping criteria (convergence fulfilled);
+// TODO: https://github.com/feedzai/fairgbm/issues/7
 // - implement normalization / bound on multipliers;
+// - implement early stopping criteria (convergence fulfilled);
 /*!
  * \brief Gradient ascent step w.r.t. Lagrange multipliers (used only for constrained optimization)
  * \param gradients nullptr for using default objective, otherwise use self-defined boosting
@@ -590,11 +585,11 @@ bool GBDT::TrainLagrangianOneIter(const score_t *gradients, const score_t *hessi
   auto lag_updates = constrained_objective_function->GetLagrangianGradientsWRTMultipliers(
           GetTrainingScore(&num_score));
 
-  // Get current lagrange multipliers values
-  auto current_lag_multipliers = lagrangian_multipliers_.back();  // Multipliers of the latest iteration
+  // Get Lagrange multipliers of the latest iteration
+  auto current_lag_multipliers = lagrangian_multipliers_.back();
 
   // Initialize updated lagrangian multipliers w/ previous value
-  std::vector<double> updated_lag_multipliers(current_lag_multipliers.begin(), current_lag_multipliers.end());
+  std::vector<double> updated_lag_multipliers(current_lag_multipliers);
 
   // Gradient ascent in Lagrangian multipliers (or constraint space)
   // NOTE:
@@ -603,7 +598,7 @@ bool GBDT::TrainLagrangianOneIter(const score_t *gradients, const score_t *hessi
   //  - we're currently NOT UPDATING this multiplier, hence the loop STARTS AT i=1;
   //  - the gradient for this multiplier will always be the value of the loss;
   for (uint i = 1; i < lag_updates.size(); i++) {
-    updated_lag_multipliers[i] += lagrangian_learning_rate * lag_updates[i];
+    updated_lag_multipliers[i] += lagrangian_learning_rate_ * lag_updates[i];
 
     // Ensuring multipliers >= 0 -> using *INEQUALITY* constraints! c(theta) <= 0
     updated_lag_multipliers[i] = std::max(0.0, updated_lag_multipliers[i]);
   }
   lagrangian_multipliers_.push_back(updated_lag_multipliers);
 
-#ifdef FAIRGBM_DEBUG
+#ifdef DEBUG
   // Log constraints violation to file
-  write_values(debugging_output_dir_, "functions_evals.dat", lag_updates);
+  Constrained::write_values(debugging_output_dir_, "functions_evals.dat", lag_updates);
 #endif
 
   return false;
@@ -970,8 +965,7 @@ void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) {
     if (objective_function_ != nullptr) {
      num_pos_data = objective_function_->NumPositiveData();
     }
-    bool balance_bagging_cond =
-            (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
+    bool balance_bagging_cond = (config->pos_bagging_fraction < 1.0 || config->neg_bagging_fraction < 1.0) && (num_pos_data > 0);
     if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) {
       need_re_bagging_ = false;
       if (!is_change_dataset &&
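`TrainLagrangianOneIter` above is a projected gradient-ascent step on the multipliers. The update it implements, as a standalone sketch (illustrative names, not the in-tree API):

    #include <algorithm>
    #include <vector>

    // lambda_i <- max(0, lambda_i + eta * c_i) for i >= 1; lambda_0 (the weight
    // on the predictive loss) is deliberately left untouched.
    std::vector<double> ascent_step(const std::vector<double>& multipliers,
                                    const std::vector<double>& lag_updates,
                                    double learning_rate) {
      std::vector<double> updated(multipliers);
      for (size_t i = 1; i < updated.size(); ++i) {
        updated[i] += learning_rate * lag_updates[i];
        updated[i] = std::max(0.0, updated[i]);  // projection onto lambda >= 0
      }
      return updated;
    }
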
diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h
index 7392050ba..e8b6472a3 100644
--- a/src/boosting/gbdt.h
+++ b/src/boosting/gbdt.h
@@ -548,10 +548,10 @@ class GBDT : public GBDTBase {
   bool is_constrained_;
 
   /*! \brief Shrinkage rate for the Ascent step */
-  double lagrangian_learning_rate;
+  double lagrangian_learning_rate_;
 
   /*! \brief Lagrangian multiplier(s) per iteration */
-  std::vector<std::vector<double>> lagrangian_multipliers_;
+  std::vector<std::vector<double>> lagrangian_multipliers_;  // TODO: https://github.com/feedzai/fairgbm/issues/8
 
   /*! \brief Output directory to store debug files (e.g., gradients/hessians) */
   std::string debugging_output_dir_;
diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp
index e49496fc9..698ce681c 100644
--- a/src/io/config_auto.cpp
+++ b/src/io/config_auto.cpp
@@ -168,6 +168,11 @@ const std::unordered_map<std::string, std::string>& Config::alias_table() {
   {"nodes", "machines"},
   // -- START FairGBM block --
+  {"lagrangian_learning_rate", "multiplier_learning_rate"},
+  {"lagrangian_multiplier_learning_rate", "multiplier_learning_rate"},
+  {"lagrange_multipliers", "init_lagrange_multipliers"},
+  {"init_multipliers", "init_lagrange_multipliers"},
+  {"output_dir", "debugging_output_dir"},
   {"global_fpr", "global_target_fpr"},
   {"target_global_fpr", "global_target_fpr"},
   {"global_fnr", "global_target_fnr"},
@@ -326,7 +331,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
   "constraint_fnr_threshold",
   "score_threshold",
   "init_lagrange_multipliers",
-  "lagrangian_learning_rate",
+  "multiplier_learning_rate",
   "global_constraint_type",
   "global_target_fpr",
   "global_target_fnr",
@@ -673,7 +678,7 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::st
-    init_lagrangians = Common::StringToArray<double>(tmp_str, ',');
-    for (auto lag : init_lagrangians)
+    init_lagrange_multipliers = Common::StringToArray<double>(tmp_str, ',');
+    for (auto lag : init_lagrange_multipliers)
       CHECK_GE(lag, 0);
   }
 
@@ -821,11 +826,8 @@ std::string Config::SaveMembersToString() const {
   str_buf << "[score_threshold: " << score_threshold << "]\n";
   str_buf << "[constraint_fpr_threshold: " << constraint_fpr_threshold << "]\n";
   str_buf << "[constraint_fnr_threshold: " << constraint_fnr_threshold << "]\n";
-  str_buf << "[lagrangian_learning_rate: " << lagrangian_learning_rate << "]\n";
-  str_buf << "[init_lagrange_multipliers: ";
-  for (auto &lag : init_lagrangians)
-    str_buf << lag << ",";
-  str_buf << "]\n";
+  str_buf << "[multiplier_learning_rate: " << multiplier_learning_rate << "]\n";
+  str_buf << "[init_lagrange_multipliers: " << Common::Join(init_lagrange_multipliers, ",") << "]\n";
 
   // Global constraint parameters
   str_buf << "[global_constraint_type: " << global_constraint_type << "]\n";
diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp
index 1c8f40a53..fbd38b37e 100644
--- a/src/io/dataset_loader.cpp
+++ b/src/io/dataset_loader.cpp
@@ -1021,7 +1021,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines,
   CHECK(label_idx_ >= 0 && label_idx_ <= dataset->num_total_features_);
   CHECK(weight_idx_ < 0 || weight_idx_ < dataset->num_total_features_);
   CHECK(group_idx_ < 0 || group_idx_ < dataset->num_total_features_);
-  CHECK(group_constraint_idx_ < 0 || group_constraint_idx_ < dataset->num_total_features_);  // FairGBM
+  CHECK(group_constraint_idx_ == NO_SPECIFIC || (group_constraint_idx_ >= 0 && group_constraint_idx_ < dataset->num_total_features_));  // FairGBM
 
   // fill feature_names_ if not header
   if (feature_names_.empty()) {
@@ -1344,7 +1344,7 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser*
       }
       // -- START FairGBM block --
       if (inner_data.first == group_constraint_idx_)
-        dataset->metadata_.SetGroupConstraintAt(start_idx + i, static_cast<data_size_t>(inner_data.second));
+        dataset->metadata_.SetGroupConstraintAt(start_idx + i, static_cast<group_t>(inner_data.second));
       // -- END FairGBM block --
     }
     if (dataset->has_raw()) {
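The new aliases resolve to canonical parameter names through `Config::alias_table()`; a hypothetical lookup, assuming the table is reachable as in upstream LightGBM:

    #include <LightGBM/config.h>
    #include <iostream>

    int main() {
      const auto& aliases = LightGBM::Config::alias_table();
      std::cout << aliases.at("init_multipliers") << "\n";          // init_lagrange_multipliers
      std::cout << aliases.at("lagrangian_learning_rate") << "\n";  // multiplier_learning_rate
      return 0;
    }
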
diff --git a/src/main.multiple_runs.cpp b/src/main.multiple_runs.cpp
deleted file mode 100644
index ced14de6d..000000000
--- a/src/main.multiple_runs.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-/**
- * The copyright of this file belongs to Feedzai. The file cannot be
- * reproduced in whole or in part, stored in a retrieval system,
- * transmitted in any form, or by any means electronic, mechanical,
- * photocopying, or otherwise, without the prior permission of the owner.
- *
- * (c) 2021 Feedzai, Strictly Confidential
- */
-/**
- * File intended to run an experiment that trains multiple LGBMs from different config files.
- */
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-
-double time_it_with_chrono(std::string config_path, int argc) {
-  std::cout << "-----------------------------------------------------------" << std::endl;
-  std::cout << "--> Processing " << config_path << " file" << std::endl;
-  std::cout << "-----------------------------------------------------------" << std::endl;
-
-  auto start = std::chrono::high_resolution_clock::now();
-
-  std::string prefixed_config_path = "config=" + config_path;
-  char* argv[] = { (char*) "", (char*) prefixed_config_path.c_str() };
-  LightGBM::Application app(argc, argv);
-  app.Run();
-
-  auto end = std::chrono::high_resolution_clock::now();
-  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
-  return time_span.count();
-}
-
-
-void write_elapsed_time(std::string path, std::vector<std::string> paths, std::vector<double> times) {
-  struct stat buf;
-  bool file_exists = (stat(path.c_str(), &buf) != -1);
-
-  std::stringstream tmp_buf;
-  for (ushort i = 0; i < paths.size(); i++) {
-    tmp_buf << paths[i] << "," << times[i] << std::endl;
-  }
-  std::ofstream outfile;
-  outfile.open(path, std::ios::out | (file_exists ? std::ios::app : std::ios::trunc));
-  outfile << tmp_buf.str();
-  outfile.close();
-}
-
-
-int main() {
-  bool success = false;
-  try {
-
-//    std::string experiments_root_path = "/home/andre.cruz/Documents/fair-boosting/experiments/";  // Path for local machine
-    std::string experiments_root_path = "/mnt/home/andre.cruz/fair-boosting/experiments/";  // Path for hd-processor
-
-//    std::string dataset_name = "Adult-2021";
-//    std::string dataset_name = "AOF-Fairbench";
-//    std::string dataset_name = "AOF-FairHO";
-    std::string dataset_name = "AOF-FairHO-type_of_employment";
-
-//    std::string experiment_name = "randomly-generated-configs";  // standard: run all configs
-
-    /** AOF specific configurations **/
-    std::string experiment_name = "randomly-generated-configs/LightGBM";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM";
-//    std::string experiment_name = "randomly-generated-configs/LightGBM-with-unawareness";
-//    std::string experiment_name = "randomly-generated-configs/LightGBM-with-equalized-prev";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM.BCE+BCE";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM.Recall+BCE";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM-with-equalized-prev.BCE+BCE";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM-with-unawareness.BCE+BCE";
-
-    /** Adult specific configurations **/
-//    std::string experiment_name = "randomly-generated-configs/LightGBM";
-//    std::string experiment_name = "randomly-generated-configs/LightGBM-with-unawareness";
-//    std::string experiment_name = "randomly-generated-configs/LightGBM-with-equalized-prev";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM-params-fixed";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM-params-exploration";
-//    std::string experiment_name = "randomly-generated-configs/FairGBM-equalized-prev-params-exploration";
-
-    std::string confs_root_path = experiments_root_path + dataset_name + "/confs/" + experiment_name + "/";
-    std::string results_root_path = experiments_root_path + dataset_name + "/results/" + experiment_name + "/";
-
-    std::vector<std::string> paths;
-    int N_CONFIGS = 100;
-    // Gather all config files under the given root folder
-    for (int i = 0; i < N_CONFIGS; ++i) {
-      std::stringstream ss;
-      ss << std::setw(3) << std::setfill('0') << i << ".conf";
-      std::string conf_file_path = confs_root_path + ss.str();
-      paths.push_back(conf_file_path);
-    }
-
-    std::vector<double> elapsed_times;
-    for (std::string path : paths) {
-      double elapsed_time = time_it_with_chrono(path, 2);
-      elapsed_times.push_back(elapsed_time);
-    }
-
-    write_elapsed_time(results_root_path + "elapsed-times.csv", paths, elapsed_times);
-
-#ifdef USE_MPI
-    LightGBM::Linkers::MpiFinalizeIfIsParallel();
-#endif
-
-    success = true;
-  }
-  catch (const std::exception &ex) {
-    std::cerr << "Met Exceptions:" << std::endl;
-    std::cerr << ex.what() << std::endl;
-  }
-  catch (const std::string &ex) {
-    std::cerr << "Met Exceptions:" << std::endl;
-    std::cerr << ex << std::endl;
-  }
-  catch (...) {
-    std::cerr << "Unknown Exceptions" << std::endl;
-  }
-
-  if (!success) {
-#ifdef USE_MPI
-    LightGBM::Linkers::MpiAbortIfIsParallel();
-#endif
-
-    exit(-1);
-  }
-}
diff --git a/src/objective/constrained_recall_objective.hpp b/src/objective/constrained_recall_objective.hpp
index f31e85996..3efc1ba14 100644
--- a/src/objective/constrained_recall_objective.hpp
+++ b/src/objective/constrained_recall_objective.hpp
@@ -43,10 +43,15 @@ class ConstrainedRecallObjective : public ConstrainedObjectiveFunction {
     if (not this->IsGlobalFPRConstrained())
       throw std::invalid_argument("Must provide a global FPR constraint in order to optimize for Recall!");
 
-    if (objective_stepwise_proxy == "cross_entropy" or constraint_stepwise_proxy == "cross_entropy")
-      assert(proxy_margin_ > 1e-2);  // Must be strictly positive
+    if (objective_stepwise_proxy == "cross_entropy" or constraint_stepwise_proxy == "cross_entropy") {
+      if (proxy_margin_ < DBL_MIN) {
+        Log::Fatal("Proxy margin must be positive. It was %f.", proxy_margin_);
+      }
+    }
 
-    assert(not objective_stepwise_proxy.empty());
+    if (objective_stepwise_proxy.empty()) {
+      Log::Fatal("Must provide an `objective_stepwise_proxy` to optimize for Recall. Got empty input.");
+    }
   };
 
   explicit ConstrainedRecallObjective(const std::vector<std::string> &)
@@ -122,7 +127,20 @@ class ConstrainedRecallObjective : public ConstrainedObjectiveFunction {
    * @param hessians
    */
  void GetGradients(const double *score, score_t *gradients, score_t *hessians) const override {
+    /**
+     * How much to shift the cross-entropy function (horizontally) to get
+     * the target proxy_margin_ at x=0; i.e., f(0) = proxy_margin_
+     */
     const double xent_horizontal_shift = log(exp(proxy_margin_) - 1);
+
+    /**
+     * NOTE
+     *  - This value should be zero in order to optimize solely for TPR (Recall),
+     *    as TPR considers only label positives and ignores label negatives.
+     *  - However, initial splits will have -inf information gain if the gradients
+     *    of all label negatives are 0;
+     *  - Hence, we're adding a small constant to the gradient of all LNs;
+     */
     const double label_negative_weight = 1e-2;
 
     #pragma omp parallel for schedule(static)
@@ -136,7 +154,7 @@ class ConstrainedRecallObjective : public ConstrainedObjectiveFunction {
         }
         else if (objective_stepwise_proxy == "cross_entropy") {
-          const double z = sigmoid(score[i] - xent_horizontal_shift);
+          const double z = Constrained::sigmoid(score[i] - xent_horizontal_shift);
           gradients[i] = (score_t) (z - 1.);
           hessians[i] = (score_t) (z * (1. - z));
         }
@@ -156,12 +174,8 @@ class ConstrainedRecallObjective : public ConstrainedObjectiveFunction {
         }
       } else {
-        // NOTE!
-        //  - Initial splits will have -inf information gain if the gradients of all label negatives are 0;
-        //  - We're adding a small constant indiscriminately to the gradient of all LNs;
-        // NOTE! trying to use a soft BCE signal for negative labels
-        const double z = sigmoid(score[i] + xent_horizontal_shift);
+        const double z = Constrained::sigmoid(score[i] + xent_horizontal_shift);
         gradients[i] = (score_t) (label_negative_weight * z);
         hessians[i] = (score_t) (label_negative_weight * z * (1. - z));
       }
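The horizontal shift used above follows from requiring the cross-entropy proxy to pass through the margin at a score of zero, i.e. f(0) = proxy_margin_. For the FNR proxy on label positives,

\[
\ell(s) = \log\!\left(1 + e^{-(s - h)}\right), \qquad
\ell(0) = \log\!\left(1 + e^{h}\right) = m
\;\Longrightarrow\; h = \log\!\left(e^{m} - 1\right),
\]

with derivative \(\ell'(s) = \sigma(s - h) - 1\), which is exactly the `cross_entropy` branch above; symmetrically, the FPR proxy on label negatives is \(\log(1 + e^{s + h})\) with derivative \(\sigma(s + h)\).
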
diff --git a/src/objective/constrained_xentropy_objective.hpp b/src/objective/constrained_xentropy_objective.hpp
index 6e04fdef7..0d51e5bb8 100644
--- a/src/objective/constrained_xentropy_objective.hpp
+++ b/src/objective/constrained_xentropy_objective.hpp
@@ -17,8 +17,6 @@
 #ifndef LIGHTGBM_OBJECTIVE_CONSTRAINED_XENTROPY_OBJECTIVE_HPP_
 #define LIGHTGBM_OBJECTIVE_CONSTRAINED_XENTROPY_OBJECTIVE_HPP_
 
-//#define FAIRGBM_DEBUG  // Print debug messages
-
 #include
 #include
 #include
@@ -54,8 +52,9 @@ class ConstrainedCrossEntropy : public ConstrainedObjectiveFunction {  // TODO: i
       : deterministic_(config.deterministic) {
     SetUpFromConfig(config);
 
-    if (not objective_stepwise_proxy.empty())
-      std::cerr << "ERR: Ignoring argument objective_stepwise_proxy=" << objective_stepwise_proxy << std::endl;
+    if (not objective_stepwise_proxy.empty()) {
+      Log::Warning("Ignoring argument objective_stepwise_proxy=%s.", objective_stepwise_proxy.c_str());
+    }
   }
 
   explicit ConstrainedCrossEntropy(const std::vector<std::string> &)
 
   ~ConstrainedCrossEntropy() override = default;
 
   double ComputePredictiveLoss(label_t label, double score) const override {
-    return XentLoss(label, sigmoid(score));
+    return XentLoss(label, Constrained::sigmoid(score));
   }
 
   /**
@@ -83,7 +82,7 @@ class ConstrainedCrossEntropy : public ConstrainedObjectiveFunction {  // TODO: i
     // compute pointwise gradients and Hessians with implied unit weights
 #pragma omp parallel for schedule(static)
     for (data_size_t i = 0; i < num_data_; ++i) {
-      const double z = 1. / (1. + std::exp(-score[i]));  // NOTE: Computing sigmoid of logodds
+      const double z = Constrained::sigmoid(score[i]);
 
       gradients[i] = static_cast<score_t>(z - label_[i]);   // 1st derivative
       hessians[i] = static_cast<score_t>(z * (1.0f - z));   // 2nd derivative
@@ -93,7 +92,7 @@ class ConstrainedCrossEntropy : public ConstrainedObjectiveFunction {  // TODO: i
     // compute pointwise gradients and Hessians with given weights
 #pragma omp parallel for schedule(static)
     for (data_size_t i = 0; i < num_data_; ++i) {
-      const double z = 1.0f / (1.0f + std::exp(-score[i]));  // NOTE: Computing sigmoid of logodds
+      const double z = Constrained::sigmoid(score[i]);
 
       gradients[i] = static_cast<score_t>((z - label_[i]) * weights_[i]);
       hessians[i] = static_cast<score_t>(z * (1.0f - z) * weights_[i]);
@@ -125,16 +124,16 @@ class ConstrainedCrossEntropy : public ConstrainedObjectiveFunction {  // TODO: i
     double suml = 0.0f;
     double sumw = 0.0f;
     if (weights_ != nullptr) {
-#pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
+      #pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_)
       for (data_size_t i = 0; i < num_data_; ++i) {
         suml += label_[i] * weights_[i];
         sumw += weights_[i];
       }
     } else {
       sumw = static_cast<double>(num_data_);
-#pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
+      #pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_)
       for (data_size_t i = 0; i < num_data_; ++i) {
         suml += label_[i];
       }