diff --git a/.gitignore b/.gitignore index 66b8a9b4a..662a52dc9 100644 --- a/.gitignore +++ b/.gitignore @@ -455,3 +455,6 @@ dask-worker-space/ *.pub *.rdp *_rsa + +# Others +src/main.*.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 41fb21f5e..ae3da4e92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -344,6 +344,7 @@ endif(USE_CUDA) ) add_executable(lightgbm src/main.cpp src/application/application.cpp ${SOURCES}) +#add_executable(fairgbm_multiple_runs src/main.multiple_runs.cpp src/application/application.cpp ${SOURCES}) list(APPEND SOURCES "src/c_api.cpp") # Only build the R part of the library if building for diff --git a/LICENSE b/LICENSE index 5ae193c94..25df6b8c2 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,213 @@ + + **For commercial uses of FairGBM please contact .** + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2022 Feedzai + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### + + The MIT License (MIT) Copyright (c) Microsoft Corporation diff --git a/include/LightGBM/boosting.h b/include/LightGBM/boosting.h index ddbcdbc18..ffdc84bfc 100644 --- a/include/LightGBM/boosting.h +++ b/include/LightGBM/boosting.h @@ -84,6 +84,14 @@ class LIGHTGBM_EXPORT Boosting { */ virtual bool TrainOneIter(const score_t* gradients, const score_t* hessians) = 0; + /*! + * \brief Gradient ascent step w.r.t. Lagrangian multipliers (used only for constrained optimization) + * \param gradients nullptr for using default objective, otherwise use self-defined boosting + * \param hessians nullptr for using default objective, otherwise use self-defined boosting + * \return True if cannot train anymore (or training has ended due to early stopping) + */ + virtual bool TrainLagrangianOneIter(const score_t* gradients, const score_t* hessians) = 0; + /*! * \brief Rollback one iteration */ diff --git a/include/LightGBM/config.h b/include/LightGBM/config.h index 5142604ca..b4ee397c6 100644 --- a/include/LightGBM/config.h +++ b/include/LightGBM/config.h @@ -7,6 +7,11 @@ * - nested sections can be placed only at the bottom of parent's section; * - [doc-only] tag indicates that only documentation for this param should be generated and all other actions are performed manually; * - [no-save] tag indicates that this param should not be saved into a model text representation. + * + * **NOTES!** + * - configs for use with the LightGBM::Application, to be passed via command line arguments or argv. + * - can also be filled by means of a config file, passed as config= in this config. + * - see the parameters in this Class to see what configs are available! 
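+ * - illustrative CLI example (file and column names below are hypothetical, not part of this patch): ``./lightgbm config=train.conf constraint_type=fpr constraint_group_column=name:group``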
*/ #ifndef LIGHTGBM_CONFIG_H_ #define LIGHTGBM_CONFIG_H_ @@ -186,6 +191,20 @@ struct Config { // desc = in ``dart``, it also affects on normalization weights of dropped trees double learning_rate = 0.1; + // alias = multiplier_shrinkage_rate, lagrangian_learning_rate, lagrangian_multiplier_learning_rate + // check = >0.0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = learning rate for the Lagrangian multipliers (which enforce the constraints) + double multiplier_learning_rate = 0.1; + + // alias = lagrangian_multipliers, init_multipliers + // type = multi-double + // default = 0,0,...,0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = list representing the magnitude of *initial* (first iteration only) penalties for each constraint + // desc = list should have the same number of elements as the number of constraints + std::vector init_lagrangian_multipliers; + // default = 31 // alias = num_leaf, max_leaves, max_leaf // check = >1 @@ -803,6 +822,24 @@ struct Config { #pragma endregion + // alias = output_dir + // type = string + // default = "." + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = output dir of gradients and hessians per iteration + // desc = **Note**: can be used only in CLI version + std::string debugging_output_dir = "."; + + // type = int or string + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = used to specify the Protected Attribute id column + // desc = use number for index, e.g. ``constraint_group=0`` means column\_0 is the query id + // desc = add a prefix ``name:`` for column name, e.g. ``constraint_group=name:id`` + // desc = **Note**: works only in case of loading data directly from file + // desc = **Note**: index starts from ``0`` and it doesn't count the label column when passing type is ``int``, e.g. 
when label is column\_0 and query\_id is column\_1, the correct parameter is ``query=0`` + // desc = **Note**: group membership values will take type `ushort`, hence keep all values below the maximum according to your compilation settings + std::string constraint_group_column = ""; + #pragma endregion #pragma region Objective Parameters @@ -885,6 +922,107 @@ struct Config { // desc = separate by ``,`` std::vector label_gain; + // type = string + // default = None + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = type of group-wise constraint to enforce during training + // desc = can take values "fpr", "fnr", or "fpr,fnr" + std::string constraint_type; + + // alias = constraint_proxy_function, constraint_stepwise_proxy_function + // type = string + // default = "cross_entropy" + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = type of proxy function to use in group-wise constraints + // desc = this will be used as a differentiable proxy for the stepwise function in the gradient descent step + // desc = can take values "hinge", "quadratic", or "cross_entropy" + std::string constraint_stepwise_proxy = "cross_entropy"; + + // alias = objective_proxy_function, objective_stepwise_proxy_function + // type = string + // default = None + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = type of proxy function to use as the proxy objective + // desc = only used when optimizing for functions with a stepwise (e.g., FNR, FPR) + std::string objective_stepwise_proxy = ""; + + // alias = proxy_margin + // check = >0 + // type = double + // default = 1.0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = for `ConstrainedCrossEntropy`: the value of the function at x=0; f(0)=stepwise_proxy_margin; (vertical margin) + // desc = for other constrained objectives: the horizontal margin of the function; i.e., for stepwise_proxy_margin=1, the proxy function will be 0 until x=-1 for FPR and non-zero onwards, or non-zero until x=1 for FNR, and non-zero onwards; + // desc = **TODO**: set all functions to use this value as the vertical margin + double stepwise_proxy_margin = 1.0; + + // alias = constraint_fpr_slack, constraint_fpr_delta + // check = >=0 + // check = <1.0 + // type = double + // default = 0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = the slack when fulfilling group-wise FPR constraints + // desc = when using the value 0.0 this will enforce group-wise FPR to be *exactly* equal + double constraint_fpr_threshold = 0.0; + + // alias = constraint_fnr_slack, constraint_fnr_delta + // check = >=0 + // check = <1.0 + // type = double + // default = 0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = the slack when fulfilling group-wise FNR constraints + // desc = when using the value 0.0 this will enforce group-wise FNR to be *exactly* equal + double constraint_fnr_threshold = 0.0; + + // check = >=0 + // check = <1.0 + // type = double + // default = 0.5 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = score threshold used for computing the GROUP-WISE confusion matrices + // desc = used to compute violation of group-wise constraints during training + double score_threshold = 0.5; + + // type = string + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = 
type of GLOBAL constraint to enforce during training + // desc = can take values "fpr", "fnr", or "fpr,fnr" + // desc = must be paired with the arguments "global_target_" accordingly + std::string global_constraint_type; + + // alias = global_fpr, target_global_fpr + // check = >=0 + // check = <=1.0 + // type = double + // default = 1.0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = target rate for the global FPR (inequality) constraint + // desc = constraint is fulfilled with global_fpr <= global_target_fpr + // desc = the default value of 1 means that this constraint is always fulfilled (never active) + double global_target_fpr = 1.; + + // alias = global_fnr, target_global_fnr + // check = >=0 + // check = <=1.0 + // type = double + // default = 1.0 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = target rate for the global FNR (inequality) constraint + // desc = constraint is fulfilled with global_fnr <= global_target_fnr + // desc = the default value of 1 means that this constraint is always fulfilled (never active) + double global_target_fnr = 1.; + + // check = >=0 + // check = <1.0 + // type = double + // default = 0.5 + // desc = used only for constrained optimization (ignored for standard LightGBM) + // desc = score threshold for computing the GLOBAL confusion matrix + // desc = used to compute violation of GLOBAL constraints during training + double global_score_threshold = 0.5; + #pragma endregion #pragma region Metric Parameters diff --git a/include/LightGBM/dataset.h b/include/LightGBM/dataset.h index d2f48ef15..a8a0d323f 100644 --- a/include/LightGBM/dataset.h +++ b/include/LightGBM/dataset.h @@ -37,6 +37,7 @@ class DatasetLoader; * 4. Query Weights, auto calculate by weights and query_boundaries(if both of them are existed) * the weight for i-th query is sum(query_boundaries[i] , .., query_boundaries[i+1]) / (query_boundaries[i + 1] - query_boundaries[i+1]) * 5. Initial score. optional. if existing, the model will boost from this score, otherwise will start from 0. +* 6. [FairGBM-only] Group, used for training during constrained optimization. */ class Metadata { public: @@ -69,8 +70,9 @@ class Metadata { * \param num_data Number of training data * \param weight_idx Index of weight column, < 0 means doesn't exists * \param query_idx Index of query id column, < 0 means doesn't exists + * \param constraint_group_idx_ Index of group constraint id column, < 0 means it doesn't exist */ - void Init(data_size_t num_data, int weight_idx, int query_idx); + void Init(data_size_t num_data, int weight_idx, int query_idx, int constraint_group_idx_); /*! * \brief Partition label by used indices @@ -92,6 +94,13 @@ class Metadata { void SetQuery(const data_size_t* query, data_size_t len); + /*! + * \brief Set constraint group information in bulk (for the whole train dataset) + * \param constraint_group constraint group information for each instance. + * \param len the number of elements in the constraint_group array. + */ + void SetConstraintGroup(const float* constraint_group, data_size_t len); + /*! * \brief Set initial scores * \param init_score Initial scores, this class will manage memory for init_score. @@ -158,7 +167,7 @@ class Metadata { /*! 
* \brief Get data boundaries on queries, if not exists, will return nullptr * we assume data will order by query, - * the interval of [query_boundaris[i], query_boundaris[i+1]) + * the interval of [query_boundaries[i], query_boundaries[i+1]) * is the data indices for query i. * \return Pointer of data boundaries on queries */ @@ -200,6 +209,31 @@ class Metadata { } } + /*! + * \brief Set constraint group value for one record + * \param idx Index of this record + * \param value Group constraint value of this record + */ + inline void SetConstraintGroupAt(data_size_t idx, constraint_group_t value) { + constraint_group_[idx] = value; + } + + /*! + * \brief Get pointer of group + * \return Pointer of group + */ + inline const constraint_group_t* group() const { return constraint_group_.data(); } + + /*! \brief Get unique groups in data */ + inline std::vector group_values() const { + std::vector values(constraint_group_); + std::sort(values.begin(), values.end()); + + auto last = std::unique(values.begin(), values.end()); + values.erase(last, values.end()); + return values; + } + /*! * \brief Get size of initial scores */ @@ -246,6 +280,9 @@ class Metadata { bool weight_load_from_file_; bool query_load_from_file_; bool init_score_load_from_file_; + + /*! \brief Group data for group constraints */ + std::vector constraint_group_; }; diff --git a/include/LightGBM/dataset_loader.h b/include/LightGBM/dataset_loader.h index e72dd4910..88cd58437 100644 --- a/include/LightGBM/dataset_loader.h +++ b/include/LightGBM/dataset_loader.h @@ -84,6 +84,9 @@ class DatasetLoader { std::unordered_set categorical_features_; /*! \brief Whether to store raw feature values */ bool store_raw_; + + /*! \brief index of constraint group column */ + int constraint_group_idx_; }; } // namespace LightGBM diff --git a/include/LightGBM/meta.h b/include/LightGBM/meta.h index 3452f28d8..831873257 100644 --- a/include/LightGBM/meta.h +++ b/include/LightGBM/meta.h @@ -47,6 +47,13 @@ typedef double label_t; typedef float label_t; #endif +/*! \brief Type of metadata, include group */ +#ifdef GROUP_T_USE_INT + typedef u_int constraint_group_t; +#else + typedef u_short constraint_group_t; +#endif + const score_t kMinScore = -std::numeric_limits::infinity(); const score_t kEpsilon = 1e-15f; diff --git a/include/LightGBM/objective_function.h b/include/LightGBM/objective_function.h index 5ea838dec..c483742a8 100644 --- a/include/LightGBM/objective_function.h +++ b/include/LightGBM/objective_function.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,8 @@ class ObjectiveFunction { virtual bool IsConstantHessian() const { return false; } + virtual bool IsConstrained() const { return false; } + virtual bool IsRenewTreeOutput() const { return false; } virtual double RenewTreeOutput(double ori_output, std::function, @@ -58,6 +61,8 @@ class ObjectiveFunction { virtual int NumPredictOneRow() const { return 1; } + virtual int NumConstraints() const { return 0; } + /*! \brief The prediction should be accurate or not. True will disable early stopping for prediction. */ virtual bool NeedAccuratePrediction() const { return true; } @@ -90,6 +95,1001 @@ class ObjectiveFunction { LIGHTGBM_EXPORT static ObjectiveFunction* CreateObjectiveFunction(const std::string& str); }; -} // namespace LightGBM +class ConstrainedObjectiveFunction : public ObjectiveFunction +{ +public: + /*! 
\brief virtual destructor */ + virtual ~ConstrainedObjectiveFunction() {} + + void SetUpFromConfig(const Config &config) + { + constraint_type = config.constraint_type; + + // Normalize constraint type + std::transform(constraint_type.begin(), constraint_type.end(), constraint_type.begin(), ::toupper); + if (constraint_type == "FNR,FPR") + constraint_type = "FPR,FNR"; + + fpr_threshold_ = (score_t)config.constraint_fpr_threshold; + fnr_threshold_ = (score_t)config.constraint_fnr_threshold; + score_threshold_ = (score_t)config.score_threshold; + proxy_margin_ = (score_t)config.stepwise_proxy_margin; + + /** Global constraint parameters **/ + global_constraint_type = config.global_constraint_type; + + // Normalize global constraint type + std::transform(global_constraint_type.begin(), global_constraint_type.end(), global_constraint_type.begin(), ::toupper); + if (global_constraint_type == "FNR,FPR") + global_constraint_type = "FPR,FNR"; + + global_target_fpr_ = (score_t)config.global_target_fpr; + global_target_fnr_ = (score_t)config.global_target_fnr; + global_score_threshold_ = (score_t)config.global_score_threshold; + + // Function used as a PROXY for step-wise in the CONSTRAINTS + constraint_stepwise_proxy = ValidateProxyFunctionName(config.constraint_stepwise_proxy, false); + + // Function used as a PROXY for the step-wise in the OBJECTIVE + objective_stepwise_proxy = ValidateProxyFunctionName(config.objective_stepwise_proxy, true); + + // Debug configs + debugging_output_dir_ = config.debugging_output_dir; + } + + /*! + * \brief Initialize + * \param metadata Label data + * \param num_data Number of data + */ + void Init(const Metadata &metadata, data_size_t num_data) override + { + num_data_ = num_data; + label_ = metadata.label(); + weights_ = metadata.weights(); + + // Store Information about the group + group_ = metadata.group(); + group_values_ = metadata.group_values(); + + // Store Information about the labels + total_label_positives_ = 0; + total_label_negatives_ = 0; + ComputeLabelCounts(); + + CHECK_NOTNULL(label_); + Common::CheckElementsIntervalClosed(label_, 0.0f, 1.0f, num_data_, GetName()); + Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__); + + if (weights_ != nullptr) + { + label_t minw; + double sumw; + Common::ObtainMinMaxSum(weights_, num_data_, &minw, static_cast(nullptr), &sumw); + if (minw < 0.0f) + { + Log::Fatal("[%s]: at least one weight is negative", GetName()); + } + if (sumw < DBL_MIN) + { + Log::Fatal("[%s]: sum of weights is zero", GetName()); + } + } + } + + /** + * Template method for computing an instance's predictive loss value + * from its predicted score (log-odds). + * + * @param label Instance label. + * @param score Instance predicted score (log-odds); + * @return The instance loss value. + */ + virtual double ComputePredictiveLoss(label_t label, double score) const = 0; + + /*! + * \brief Get functions w.r.t. to the lagrangian multipliers. + * \brief This includes the evaluation of both the objective + * \brief function (aka the loss) and also the (real) constraints. + * \brief Therefore, the returned array will be of size. + * \brief NumConstraints + 1 (plus one from the loss lagrang. multiplier). + * \param score prediction score in this round. 
+ */ + virtual std::vector GetLagrangianGradientsWRTMultipliers(const double *score) const + { + if (weights_ != nullptr) + throw std::logic_error("not implemented yet"); // TODO: https://github.com/feedzai/fairgbm/issues/5 + + std::vector constraint_values; + std::unordered_map group_fpr, group_fnr; + + // NOTE! ** MULTIPLIERS ARE ORDERED! ** + // - 1st: group-wise FPR constraints (one multiplier per group) + // - 2nd: group-wise FNR constraints (one multiplier per group) + // - 3rd: global FPR constraint (a single multiplier) + // - 4th: global FNR constraint (a single multiplier) + + // Multiplier corresponding to group-wise FPR constraints + if (IsFPRConstrained()) + { + ComputeFPR(score, score_threshold_, group_fpr); + double max_fpr = Constrained::findMaxValuePair(group_fpr).second; + + // Assuming group_values_ is in ascending order + for (const auto &group : group_values_) + { + double fpr_constraint_value = max_fpr - group_fpr[group] - fpr_threshold_; + constraint_values.push_back(fpr_constraint_value); + +#ifdef DEBUG + Log::Debug( + "DEBUG; true FPR constraint value: c = %.3f - %.3f = %.3f\n", + max_fpr, group_fpr[group], fpr_constraint_value); +#endif + } + } + + // Multiplier corresponding to group-wise FNR constraints + if (IsFNRConstrained()) + { + ComputeFNR(score, score_threshold_, group_fnr); + double max_fnr = Constrained::findMaxValuePair(group_fnr).second; + + // Assuming group_values_ is in ascending order + for (const auto &group : group_values_) + { + double fnr_constraint_value = max_fnr - group_fnr[group] - fnr_threshold_; + constraint_values.push_back(fnr_constraint_value); + +#ifdef DEBUG + Log::Debug( + "DEBUG; true FNR constraint value: c = %.3f - %.3f = %.3f\n", + max_fnr, group_fnr[group], fnr_constraint_value); +#endif + } + } + + // Next multiplier will correspond to the global FPR constraint + if (IsGlobalFPRConstrained()) + { + double global_fpr = ComputeGlobalFPR(score, global_score_threshold_); + double global_fpr_constraint_value = global_fpr - global_target_fpr_; + + constraint_values.push_back(global_fpr_constraint_value); + +#ifdef DEBUG + Log::Debug( + "DEBUG; true global FPR constraint value: c = %.3f - %.3f = %.3f\n", + global_fpr, global_target_fpr_, global_fpr_constraint_value); +#endif + } + + // Next multiplier will correspond to the global FNR constraint + if (IsGlobalFNRConstrained()) + { + double global_fnr = ComputeGlobalFNR(score, global_score_threshold_); + double global_fnr_constraint_value = global_fnr - global_target_fnr_; + + constraint_values.push_back(global_fnr_constraint_value); + +#ifdef DEBUG + Log::Debug( + "DEBUG; true global FNR constraint value: c = %.3f - %.3f = %.3f\n", + global_fnr, global_target_fnr_, global_fnr_constraint_value); +#endif + } + +#ifdef DEBUG + Constrained::write_values(debugging_output_dir_, "constraint_values.dat", constraint_values); +#endif + + return constraint_values; + } + + /*! + * \brief Get gradients of the constraints w.r.t. to the scores (this will use proxy constraints!). 
+ * \param double Lagrangian multipliers in this round + * \param score prediction score in this round + * \gradients Output gradients + * \hessians Output hessians + */ + virtual void GetConstraintGradientsWRTModelOutput(const double *lagrangian_multipliers, + const double *score, score_t *gradients, + score_t * /* hessians */) const + { + + std::unordered_map group_fpr, group_fnr; + std::pair max_proxy_fpr, max_proxy_fnr; + + // Helper constant for BCE-based proxies + double xent_horizontal_shift = log(exp(proxy_margin_) - 1); // here, proxy_margin_ is the VERTICAL margin + + /** ---------------------------------------------------------------- * + * FPR (Proxy) Constraint + * ---------------------------------------------------------------- * + * It corresponds to the result of differentiating the FPR proxy + * constraint w.r.t. the score of the ensemble. + * + * FPR Proxy Constraints: + * lambda_group_i * [max(FPR_group_1, ..., FPR_group_j) - FPR_group_i] + * + * ---------------------------------------------------------------- * + * To compute it, we need to: + * 1. Compute FPR by group + * 2. Determine the group with max(FPR) + * 3. Compute derivative w.r.t. all groups except max(FPR) + * ---------------------------------------------------------------- * + * */ + if (IsFPRConstrained()) + { + if (constraint_stepwise_proxy == "hinge") + ComputeHingeFPR(score, group_fpr); + else if (constraint_stepwise_proxy == "quadratic") + ComputeQuadraticLossFPR(score, group_fpr); + else if (constraint_stepwise_proxy == "cross_entropy") + ComputeXEntropyLossFPR(score, group_fpr); + else + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + + max_proxy_fpr = Constrained::findMaxValuePair(group_fpr); + } + if (IsFNRConstrained()) + { + if (constraint_stepwise_proxy == "hinge") + ComputeHingeLossFNR(score, group_fnr); + else if (constraint_stepwise_proxy == "quadratic") + ComputeQuadraticLossFNR(score, group_fnr); + else if (constraint_stepwise_proxy == "cross_entropy") + ComputeXEntropyLossFNR(score, group_fnr); + else + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + + max_proxy_fnr = Constrained::findMaxValuePair(group_fnr); + } + + /** ---------------------------------------------------------------- * + * GRADIENTS (per instance) * + * ---------------------------------------------------------------- */ + if (weights_ != nullptr) + { + throw std::logic_error("not implemented yet"); // TODO: https://github.com/feedzai/fairgbm/issues/5 + } + + // compute pointwise gradients and hessians with implied unit weights +// #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + const auto group = group_[i]; + + // Constraint index + u_short number_of_groups = group_values_.size(); + u_short multipliers_base_index = 0; + + // ------------------------------------------------------------------- + // Skip FPR propagation if label positive, since LPs do not count for FPR constraints + // ------------------------------------------------------------------- + // Grads of proxy constraints w.r.t. 
the scores: + // (1) 0, if label positive or score <= -margin (default margin=1) + // (2) (m-1) / |LN_group_j| * (margin+score) * sum(lag multipliers except group j), if i belongs to group j whose FPR is maximal + // (3) -lambda_k * (margin+score) / |LN_group_k| if the instance belongs to group k != j (where j has maximal FPR) + // ------------------------------------------------------------------- + if (IsFPRConstrained()) + { + if (label_[i] == 0) + { + const int group_ln = group_label_negatives_.at(group); + + double fpr_constraints_gradient_wrt_pred; + // TODO: https://github.com/feedzai/fairgbm/issues/7 + + // Derivative for hinge-based proxy FPR + if (constraint_stepwise_proxy == "hinge") + fpr_constraints_gradient_wrt_pred = score[i] <= -proxy_margin_ ? 0. : 1. / group_ln; + + // Derivative for BCE-based proxy FPR + else if (constraint_stepwise_proxy == "cross_entropy") { + fpr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i] + xent_horizontal_shift)) / group_ln; +// fpr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / group_ln; // without margin + } + + // Loss-function implicitly defined as having a hinge-based derivative (quadratic loss) + else if (constraint_stepwise_proxy == "quadratic") { + fpr_constraints_gradient_wrt_pred = std::max(0., score[i] + proxy_margin_) / group_ln; + } + + else + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + + // ------------------------------------------------------------------- + // Derivative (2) because instance belongs to group with maximal FPR + // ------------------------------------------------------------------- + if (group == max_proxy_fpr.first) + { + // 2.1) Multiply by (m-1) + fpr_constraints_gradient_wrt_pred *= (number_of_groups - 1.); + + // 2.2) Sum lagrangian multipliers (all except that of group with maximal FPR) + double lag_multipliers = 0; + for (const auto &other_group : group_values_) + { + if (other_group == max_proxy_fpr.first) + continue; + else + lag_multipliers += lagrangian_multipliers[multipliers_base_index + other_group]; + } + + gradients[i] += static_cast(fpr_constraints_gradient_wrt_pred * lag_multipliers); + // hessians[i] += ... + } + else + { + // ---------------------------------------------------------------------- + // Derivative (3) because instance belongs to group with non-maximal FPR + // ---------------------------------------------------------------------- + gradients[i] += static_cast(-1. * fpr_constraints_gradient_wrt_pred * lagrangian_multipliers[multipliers_base_index + group]); + // hessians[i] += ... + } + } + + // Update index of multipliers to be used for next constraints + multipliers_base_index += number_of_groups; + } + + // Skip FNR propagation if label negative, since LNs do not count for FNR constraints + if (IsFNRConstrained()) + { + if (label_[i] == 1) + { + const int group_lp = group_label_positives_.at(group); + + double fnr_constraints_gradient_wrt_pred; + + // Derivative for hinge-based proxy FNR + if (constraint_stepwise_proxy == "hinge") + fnr_constraints_gradient_wrt_pred = score[i] >= proxy_margin_ ? 0. : -1. 
/ group_lp; + + // Derivative for BCE-based proxy FNR + else if (constraint_stepwise_proxy == "cross_entropy") { + fnr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i] - xent_horizontal_shift) - 1) / group_lp; +// fnr_constraints_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / group_lp; // without margin + } + + // Loss-function implicitly defined as having a hinge-based derivative (quadratic loss) + else if (constraint_stepwise_proxy == "quadratic") { + fnr_constraints_gradient_wrt_pred = std::min(0., score[i] - proxy_margin_) / group_lp; + } + + else + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + + // ------------------------------------------------------------------- + // Derivative (2) because instance belongs to group with max FNR + // ------------------------------------------------------------------- + if (group == max_proxy_fnr.first) + { + // 2.1) Multiply by (m-1) + fnr_constraints_gradient_wrt_pred *= (number_of_groups - 1.0); + + // 2.2) Sum lagrangian multipliers (all except that of group with maximal FNR) + double lag_multipliers = 0; + for (const auto &other_group : group_values_) + { + if (other_group == max_proxy_fnr.first) + continue; + else + lag_multipliers += lagrangian_multipliers[multipliers_base_index + other_group]; + } + + gradients[i] += static_cast(fnr_constraints_gradient_wrt_pred * lag_multipliers); + // hessians[i] += ... + } + else + { + // ---------------------------------------------------------------------- + // Derivative (3) because instance belongs to group with non-maximal FNR + // ---------------------------------------------------------------------- + gradients[i] += static_cast(-1. * fnr_constraints_gradient_wrt_pred * lagrangian_multipliers[multipliers_base_index + group]); + // hessians[i] += ... + } + } + + // Update index of multipliers to be used for next constraints + multipliers_base_index += number_of_groups; + } + + // ** Global Constraints ** + if (IsGlobalFPRConstrained()) + { + if (label_[i] == 0) + { // Condition for non-zero gradient + double global_fpr_constraint_gradient_wrt_pred; + // Gradient for hinge proxy FPR + if (constraint_stepwise_proxy == "hinge") { + global_fpr_constraint_gradient_wrt_pred = score[i] >= -proxy_margin_ ? 1. / total_label_negatives_ : 0.; + } + + // Gradient for BCE proxy FPR + else if (constraint_stepwise_proxy == "cross_entropy") { + global_fpr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i] + xent_horizontal_shift)) / total_label_negatives_; +// global_fpr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / total_label_negatives_; // without margin + } + + // Hinge-based gradient (for quadratic proxy FPR) + else if (constraint_stepwise_proxy == "quadratic") { + global_fpr_constraint_gradient_wrt_pred = std::max(0., score[i] + proxy_margin_) / total_label_negatives_; + } + + else + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + + // Update instance gradient and hessian + gradients[i] += (score_t)(lagrangian_multipliers[multipliers_base_index] * global_fpr_constraint_gradient_wrt_pred); + // hessians[i] += ... 
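+          // Illustrative note (not part of this patch): for the cross-entropy proxy used above,
+          // the per-instance proxy-FPR term is l(s) = log(1 + exp(s + log(exp(b) - 1))) / |LN|,
+          // with b = proxy_margin_ (so that l(0) = b), and its derivative is
+          // d l / d s = sigmoid(s + log(exp(b) - 1)) / |LN|,
+          // which is the expression assigned to global_fpr_constraint_gradient_wrt_pred above
+          // (the group-wise branch uses |LN_group| in place of |LN|).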
+ } + + // Update index of multipliers to be used for next constraints + multipliers_base_index += 1; + } + + if (IsGlobalFNRConstrained()) + { + if (label_[i] == 1) + { // Condition for non-zero gradient + double global_fnr_constraint_gradient_wrt_pred; + + // Gradient for hinge proxy FNR + if (constraint_stepwise_proxy == "hinge") { + global_fnr_constraint_gradient_wrt_pred = score[i] >= proxy_margin_ ? 0. : -1. / total_label_positives_; + } + + // Gradient for BCE proxy FNR + else if (constraint_stepwise_proxy == "cross_entropy") { + global_fnr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i] - xent_horizontal_shift) - 1) / total_label_positives_; +// global_fnr_constraint_gradient_wrt_pred = (Constrained::sigmoid(score[i]) - label_[i]) / total_label_positives_; // without margin + } + + // Hinge-based gradient (for quadratic proxy FNR) + else if (constraint_stepwise_proxy == "quadratic") { + global_fnr_constraint_gradient_wrt_pred = std::min(0., score[i] - proxy_margin_) / total_label_positives_; + } + + else { + throw std::invalid_argument("constraint_stepwise_proxy=" + constraint_stepwise_proxy + " not implemented!"); + } + + // Update instance gradient and hessian + gradients[i] += (score_t)(lagrangian_multipliers[multipliers_base_index] * + global_fnr_constraint_gradient_wrt_pred); + // hessians[i] += ... + } + + // Update index of multipliers to be used for next constraints + multipliers_base_index += 1; + } + } + } + + bool IsConstrained() const override { return true; } + + // convert score to a probability + void ConvertOutput(const double *input, double *output) const override + { + *output = 1.0f / (1.0f + std::exp(-(*input))); + } + + bool IsFPRConstrained() const + { + return (constraint_type == "FPR" || constraint_type == "FPR,FNR"); + // NOTE: Order of constraints in config file doesn't matter, it's sorted beforehand + } + + bool IsFNRConstrained() const + { + return (constraint_type == "FNR" || constraint_type == "FPR,FNR"); + } + + bool IsGlobalFPRConstrained() const + { + return (global_constraint_type == "FPR" || global_constraint_type == "FPR,FNR"); + } + + bool IsGlobalFNRConstrained() const + { + return (global_constraint_type == "FNR" || global_constraint_type == "FPR,FNR"); + } + + int NumConstraints() const override + { + int group_size = (int)group_values_.size(); + int num_constraints = 0; + + if (IsFPRConstrained()) + num_constraints += group_size; + if (IsFNRConstrained()) + num_constraints += group_size; + if (IsGlobalFPRConstrained()) + num_constraints += 1; + if (IsGlobalFNRConstrained()) + num_constraints += 1; + + return num_constraints; + } + + /*! + * \brief Computes group-wise false positive rate w.r.t. a given probabilities_threshold. 
+ * \param score prediction score in this round (logodds) + * \param probabilities_threshold to consider for computing the FPR + * \group_fpr Output the FPR per group + */ + void ComputeFPR(const double *score, double probabilities_threshold, std::unordered_map &group_fpr) const + { + std::unordered_map false_positives; + std::unordered_map label_negatives; + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + if (label_[i] == 0) + { + label_negatives[group] += 1; + + const double z = 1.0f / (1.0f + std::exp(-score[i])); + if (z >= probabilities_threshold) + false_positives[group] += 1; + } + } + + for (auto group_id : group_values_) + { + double fpr; + if (label_negatives[group_id] == 0) + fpr = 0; + else + fpr = ((double)false_positives[group_id]) / ((double)label_negatives[group_id]); + + group_fpr[group_id] = fpr; + } + } + + /** + * Computes global False-Positive Rate according to the given threshold. + * @param score + * @param probabilities_threshold + * @return the global FNR + */ + double ComputeGlobalFPR(const double *score, double probabilities_threshold) const + { + int false_positives = 0, label_negatives = 0; + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + if (label_[i] == 0) + { + label_negatives += 1; + + const double z = 1.0f / (1.0f + std::exp(-score[i])); + if (z >= probabilities_threshold) + false_positives += 1; + } + } + + return (double)false_positives / (double)label_negatives; + } + + /*! + * \brief Get hinge-proxy false positive rate w.r.t. a given margin + * \param array of scores -> prediction score in this round + * \param margin to consider for computing the Hinge approximation of FPR + * \group_fpr Output the proxy FPR per group + */ + void ComputeHingeFPR(const double *score, std::unordered_map &group_fpr) const + { + std::unordered_map false_positives; // map of group index to the respective hinge-proxy FPs + std::unordered_map label_negatives; // map of group index to the respective number of LNs + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + // HingeFPR uses only label negatives + if (label_[i] == 0) + { + label_negatives[group] += 1; + + // proxy_margin_ is the line intercept value + const double hinge_score = proxy_margin_ + score[i]; + false_positives[group] += std::max(0.0, hinge_score); + } + } + + for (auto group_id : group_values_) + { + double fpr; + if (label_negatives[group_id] == 0) + fpr = 0; + else + fpr = false_positives[group_id] / label_negatives[group_id]; + + group_fpr[group_id] = fpr; + } + } + + /** + * Compute quadratic-proxy FPR (with a given margin). 
+ * + * Proxy FPR: (1/2) * (H_i + margin)^2 * I[H_i > -margin and y_i == 0] + * + * @param score array of scores + * @param group_fpr hash-map of group to proxy-FPR + */ + void ComputeQuadraticLossFPR(const double *score, std::unordered_map &group_fpr) const + { + std::unordered_map false_positives; // map of group index to the respective proxy FPs + std::unordered_map label_negatives; // map of group index to the respective number of LNs + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + // FPR uses only label NEGATIVES + if (label_[i] == 0 and score[i] > -proxy_margin_) + { // Conditions for non-zero proxy-FPR value + label_negatives[group] += 1; + + // proxy_margin_ corresponds to the symmetric of the function's zero point; f(-proxy_margin_)=0 + const double quadratic_score = (1. / 2.) * std::pow(score[i] + proxy_margin_, 2); + assert(quadratic_score >= 0.); + false_positives[group] += quadratic_score; + } + } + + for (auto group_id : group_values_) + { + double fpr; + if (label_negatives[group_id] == 0) + fpr = 0; + else + fpr = false_positives[group_id] / label_negatives[group_id]; + + group_fpr[group_id] = fpr; + } + } + + /** + * Compute cross-entropy-proxy FPR. + * Function: + * l(a) = log(1 + exp( a + log(exp(b) - 1) )), where b = proxy_margin_ = l(0) + * + * @param score array of scores + * @param group_fpr hash-map of group to proxy-FPR + */ + void ComputeXEntropyLossFPR(const double *score, std::unordered_map &group_fpr) const + { + std::unordered_map false_positives; // map of group index to the respective proxy FPs + std::unordered_map label_negatives; // map of group index to the respective number of LNs + double xent_horizontal_shift = log(exp(proxy_margin_) - 1); + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + // FPR uses only label NEGATIVES + if (label_[i] == 0) + { + label_negatives[group] += 1; + + // proxy_margin_ corresponds to the vertical margin at x=0; l(0) = proxy_margin_ + const double xent_score = log(1 + exp(score[i] + xent_horizontal_shift)); + assert(xent_score >= 0.); + false_positives[group] += xent_score; + } + } + + for (auto group_id : group_values_) + { + double fpr; + if (label_negatives[group_id] == 0) + fpr = 0; + else + fpr = false_positives[group_id] / label_negatives[group_id]; + + group_fpr[group_id] = fpr; + } + } + + /*! + * \brief Computes group-wise false negative rate w.r.t. a given probabilities_threshold. 
+ * \param score prediction score in this round (log-odds) + * \param probabilities_threshold to consider for computing the FNR + * \group_fnr Output the FNR per group + */ + void ComputeFNR(const double *score, double probabilities_threshold, std::unordered_map &group_fnr) const + { + std::unordered_map false_negatives; + std::unordered_map label_positives; + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + if (label_[i] == 1) + { + label_positives[group] += 1; + + const double z = 1.0f / (1.0f + std::exp(-score[i])); + if (z < probabilities_threshold) + false_negatives[group] += 1; + } + } + + for (auto group_id : group_values_) + { + double fnr; + if (label_positives[group_id] == 0) + fnr = 0; + else + fnr = ((double)false_negatives[group_id]) / ((double)label_positives[group_id]); + group_fnr[group_id] = fnr; + } + }; + + /** + * Computes global False-Negative Rate according to the given threshold. + * @param score + * @param probabilities_threshold + * @return the global FNR + */ + double ComputeGlobalFNR(const double *score, double probabilities_threshold) const + { + int false_negatives = 0, label_positives = 0; + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + if (label_[i] == 1) + { + label_positives += 1; + + const double z = 1.0f / (1.0f + std::exp(-score[i])); + if (z < probabilities_threshold) + false_negatives += 1; + } + } + + return (double)false_negatives / (double)label_positives; + } + + /*! + * \brief Get hinge-proxy FNR w.r.t. a given margin. + * \param score prediction score in this round + * \param margin to consider for computing the FNR + * \group_fnr Output the proxy FNR per group + */ + void ComputeHingeLossFNR(const double *score, std::unordered_map &group_fnr) const + { + std::unordered_map false_negatives; // map of group index to the respective hinge-proxy FNs + std::unordered_map label_positives; + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + if (label_[i] == 1) + { + label_positives[group] += 1; + + const double hinge_score = proxy_margin_ - score[i]; + false_negatives[group] += std::max(0.0, hinge_score); + } + } + + for (auto group_id : group_values_) + { + double fnr; + if (label_positives[group_id] == 0) + fnr = 0; + else + fnr = false_negatives[group_id] / label_positives[group_id]; + group_fnr[group_id] = fnr; + } + }; + + /** + * Compute quadratic-proxy FNR (with a given margin). 
+ * + * Proxy FNR: (1/2) * (H_i - margin)^2 * I[H_i < margin and y_i == 1] + * + * @param score array of scores + * @param group_fnr hash-map of group to proxy-FNR + */ + void ComputeQuadraticLossFNR(const double *score, std::unordered_map &group_fnr) const + { + std::unordered_map false_negatives; // map of group index to the respective proxy FPs + std::unordered_map label_positives; // map of group index to the respective number of LNs + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + // FNR uses only label POSITIVES + if (label_[i] == 1 and score[i] < proxy_margin_) + { // Conditions for non-zero proxy-FNR value + label_positives[group] += 1; + + // proxy_margin_ corresponds to the function's zero point; f(proxy_margin_)=0 + const double quadratic_score = (1. / 2.) * std::pow(score[i] - proxy_margin_, 2); + assert(quadratic_score >= 0.); + false_negatives[group] += quadratic_score; + } + } + + for (auto group_id : group_values_) + { + double fnr; + if (label_positives[group_id] == 0) + fnr = 0; + else + fnr = false_negatives[group_id] / label_positives[group_id]; + + group_fnr[group_id] = fnr; + } + } + + /** + * Compute cross-entropy-proxy FNR. + * Function: + * l(a) = log(1 + exp( -a + log(exp(b) - 1) )), where b = proxy_margin_ = l(0) + * + * @param score array of scores + * @param group_fnr hash-map of group to proxy-FNR + */ + void ComputeXEntropyLossFNR(const double *score, std::unordered_map &group_fnr) const + { + std::unordered_map false_negatives; // map of group index to the respective proxy FPs + std::unordered_map label_positives; // map of group index to the respective number of LNs + double xent_horizontal_shift = log(exp(proxy_margin_) - 1); + + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + constraint_group_t group = group_[i]; + + // FNR uses only label POSITIVES + if (label_[i] == 1) + { + label_positives[group] += 1; + + // proxy_margin_ corresponds to the vertical margin at x=0; l(0) = proxy_margin_ + const double xent_score = log(1 + exp(xent_horizontal_shift - score[i])); + assert(xent_score >= 0.); + false_negatives[group] += xent_score; + } + } + + for (auto group_id : group_values_) + { + double fnr; + if (label_positives[group_id] == 0) + fnr = 0; + else + fnr = false_negatives[group_id] / label_positives[group_id]; + + group_fnr[group_id] = fnr; + } + } + + /*! + * \brief Get label positive and label negative counts. 
+ */ + void ComputeLabelCounts() + { + // #pragma omp parallel for schedule(static) // TODO: https://github.com/feedzai/fairgbm/issues/6 + for (data_size_t i = 0; i < num_data_; ++i) + { + if (label_[i] == 1) + { + this->group_label_positives_[group_[i]] += 1; + this->total_label_positives_ += 1; + } + + else if (label_[i] == 0) + { + this->group_label_negatives_[group_[i]] += 1; + this->total_label_negatives_ += 1; + } + + else + throw std::runtime_error("invalid label type"); + } + }; + +protected: + static std::string ValidateProxyFunctionName(std::string func_name, bool allow_empty = false) + { + std::transform(func_name.begin(), func_name.end(), func_name.begin(), ::tolower); + if (func_name == "bce" or func_name == "xentropy" or func_name == "entropy") + func_name = "cross_entropy"; + + if (not( + func_name == "hinge" or + func_name == "quadratic" or + func_name == "cross_entropy" or + (allow_empty and func_name.empty()))) + { + throw std::invalid_argument("Got invalid proxy function: '" + func_name + "'"); + } + + return func_name; + } + + /*! \brief Number of data points */ + data_size_t num_data_; + /*! \brief Pointer for label */ + const label_t *label_; + /*! \brief Weights for data */ + const label_t *weights_; + + /*! \brief Pointer for group */ + const constraint_group_t *group_; + /*! \brief Unique group values */ + std::vector group_values_; + + /*! \brief Label positives per group */ + std::unordered_map group_label_positives_; + /*! \brief Label Negatives per group */ + std::unordered_map group_label_negatives_; + + /*! \brief Total number of Label Positives */ + int total_label_positives_ = 0; + + /*! \brief Total number of Label Negatives */ + int total_label_negatives_ = 0; + + /*! \brief Type of constraint */ + std::string constraint_type; + + /*! \brief Function to use as a proxy for the step-wise function in CONSTRAINTS. */ + std::string constraint_stepwise_proxy; + + /*! \brief Function to use as a proxy for the step-wise function in the OBJECTIVE. */ + std::string objective_stepwise_proxy; + + /*! \brief Score threshold to compute confusion matrix (over predicted probabilities) */ + score_t score_threshold_ = 0.5; + + /*! \brief FPR threshold used in FPR constraints (small margin for constraint fulfillment) */ + score_t fpr_threshold_ = 0.0; + + /*! \brief FNR threshold used in FNR constraints (small margin for constraint fulfillment) */ + score_t fnr_threshold_ = 0.0; + + /*! \brief Margin threshold used in the Hinge approximation */ + score_t proxy_margin_ = 1.0; + + /*! \brief Type of global constraint */ + std::string global_constraint_type; + + /*! \brief Target value for the global FPR constraint */ + score_t global_target_fpr_; + + /*! \brief Target value for the global FNR constraint */ + score_t global_target_fnr_; + + /*! \brief Score threshold used for the global constraints */ + score_t global_score_threshold_ = 0.5; + + /*! 
\brief Where to save debug files to */ + std::string debugging_output_dir_; +}; +} // namespace LightGBM #endif // LightGBM_OBJECTIVE_FUNCTION_H_ diff --git a/include/LightGBM/utils/common.h b/include/LightGBM/utils/common.h index 43573573d..c9609f038 100644 --- a/include/LightGBM/utils/common.h +++ b/include/LightGBM/utils/common.h @@ -455,8 +455,8 @@ inline static std::vector StringToArrayFast(const std::string& str, int n) { return ret; } -template -inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale = false) { +template +inline static std::string Join(const std::vector& strs, const char* delimiter, const bool force_C_locale = false) { if (strs.empty()) { return std::string(""); } @@ -1205,7 +1205,7 @@ struct __TToStringHelper { * Converts an array to a string with with values separated by the space character. * This method replaces Common's ``ArrayToString`` and ``ArrayToStringFast`` functionality * and is locale-independent. -* +* * \note If ``high_precision_output`` is set to true, * floating point values are output with more digits of precision. */ diff --git a/include/LightGBM/utils/constrained.hpp b/include/LightGBM/utils/constrained.hpp new file mode 100644 index 000000000..a8ce73a32 --- /dev/null +++ b/include/LightGBM/utils/constrained.hpp @@ -0,0 +1,78 @@ +/** + * The copyright of this file belongs to Feedzai. The file cannot be + * reproduced in whole or in part, stored in a retrieval system, + * transmitted in any form, or by any means electronic, mechanical, + * photocopying, or otherwise, without the prior permission of the owner. + * + * (c) 2021 Feedzai, Strictly Confidential + */ + +#ifndef LIGHTGBM_CONSTRAINED_HPP +#define LIGHTGBM_CONSTRAINED_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace LightGBM { +namespace Constrained { + +/** + * Standard sigmoid mathematical function. + * @param x the input to the function. + * @return the sigmoid of the input. + */ +inline double sigmoid(double x) { + return 1. / (1. + std::exp(-x)); +} + +/** + * Finds the (key, value) pair with highest value. + * @tparam Key The type of the Map Key. + * @tparam Value The type of the Map Value. + * @param x Reference to the map to search over. + * @return The pair with highest value V. + */ +template +std::pair findMaxValuePair(std::unordered_map const &x) +{ + return *std::max_element( + x.begin(), x.end(), + [](const std::pair &p1, const std::pair &p2) { + return p1.second < p2.second; + } + ); +} + +/** + * Writes the given values to the end of the given file. + * @tparam T The type of values in the input vector. + * @tparam Allocator The type of allocator in the input vector. + * @param dir The directory of the file to write on. + * @param filename The name of the file to write on. + * @param values A vector of the values to append to the file. + */ +template> +void write_values(const std::string& dir, const std::string& filename, + std::vector values) { + struct stat buf; + + std::string filename_path = dir + "/" + filename; + bool file_exists = (stat(filename_path.c_str(), &buf) != -1); + + std::ofstream outfile; + outfile.open(filename_path, std::ios::out | (file_exists ? 
std::ios::app : std::ios::trunc)); + outfile << LightGBM::Common::Join(values, ",") << std::endl; + + outfile.close(); +} +} +} + +#endif //LIGHTGBM_CONSTRAINED_HPP diff --git a/src/application/application.cpp b/src/application/application.cpp index e82cfcada..c0276d8e3 100644 --- a/src/application/application.cpp +++ b/src/application/application.cpp @@ -49,6 +49,11 @@ Application::~Application() { } } +/** + * Loads the application config, either as command-line arguments or from a config file. + * @param argc Number of command-line arguments. + * @param argv Array of strings containing the command-line arguments. A common element is "config=". + */ void Application::LoadParameters(int argc, char** argv) { std::unordered_map params; for (int i = 1; i < argc; ++i) { diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp index d393d46d5..33959475c 100644 --- a/src/boosting/gbdt.cpp +++ b/src/boosting/gbdt.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ GBDT::GBDT() average_output_ = false; tree_learner_ = nullptr; linear_tree_ = false; + debugging_output_dir_ = "."; } GBDT::~GBDT() { @@ -83,14 +85,34 @@ void GBDT::Init(const Config* config, const Dataset* train_data, const Objective if (objective_function_ != nullptr) { num_tree_per_iteration_ = objective_function_->NumModelPerIteration(); if (objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) { - Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName()); + Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", + objective_function_->GetName()); } } is_constant_hessian_ = GetIsConstHessian(objective_function); - tree_learner_ = std::unique_ptr(TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type, - config_.get())); + // load output dir + debugging_output_dir_ = config->debugging_output_dir; + + // constraint configurations + is_constrained_ = objective_function->IsConstrained(); + lagrangian_learning_rate_ = config_->multiplier_learning_rate; + + int num_constraints = objective_function->NumConstraints(); + + // If no Lagrangian multipliers are specified, start everything at zero + if ((config->init_lagrangian_multipliers).empty()) { + std::vector default_lag_multipliers(num_constraints, 0); + lagrangian_multipliers_.push_back(default_lag_multipliers); + } else { + CHECK_EQ(num_constraints, (int) config->init_lagrangian_multipliers.size()); + lagrangian_multipliers_.push_back(config->init_lagrangian_multipliers); + } + + tree_learner_ = std::unique_ptr( + TreeLearner::CreateTreeLearner(config_->tree_learner, config_->device_type, config_.get()) + ); // init tree learner tree_learner_->Init(train_data_, is_constant_hessian_); @@ -167,15 +189,51 @@ void GBDT::AddValidDataset(const Dataset* valid_data, } } +/** + * @brief Computes gradients and hessians. + */ void GBDT::Boosting() { Common::FunctionTimer fun_timer("GBDT::Boosting", global_timer); if (objective_function_ == nullptr) { Log::Fatal("No object function provided"); } - // objective function will calculate gradients and hessians + // Objective function will calculate gradients and hessians int64_t num_score = 0; - objective_function_-> - GetGradients(GetTrainingScore(&num_score), gradients_.data(), hessians_.data()); + + // Get predictions for all instances - in log-odds! 
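  // (A raw "log-odds" score s maps to a predicted probability via sigmoid(s) = 1 / (1 + exp(-s));
  //  the predictive-loss and constraint gradients computed below are therefore expressed directly
  //  in terms of these raw scores rather than probabilities.)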
+ const double *score = GetTrainingScore(&num_score); + + // GetGradients computes only gradients/hessians from the predictive loss! + // and will change the gradients and hessians variables in place. + objective_function_->GetGradients(score, gradients_.data(), hessians_.data()); + // ^ a.k.a. GetPredictiveLossGradientsWRTModelOutput + + if (is_constrained_) { + auto constrained_objective_function = dynamic_cast(objective_function_); + + // Compute the contribution of the constraints for the Lagrangian! + // (as we're in the descent step, this may use the proxy constraints) + constrained_objective_function->GetConstraintGradientsWRTModelOutput( + lagrangian_multipliers_.back().data(), + score, gradients_.data(), hessians_.data()); + // ^ will change gradients and hessians in place + // + // NOTE: lagrangian_multipliers is a vector of vectors - each element represents the multipliers at a given iteration; + // TODO: https://github.com/feedzai/fairgbm/issues/8 + +#ifdef DEBUG + // Dump lagrangian multipliers + Constrained::write_values(debugging_output_dir_, "lagrangian_multipliers.dat", lagrangian_multipliers_.back()); + + // Dump the gradients of the Lagrangian (grads of loss + grads of constraints) + Constrained::write_values>( + debugging_output_dir_, "gradients.lagrangian.dat", gradients_); + + // Dump hessians, we don't currently use them though :P + Constrained::write_values>( + debugging_output_dir_, "hessians.lagrangian.dat", hessians_); +#endif + } } data_size_t GBDT::BaggingHelper(data_size_t start, data_size_t cnt, data_size_t* buffer) { @@ -263,13 +321,20 @@ void GBDT::Bagging(int iter) { void GBDT::Train(int snapshot_freq, const std::string& model_output_path) { Common::FunctionTimer fun_timer("GBDT::Train", global_timer); - bool is_finished = false; + bool is_finished = false, is_finished_lagrangian = false; auto start_time = std::chrono::steady_clock::now(); - for (int iter = 0; iter < config_->num_iterations && !is_finished; ++iter) { + + for (int iter = 0; iter < config_->num_iterations and (!is_finished or !is_finished_lagrangian); ++iter) { + + // Do one training iteration + // - execute a descent step on the loss function; + // - (optionally) execute an ascent step w.r.t. the Lagrangian multipliers (only if using constrained optim.) is_finished = TrainOneIter(nullptr, nullptr); + if (!is_finished) { is_finished = EvalAndCheckEarlyStopping(); } + auto end_time = std::chrono::steady_clock::now(); // output used time per iteration Log::Info("%f seconds elapsed, finished iteration %d", std::chrono::durationGetName()) == std::string("regression_l1") || std::string(objective_function_->GetName()) == std::string("quantile") || std::string(objective_function_->GetName()) == std::string("mape")) { - Log::Warning("Disabling boost_from_average in %s may cause the slow convergence", objective_function_->GetName()); + Log::Warning("Disabling boost_from_average in %s may cause the slow convergence", + objective_function_->GetName()); } } return 0.0f; } +/** + * @brief Descent step! + * + * @param gradients + * @param hessians + * @return whether training has ended + */ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) { Common::FunctionTimer fun_timer("GBDT::TrainOneIter", global_timer); + + // Step 1. Initialize vector to store the scores for each tree in the iteration + // (boosting always uses 1 tree per iteration) std::vector init_scores(num_tree_per_iteration_, 0.0); + + // Step 2. 
Add first weak learner (predict the average value, aka, BoostFromAverage) // boosting first if (gradients == nullptr || hessians == nullptr) { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { init_scores[cur_tree_id] = BoostFromAverage(cur_tree_id, true); } + + // Step 2.1. Compute gradients and hessians Boosting(); gradients = gradients_.data(); hessians = hessians_.data(); } + + // Step 3. Run bagging // bagging logic - Bagging(iter_); + Bagging(iter_); // e.g., run GOSS if LightGBM (or Bagging only if RF) + // Step 4. Fit a weak learner (if RF, will run one split for multiple trees) bool should_continue = false; for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { const size_t offset = static_cast(cur_tree_id) * num_data_; - std::unique_ptr new_tree(new Tree(2, false, false)); + + // Step 4.1. Create a new tree + std::unique_ptr new_tree(new Tree(2, false, false)); // this is a placeholder pointer + + // class_need_train_ will keep tabs of which trees in an RF haven't finished training if (class_need_train_[cur_tree_id] && train_data_->num_features() > 0) { auto grad = gradients + offset; auto hess = hessians + offset; + // need to copy gradients for bagging subset. if (is_use_subset_ && bag_data_cnt_ < num_data_) { for (int i = 0; i < bag_data_cnt_; ++i) { @@ -397,24 +485,41 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) { grad = gradients_.data() + offset; hess = hessians_.data() + offset; } + + // Step 4.2. Train the tree (use grads and hessians) bool is_first_tree = models_.size() < static_cast(num_tree_per_iteration_); new_tree.reset(tree_learner_->Train(grad, hess, is_first_tree)); } + // We found at least a split! if (new_tree->num_leaves() > 1) { should_continue = true; + // Get current scores of each instance in dataset, given by this tree (aka offset) auto score_ptr = train_score_updater_->score() + offset; - auto residual_getter = [score_ptr](const label_t* label, int i) {return static_cast(label[i]) - score_ptr[i]; }; + // Get residual of each instance in dataset (label - predict) + auto residual_getter = [score_ptr](const label_t* label, int i) { + return static_cast(label[i]) - score_ptr[i]; + }; + // Recompute tree leaf values given the specific objective function tree_learner_->RenewTreeOutput(new_tree.get(), objective_function_, residual_getter, num_data_, bag_data_indices_.data(), bag_data_cnt_); + // shrinkage by learning rate new_tree->Shrinkage(shrinkage_rate_); + // update score UpdateScore(new_tree.get(), cur_tree_id); + + // Add bias if any was computed (from BoostFromAverage) + // (should only add for the first boosting iteration) if (std::fabs(init_scores[cur_tree_id]) > kEpsilon) { new_tree->AddBias(init_scores[cur_tree_id]); } - } else { + } + + // The tree wasn't grown <=> we didn't find a split w/ positive IG + // This means that the prediction will be simply to boost from average. + else { // only add default score one-time if (models_.size() < static_cast(num_tree_per_iteration_)) { double output = 0.0; @@ -425,6 +530,7 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) { } else { output = init_scores[cur_tree_id]; } + // Add a constraint-value tree new_tree->AsConstantTree(output); // updates scores train_score_updater_->AddScore(output, cur_tree_id); @@ -447,10 +553,60 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) { return true; } + // Only if running constrained optimization! 
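  // (Each boosting round is therefore one step of an alternating min-max scheme: the tree fitted
  //  above is a gradient-descent step on the Lagrangian w.r.t. the model, while the call below is
  //  a projected gradient-ascent step on the Lagrangian w.r.t. the multipliers.)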
+ // Ascent step: update value of Lagrangian multipliers + if (is_constrained_) { + TrainLagrangianOneIter(nullptr, nullptr); + } + ++iter_; return false; } +// TODO: https://github.com/feedzai/fairgbm/issues/7 +// - implement normalization / bound on multipliers; +// - implement early stopping criteria (convergence fulfilled); +/*! +* \brief Gradient ascent step w.r.t. Lagrangian multipliers (used only for constrained optimization) +* \param gradients nullptr for using default objective, otherwise use self-defined boosting +* \param hessians nullptr for using default objective, otherwise use self-defined boosting +* \return True if cannot train anymore (or training has ended due to early stopping) +*/ +bool GBDT::TrainLagrangianOneIter(const score_t* /* gradients */, const score_t* /* hessians */) { + auto constrained_objective_function = dynamic_cast(objective_function_); + + int64_t num_score = 0; + // Get Lagrangian gradients w.r.t. multipliers + auto lag_updates = constrained_objective_function->GetLagrangianGradientsWRTMultipliers( + GetTrainingScore(&num_score)); + + // Get Lagrangian multipliers of the latest iteration + auto current_lag_multipliers = lagrangian_multipliers_.back(); + + // Initialize updated lagrangian multipliers w/ previous value + std::vector updated_lag_multipliers(current_lag_multipliers); + + // Gradient ascent in Lagrangian multipliers (or constraint space) + for (uint i = 0; i < lag_updates.size(); i++) { + updated_lag_multipliers[i] += lagrangian_learning_rate_ * lag_updates[i]; + + // Ensuring multipliers >= 0 -> using *INEQUALITY* constraints! c(theta) <= 0 + updated_lag_multipliers[i] = std::max(0.0, updated_lag_multipliers[i]); + // NOTE + // - This aims to guarantee that the problem remains bounded, which is true, + // provided the Lagrangian multiplier remains >= 0; + // - If multipliers are allowed to go negative -> using *EQUALITY* constraints! 
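    // Worked example with illustrative numbers: for multiplier_learning_rate = 0.1, a current
    // multiplier of 0.5, and a violated constraint c(theta) = +0.2, the update gives
    // max(0, 0.5 + 0.1 * 0.2) = 0.52; for a satisfied constraint c(theta) = -0.2 it gives
    // max(0, 0.5 - 0.02) = 0.48. Multipliers thus grow while a constraint is violated and
    // shrink, but never below zero, once it is satisfied.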
+ } + lagrangian_multipliers_.push_back(updated_lag_multipliers); + +#ifdef DEBUG + // Log constraints violation to file + Constrained::write_values(debugging_output_dir_, "functions_evals.dat", lag_updates); +#endif + + return false; +} + void GBDT::RollbackOneIter() { if (iter_ <= 0) { return; } // reset score @@ -466,6 +622,11 @@ void GBDT::RollbackOneIter() { for (int cur_tree_id = 0; cur_tree_id < num_tree_per_iteration_; ++cur_tree_id) { models_.pop_back(); } + + // remove lagrangian multipliers if constrained objective + if (is_constrained_) + lagrangian_multipliers_.pop_back(); + --iter_; } @@ -488,6 +649,7 @@ bool GBDT::EvalAndCheckEarlyStopping() { return is_met_early_stopping; } +// This method updates the current (total) score associated with each instance void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { Common::FunctionTimer fun_timer("GBDT::UpdateScore", global_timer); // update training score @@ -496,7 +658,11 @@ void GBDT::UpdateScore(const Tree* tree, const int cur_tree_id) { // we need to predict out-of-bag scores of data for boosting if (num_data_ - bag_data_cnt_ > 0) { - train_score_updater_->AddScore(tree, bag_data_indices_.data() + bag_data_cnt_, num_data_ - bag_data_cnt_, cur_tree_id); + train_score_updater_->AddScore( + tree, + bag_data_indices_.data() + bag_data_cnt_, + num_data_ - bag_data_cnt_, + cur_tree_id); } } else { @@ -611,7 +777,8 @@ void GBDT::PredictContrib(const double* features, double* output) const { for (int i = start_iteration_for_pred_; i < end_iteration_for_pred; ++i) { // predict all the trees for one iteration for (int k = 0; k < num_tree_per_iteration_; ++k) { - models_[i * num_tree_per_iteration_ + k]->PredictContrib(features, num_features, output + k*(num_features + 1)); + models_[i * num_tree_per_iteration_ + k]->PredictContrib( + features, num_features, output + k*(num_features + 1)); } } } @@ -691,7 +858,9 @@ void GBDT::ResetTrainingData(const Dataset* train_data, const ObjectiveFunction* if (objective_function_ != nullptr) { CHECK_EQ(num_tree_per_iteration_, objective_function_->NumModelPerIteration()); if (objective_function_->IsRenewTreeOutput() && !config_->monotone_constraints.empty()) { - Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName()); + Log::Fatal( + "Cannot use ``monotone_constraints`` in %s objective, please disable it.", + objective_function_->GetName()); } } is_constant_hessian_ = GetIsConstHessian(objective_function); @@ -746,8 +915,11 @@ void GBDT::ResetConfig(const Config* config) { if (!config->feature_contri.empty()) { CHECK_EQ(static_cast(train_data_->num_total_features()), config->feature_contri.size()); } - if (objective_function_ != nullptr && objective_function_->IsRenewTreeOutput() && !config->monotone_constraints.empty()) { - Log::Fatal("Cannot use ``monotone_constraints`` in %s objective, please disable it.", objective_function_->GetName()); + if (objective_function_ != nullptr && objective_function_->IsRenewTreeOutput() && + !config->monotone_constraints.empty()) { + Log::Fatal( + "Cannot use ``monotone_constraints`` in %s objective, please disable it.", + objective_function_->GetName()); } early_stopping_round_ = new_config->early_stopping_round; shrinkage_rate_ = new_config->learning_rate; @@ -785,8 +957,11 @@ void GBDT::ResetBaggingConfig(const Config* config, bool is_change_dataset) { if ((config->bagging_fraction < 1.0 || balance_bagging_cond) && config->bagging_freq > 0) { need_re_bagging_ = false; if (!is_change_dataset 
&& - config_.get() != nullptr && config_->bagging_fraction == config->bagging_fraction && config_->bagging_freq == config->bagging_freq - && config_->pos_bagging_fraction == config->pos_bagging_fraction && config_->neg_bagging_fraction == config->neg_bagging_fraction) { + config_.get() != nullptr && + config_->bagging_fraction == config->bagging_fraction && + config_->bagging_freq == config->bagging_freq && + config_->pos_bagging_fraction == config->pos_bagging_fraction && + config_->neg_bagging_fraction == config->neg_bagging_fraction) { return; } if (balance_bagging_cond) { diff --git a/src/boosting/gbdt.h b/src/boosting/gbdt.h index a99b5fb9a..34acfdf4f 100644 --- a/src/boosting/gbdt.h +++ b/src/boosting/gbdt.h @@ -146,6 +146,14 @@ class GBDT : public GBDTBase { */ bool TrainOneIter(const score_t* gradients, const score_t* hessians) override; + /*! + * \brief Training logic for the constrained optimization step. + * \param gradients nullptr for using default objective, otherwise use self-defined boosting + * \param hessians nullptr for using default objective, otherwise use self-defined boosting + * \return True if cannot train any more + */ + bool TrainLagrangianOneIter(const score_t* gradients, const score_t* hessians) override; + /*! * \brief Rollback one iteration */ @@ -534,6 +542,18 @@ class GBDT : public GBDTBase { ParallelPartitionRunner bagging_runner_; Json forced_splits_json_; bool linear_tree_; + + /*! \brief Whether we're running constrained optimization */ + bool is_constrained_; + + /*! \brief Shrinkage rate for the Ascent step */ + double lagrangian_learning_rate_; + + /*! \brief Lagrangian multiplier(s) per iteration */ + std::vector> lagrangian_multipliers_; // TODO: https://github.com/feedzai/fairgbm/issues/8 + + /*! \brief Output directory to store debug files (e.g., gradients/hessians) */ + std::string debugging_output_dir_; }; } // namespace LightGBM diff --git a/src/io/config.cpp b/src/io/config.cpp index 9c91f9a24..d159358cf 100644 --- a/src/io/config.cpp +++ b/src/io/config.cpp @@ -226,10 +226,10 @@ void Config::Set(const std::unordered_map& params) { std::sort(eval_at.begin(), eval_at.end()); std::vector new_valid; - for (size_t i = 0; i < valid.size(); ++i) { - if (valid[i] != data) { + for (const auto & i : valid) { + if (i != data) { // Only push the non-training data - new_valid.push_back(valid[i]); + new_valid.push_back(i); } else { is_provide_training_metric = true; } @@ -253,6 +253,10 @@ void Config::Set(const std::unordered_map& params) { // check for conflicts CheckParamConflict(); + +#ifdef DEBUG + Log::Debug("Loading configs from Map; constraint_group_column=%s\n", this->constraint_group_column.c_str()); +#endif } bool CheckMultiClassObjective(const std::string& objective) { diff --git a/src/io/config_auto.cpp b/src/io/config_auto.cpp index 7eb368a06..82e2c46b1 100644 --- a/src/io/config_auto.cpp +++ b/src/io/config_auto.cpp @@ -166,6 +166,22 @@ const std::unordered_map& Config::alias_table() { {"mlist", "machine_list_filename"}, {"workers", "machines"}, {"nodes", "machines"}, + + // FairGBM parameters + {"lagrangian_learning_rate", "multiplier_learning_rate"}, + {"lagrangian_multiplier_learning_rate", "multiplier_learning_rate"}, + {"lagrangian_multipliers", "init_lagrangian_multipliers"}, + {"init_multipliers", "init_lagrangian_multipliers"}, + {"output_dir", "debugging_output_dir"}, + {"constraint_proxy_function", "constraint_stepwise_proxy"}, + {"constraint_stepwise_proxy_function", "constraint_stepwise_proxy"}, + {"objective_proxy_function", 
"objective_stepwise_proxy"}, + {"objective_stepwise_proxy_function", "objective_stepwise_proxy"}, + {"proxy_margin", "stepwise_proxy_margin"}, + {"global_fpr", "global_target_fpr"}, + {"target_global_fpr", "global_target_fpr"}, + {"global_fnr", "global_target_fnr"}, + {"target_global_fnr", "global_target_fnr"}, }); return aliases; } @@ -302,6 +318,23 @@ const std::unordered_set& Config::parameter_set() { "gpu_device_id", "gpu_use_dp", "num_gpu", + + // FairGBM parameters + "debugging_output_dir", + "constraint_type", + "constraint_stepwise_proxy", + "objective_stepwise_proxy", + "stepwise_proxy_margin", + "constraint_group_column", + "constraint_fpr_threshold", + "constraint_fnr_threshold", + "score_threshold", + "init_lagrangian_multipliers", + "multiplier_learning_rate", + "global_constraint_type", + "global_target_fpr", + "global_target_fnr", + "global_score_threshold" }); return params; } @@ -627,6 +660,49 @@ void Config::GetMembersFromString(const std::unordered_map(tmp_str, ','); + for (auto lag : init_lagrangian_multipliers) + CHECK_GE(lag, 0); + } + + // Parameters for global constraints + Config::GetString(params, "global_constraint_type", &global_constraint_type); + + Config::GetDouble(params, "global_target_fpr", &global_target_fpr); + CHECK_GE(global_target_fpr, 0); CHECK_LE(global_target_fpr, 1); + + Config::GetDouble(params, "global_target_fnr", &global_target_fnr); + CHECK_GE(global_target_fnr, 0); CHECK_LE(global_target_fnr, 1); + + Config::GetDouble(params, "global_score_threshold", &global_score_threshold); + CHECK_GE(global_score_threshold, 0); CHECK_LE(global_score_threshold, 1); } std::string Config::SaveMembersToString() const { @@ -735,6 +811,27 @@ std::string Config::SaveMembersToString() const { str_buf << "[gpu_device_id: " << gpu_device_id << "]\n"; str_buf << "[gpu_use_dp: " << gpu_use_dp << "]\n"; str_buf << "[num_gpu: " << num_gpu << "]\n"; + + str_buf << "[------- FAIRGBM ------]\n"; + str_buf << "[debugging_output_dir: " << debugging_output_dir << "]\n"; + str_buf << "[constraint_type: " << constraint_type << "]\n"; + str_buf << "[stepwise_proxy_margin: " << stepwise_proxy_margin << "]\n"; + str_buf << "[constraint_group_column: " << constraint_group_column << "]\n"; + str_buf << "[score_threshold: " << score_threshold << "]\n"; + str_buf << "[constraint_fpr_threshold: " << constraint_fpr_threshold << "]\n"; + str_buf << "[constraint_fnr_threshold: " << constraint_fnr_threshold << "]\n"; + str_buf << "[multiplier_learning_rate: " << multiplier_learning_rate << "]\n"; + str_buf << "[init_lagrangian_multipliers: " << Common::Join(init_lagrangian_multipliers, ",") << "]\n"; + + // Global constraint parameters + str_buf << "[global_constraint_type: " << global_constraint_type << "]\n"; + str_buf << "[global_target_fpr: " << global_target_fpr << "]\n"; + str_buf << "[global_target_fnr: " << global_target_fnr << "]\n"; + str_buf << "[global_score_threshold: " << global_score_threshold << "]\n"; + + // TODO -- Add option to normalize multipliers + // str_buf << "[normalize_lagrangian_multipliers: "; + return str_buf.str(); } diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index e5cabe682..30556834d 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -33,7 +33,7 @@ Dataset::Dataset(data_size_t num_data) { CHECK_GT(num_data, 0); data_filename_ = "noname"; num_data_ = num_data; - metadata_.Init(num_data_, NO_SPECIFIC, NO_SPECIFIC); + metadata_.Init(num_data_, NO_SPECIFIC, NO_SPECIFIC, NO_SPECIFIC); is_finish_load_ = false; 
group_bin_boundaries_.push_back(0); has_raw_ = false; @@ -850,8 +850,13 @@ bool Dataset::SetFloatField(const char* field_name, const float* field_data, #else metadata_.SetWeights(field_data, num_element); #endif + } else if (name == std::string("constraint_group") || + name == std::string("fairness_group") || + name == std::string("sensitive_group") || + name == std::string("protected_group")) { + metadata_.SetConstraintGroup(field_data, num_element); } else { - return false; + return false; // Not successful } return true; } diff --git a/src/io/dataset_loader.cpp b/src/io/dataset_loader.cpp index c51f8a4fd..1176b381f 100644 --- a/src/io/dataset_loader.cpp +++ b/src/io/dataset_loader.cpp @@ -22,6 +22,7 @@ DatasetLoader::DatasetLoader(const Config& io_config, const PredictFunction& pre label_idx_ = 0; weight_idx_ = NO_SPECIFIC; group_idx_ = NO_SPECIFIC; + constraint_group_idx_ = NO_SPECIFIC; SetHeader(filename); store_raw_ = false; if (io_config.linear_tree) { @@ -143,7 +144,28 @@ void DatasetLoader::SetHeader(const char* filename) { } ignore_features_.emplace(group_idx_); } + + // load constraint group column idx + if (config_.constraint_group_column.size() > 0) { + if (Common::StartsWith(config_.constraint_group_column, name_prefix)) { + std::string name = config_.constraint_group_column.substr(name_prefix.size()); + if (name2idx.count(name) > 0) { + constraint_group_idx_ = name2idx[name]; + Log::Info("Using column %s as constraint_group id", name.c_str()); + } else { + Log::Fatal("Could not find constraint_group column %s in data file", name.c_str()); + } + } else { + if (!Common::AtoiAndCheck(config_.constraint_group_column.c_str(), &constraint_group_idx_)) { + Log::Fatal("constraint_group_column is not a number,\n" + "if you want to use a column name,\n" + "please add the prefix \"name:\" to the column name"); + } + Log::Info("Using column number %d as constraint_group id", constraint_group_idx_); + } + } } + if (config_.categorical_feature.size() > 0) { if (Common::StartsWith(config_.categorical_feature, name_prefix)) { std::string names = config_.categorical_feature.substr(name_prefix.size()); @@ -217,7 +239,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac dataset->ResizeRaw(dataset->num_data_); } // initialize label - dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); + dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_, constraint_group_idx_); // extract features ExtractFeaturesFromMemory(&text_data, parser.get(), dataset.get()); text_data.clear(); @@ -237,7 +259,7 @@ Dataset* DatasetLoader::LoadFromFile(const char* filename, int rank, int num_mac dataset->ResizeRaw(dataset->num_data_); } // initialize label - dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); + dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_, constraint_group_idx_); Log::Info("Making second pass..."); // extract features ExtractFeaturesFromFile(filename, parser.get(), used_data_indices, dataset.get()); @@ -279,7 +301,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, auto text_data = LoadTextDataToMemory(filename, dataset->metadata_, 0, 1, &num_global_data, &used_data_indices); dataset->num_data_ = static_cast(text_data.size()); // initialize label - dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); + dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_, constraint_group_idx_); dataset->CreateValid(train_data); if (dataset->has_raw()) { 
dataset->ResizeRaw(dataset->num_data_); @@ -293,7 +315,7 @@ Dataset* DatasetLoader::LoadFromFileAlignWithOtherDataset(const char* filename, dataset->num_data_ = static_cast(text_reader.CountLine()); num_global_data = dataset->num_data_; // initialize label - dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_); + dataset->metadata_.Init(dataset->num_data_, weight_idx_, group_idx_, constraint_group_idx_); dataset->CreateValid(train_data); if (dataset->has_raw()) { dataset->ResizeRaw(dataset->num_data_); @@ -996,6 +1018,7 @@ void DatasetLoader::ConstructBinMappersFromTextData(int rank, int num_machines, CHECK(label_idx_ >= 0 && label_idx_ <= dataset->num_total_features_); CHECK(weight_idx_ < 0 || weight_idx_ < dataset->num_total_features_); CHECK(group_idx_ < 0 || group_idx_ < dataset->num_total_features_); + CHECK(constraint_group_idx_ == NO_SPECIFIC || (constraint_group_idx_ >= 0 && constraint_group_idx_ < dataset->num_total_features_)); // FairGBM // fill feature_names_ if not header if (feature_names_.empty()) { @@ -1178,6 +1201,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector* text_dat dataset->metadata_.SetQueryAt(i, static_cast(inner_data.second)); } } + if (inner_data.first == constraint_group_idx_) + dataset->metadata_.SetConstraintGroupAt(i, static_cast(inner_data.second)); } if (dataset->has_raw()) { for (size_t j = 0; j < feature_row.size(); ++j) { @@ -1235,6 +1260,8 @@ void DatasetLoader::ExtractFeaturesFromMemory(std::vector* text_dat dataset->metadata_.SetQueryAt(i, static_cast(inner_data.second)); } } + if (inner_data.first == constraint_group_idx_) + dataset->metadata_.SetConstraintGroupAt(i, static_cast(inner_data.second)); } dataset->FinishOneRow(tid, i, is_feature_added); if (dataset->has_raw()) { @@ -1308,6 +1335,8 @@ void DatasetLoader::ExtractFeaturesFromFile(const char* filename, const Parser* dataset->metadata_.SetQueryAt(start_idx + i, static_cast(inner_data.second)); } } + if (inner_data.first == constraint_group_idx_) + dataset->metadata_.SetConstraintGroupAt(start_idx + i, static_cast(inner_data.second)); } if (dataset->has_raw()) { for (size_t j = 0; j < feature_row.size(); ++j) { diff --git a/src/io/metadata.cpp b/src/io/metadata.cpp index 49fc834b8..d97ecca46 100644 --- a/src/io/metadata.cpp +++ b/src/io/metadata.cpp @@ -32,7 +32,8 @@ void Metadata::Init(const char* data_filename) { Metadata::~Metadata() { } -void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) { +void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx, int constraint_group_idx_) { + std::lock_guard lock(mutex_); num_data_ = num_data; label_ = std::vector(num_data_); if (weight_idx >= 0) { @@ -53,6 +54,8 @@ void Metadata::Init(data_size_t num_data, int weight_idx, int query_idx) { queries_ = std::vector(num_data_, 0); query_load_from_file_ = false; } + if (constraint_group_idx_ >= 0) + constraint_group_ = std::vector(num_data_, 0); } void Metadata::Init(const Metadata& fullset, const data_size_t* used_indices, data_size_t num_used_indices) { @@ -320,6 +323,24 @@ void Metadata::SetLabel(const label_t* label, data_size_t len) { } } +void Metadata::SetConstraintGroup(const float* constraint_group, data_size_t len) { + std::lock_guard lock(mutex_); + if (constraint_group == nullptr) { + Log::Fatal("constraint_group cannot be nullptr"); + } + if (num_data_ != len) { + Log::Fatal("Length of constraint_group is not same as #data"); + } + if (constraint_group_.empty()) { + constraint_group_.resize(num_data_); + } + + #pragma omp 
parallel for schedule(static, 512) if (num_data_ >= 1024) + for (data_size_t i = 0; i < num_data_; ++i) { + constraint_group_[i] = static_cast(Common::AvoidInf(constraint_group[i])); + } +} + void Metadata::SetWeights(const label_t* weights, data_size_t len) { std::lock_guard lock(mutex_); // save to nullptr @@ -503,6 +524,8 @@ void Metadata::LoadFromMemory(const void* memory) { query_load_from_file_ = true; } LoadQueryWeights(); + + // TODO! load constraint_group_ information from memory ?? } void Metadata::SaveBinaryToFile(const VirtualFileWriter* writer) const { @@ -538,5 +561,4 @@ size_t Metadata::SizesInByte() const { return size; } - } // namespace LightGBM diff --git a/src/objective/constrained_recall_objective.hpp b/src/objective/constrained_recall_objective.hpp new file mode 100644 index 000000000..b15168f41 --- /dev/null +++ b/src/objective/constrained_recall_objective.hpp @@ -0,0 +1,212 @@ +/** + * The copyright of this file belongs to Feedzai. The file cannot be + * reproduced in whole or in part, stored in a retrieval system, + * transmitted in any form, or by any means electronic, mechanical, + * photocopying, or otherwise, without the prior permission of the owner. + * + * (c) 2021 Feedzai, Strictly Confidential + */ +/*! + * Constrained proxy recall objective (minimize proxy FNR). + */ + +#pragma clang diagnostic push +#pragma ide diagnostic ignored "openmp-use-default-none" + +#ifndef LIGHTGBM_CONSTRAINED_RECALL_OBJECTIVE_HPP +#define LIGHTGBM_CONSTRAINED_RECALL_OBJECTIVE_HPP + +#include +#include +#include +#include +#include "../metric/xentropy_metric.hpp" + +#include +#include + +/** + * Implements the proxy FNR loss (Recall as an objective). + * + * Minimizing FNR is equivalent to maximizing TPR (or Recall), as TPR = 1-FNR. + * Could use cross-entropy, quadratic, or hinge as proxy functions for FNR's step-wise function. + * > We need to use a differentiable proxy, as the step-wise function provides no gradient for optimization. + */ + +namespace LightGBM { + +class ConstrainedRecallObjective : public ConstrainedObjectiveFunction { +public: + explicit ConstrainedRecallObjective(const Config &config) + : deterministic_(config.deterministic) { + SetUpFromConfig(config); + + if (not this->IsGlobalFPRConstrained()) + throw std::invalid_argument("Must provide a global FPR constraint in order to optimize for Recall!"); + + if (objective_stepwise_proxy == "cross_entropy" or constraint_stepwise_proxy == "cross_entropy") { + if (proxy_margin_ < DBL_MIN) { + Log::Fatal("Proxy margin must be positive. It was %f.", proxy_margin_); + } + } + + if (objective_stepwise_proxy.empty()) { + Log::Fatal("Must provide an `objective_stepwise_proxy` to optimize for Recall. Got empty input."); + } + + // Disclaimer on using ConstrainedRecallObjective + Log::Warning("Directly optimizing for Recall is still being researched and is prone to high variability of outcomes."); + }; + + explicit ConstrainedRecallObjective(const std::vector &) + : deterministic_(false) { + throw std::invalid_argument( + "I don't think this constructor should ever be called; " + "it's only here for consistency with other objective functions."); + } + + ~ConstrainedRecallObjective() override = default; + + const char *GetName() const override { + return "constrained_recall_objective"; + } + + std::string ToString() const override { + return this->GetName(); + } + + /** + * Compute proxy FNR loss. 
+ * + * Loss function: + * - Quadratic: l(a) = (1/2) * (a - margin_)^2 * I[a < margin_], where l(margin_) = 0 + * - BCE: l(a) = log( 1 + exp( -a + log(exp(margin_) - 1) ) ), where l(0) = margin_ + * - Hinge: l(a) = (margin_ - a) * I[a < margin_], where l(margin_) = 0 + * + * @param label The instance label. + * @param score The instance predicted score. + * @return The loss value. + */ + double ComputePredictiveLoss(label_t label, double score) const override { + // If label is zero, loss will be zero + if (abs(label) < 1e-5) // if (y_i == 0) + return 0.; + + if (objective_stepwise_proxy == "quadratic") + return score < proxy_margin_ ? (1./2.) * pow(score - proxy_margin_, 2) : 0.; // proxy_margin_ is the HORIZONTAL margin! + + else if (objective_stepwise_proxy == "cross_entropy") { + double xent_horizontal_shift = log(exp(proxy_margin_) - 1); // proxy_margin_ is the VERTICAL margin! + return log(1 + exp(-score + xent_horizontal_shift)); + } + + else if (objective_stepwise_proxy == "hinge") + return score < proxy_margin_ ? proxy_margin_ - score : 0.; // proxy_margin_ is the HORIZONTAL margin! + + else + throw std::invalid_argument("Invalid objective_stepwise_proxy=" + objective_stepwise_proxy); + } + + /*! + * The optimal constant-value model starts at logodds==0, as opposed to starting from the average score. + * This is due using a different objective function, plus using global constraints. + * @return 0 + */ + double BoostFromScore(int) const override { + Log::Info("constrained_recall_objective: boosting from scores == 0;"); + return 0.; + } + + /** + * > aka GetPredictiveLossGradientsWRTModelOutput + * + * Gradients of the proxy FNR loss w.r.t. the model output (scores). + * + * l(a) = (1/2) * (a - margin_)^2 * I[a < margin_] + * + * dl/da = (a - margin_) * I[a < margin_] + * + * @param score + * @param gradients + * @param hessians + */ + void GetGradients(const double *score, score_t *gradients, score_t *hessians) const override { + /** + * How much to shift the cross-entropy function (horizontally) to get + * the target proxy_margin_ at x=0; i.e., f(0) = proxy_margin_ + */ + const double xent_horizontal_shift = log(exp(proxy_margin_) - 1); + + /** + * NOTE + * - https://github.com/feedzai/fairgbm/issues/11 + * - This value should be zero in order to optimize solely for TPR (Recall), + * as TPR considers only label positives (LPs) and ignores label negatives (LNs). + * - However, initial splits will have -inf information gain if the gradients + * of all LNs are 0; + * - Hence, we're adding a tiny positive weight to the gradient of all LNs; + */ + const double label_negative_weight = 1e-2; + + #pragma omp parallel for schedule(static) + for (data_size_t i = 0; i < num_data_; ++i) { + + // Proxy FNR (or proxy Recall) has no loss for label negative samples (they're ignored). + if (abs(label_[i] - 1) < 1e-5) { // if (y_i == 1) + if (objective_stepwise_proxy == "quadratic") { + gradients[i] = (score_t) (score[i] < proxy_margin_ ? score[i] - proxy_margin_ : 0.); + hessians[i] = (score_t) (score[i] < proxy_margin_ ? 1. : 0.); + } + + else if (objective_stepwise_proxy == "cross_entropy") { + const double z = Constrained::sigmoid(score[i] - xent_horizontal_shift); + gradients[i] = (score_t) (z - 1.); + hessians[i] = (score_t) (z * (1. - z)); + } + + else if (objective_stepwise_proxy == "hinge") { + gradients[i] = (score_t) (score[i] < proxy_margin_ ? -1. 
: 0.); + hessians[i] = (score_t) 0.; + } + + else { + throw std::invalid_argument("Invalid objective proxy: " + objective_stepwise_proxy); + } + + if (weights_ != nullptr) { + gradients[i] *= weights_[i]; + hessians[i] *= weights_[i]; + } + + } else { + // NOTE: https://github.com/feedzai/fairgbm/issues/11 + // - This whole else clause should not be needed to optimize for Recall, + // as LNs have no influence on the FNR loss function or its (proxy-)gradient; + // - However, passing a zero gradient to all LNs leads to weird early stopping + // behavior from the `GBDT::Train` function; + // - Adding this tiny weight to the gradient of LNs seems to fix the issue with + // no (apparent) unintended consequences, as the gradient flowing is really small; + const double z = Constrained::sigmoid(score[i] + xent_horizontal_shift); + gradients[i] = (score_t) (label_negative_weight * z); + hessians[i] = (score_t) (label_negative_weight * z * (1. - z)); + } + } + } + + void GetConstraintGradientsWRTModelOutput(const double *multipliers, const double *score, score_t *gradients, + score_t *hessians) const override { + if (not this->IsGlobalFPRConstrained()) + throw std::invalid_argument("Recall objective function must have a global FPR constraint!"); + + ConstrainedObjectiveFunction::GetConstraintGradientsWRTModelOutput(multipliers, score, gradients, hessians); + } + +private: + const bool deterministic_; + +}; + + +} + +#endif //LIGHTGBM_CONSTRAINED_RECALL_OBJECTIVE_HPP diff --git a/src/objective/constrained_xentropy_objective.hpp b/src/objective/constrained_xentropy_objective.hpp new file mode 100644 index 000000000..499763c06 --- /dev/null +++ b/src/objective/constrained_xentropy_objective.hpp @@ -0,0 +1,157 @@ +/** + * The copyright of this file belongs to Feedzai. The file cannot be + * reproduced in whole or in part, stored in a retrieval system, + * transmitted in any form, or by any means electronic, mechanical, + * photocopying, or otherwise, without the prior permission of the owner. + * + * (c) 2021 Feedzai, Strictly Confidential + */ +/*! + * Copyright (c) 2017 Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See LICENSE file in the project root for license information. + */ + +#pragma clang diagnostic push +#pragma ide diagnostic ignored "openmp-use-default-none" + +#ifndef LIGHTGBM_OBJECTIVE_CONSTRAINED_XENTROPY_OBJECTIVE_HPP_ +#define LIGHTGBM_OBJECTIVE_CONSTRAINED_XENTROPY_OBJECTIVE_HPP_ + +#include +#include +#include +#include +#include "../metric/xentropy_metric.hpp" + +#include +#include +#include +#include +#include + +namespace LightGBM { + +/** + * Objective function for constrained optimization. + * Uses the well-known Binary Cross Entropy (BCE) function for measuring predictive loss, plus + * Uses a cross-entropy-based function as a proxy for the step-wise function when computing fairness constraints. 
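 *
 * (For intuition on that proxy: for a label-positive instance the cross-entropy proxy FNR loss is
 *  l(s) = log(1 + exp(-s + log(exp(b) - 1))) with b = proxy_margin_, so l(0) = log(exp(b)) = b;
 *  that is, proxy_margin_ is the value of the proxy loss at a raw score of zero.)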
+ * + * NOTE: + * - This `constrained_xentropy` objective generally leads to the best constrained results; + * - All results from the FairGBM paper use this objective function with the "cross_entropy" step-wise proxy; + * - This pairing of "constrained cross-entropy objective + cross-entropy proxy for constraints" was tested the most; + */ +class ConstrainedCrossEntropy : public ConstrainedObjectiveFunction { // TODO: inherit from both CrossEntropy and ConstrainedObjectiveFunction +public: + explicit ConstrainedCrossEntropy(const Config &config) + : deterministic_(config.deterministic) { + SetUpFromConfig(config); + + if (not objective_stepwise_proxy.empty()) { + Log::Warning("Ignoring argument objective_stepwise_proxy=%s.", objective_stepwise_proxy.c_str()); + } + } + + explicit ConstrainedCrossEntropy(const std::vector &) + : deterministic_(false) { + Log::Warning( + "The objective function 'constrained_cross_entropy' was not properly loaded. " + "Resuming training is not available; everything else can be used as usual." + ); // TODO: https://github.com/feedzai/fairgbm/issues/10 + } + + ~ConstrainedCrossEntropy() override = default; + + double ComputePredictiveLoss(label_t label, double score) const override { + return XentLoss(label, Constrained::sigmoid(score)); + } + + /** + * > aka GetPredictiveLossGradientsWRTModelOutput + * + * Gradient of the predictive loss w.r.t. model output (scores). + * This is a duplicate of the implementation in the CrossEntropy class. + * + * @param score Model outputs. + * @param gradients Reference to gradients' vector. + * @param hessians Reference to hessians' vector. + */ + void GetGradients(const double *score, score_t *gradients, score_t *hessians) const override { + if (weights_ == nullptr) { + // compute pointwise gradients and Hessians with implied unit weights + #pragma omp parallel for schedule(static) + for (data_size_t i = 0; i < num_data_; ++i) { + const double z = Constrained::sigmoid(score[i]); + + gradients[i] = static_cast(z - label_[i]); // 1st derivative + hessians[i] = static_cast(z * (1.0f - z)); // 2nd derivative + // NOTE: should we set the 2nd derivative to zero? to stick to a 1st order method in both descent and ascent steps. 
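        // Quick numeric check of the expressions above: at a raw score of 0, z = sigmoid(0) = 0.5,
        // so a positive instance (label 1) gets gradient 0.5 - 1 = -0.5 and hessian
        // 0.5 * (1 - 0.5) = 0.25, i.e. the boosting update pushes its score (and probability) up.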
+ } + } else { + // compute pointwise gradients and Hessians with given weights + #pragma omp parallel for schedule(static) + for (data_size_t i = 0; i < num_data_; ++i) { + const double z = Constrained::sigmoid(score[i]); + + gradients[i] = static_cast((z - label_[i]) * weights_[i]); + hessians[i] = static_cast(z * (1.0f - z) * weights_[i]); + } + } + } + + const char *GetName() const override { + return "constrained_cross_entropy"; + } + + std::string ToString() const override { + std::stringstream str_buf; + str_buf << GetName(); + /* str_buf << "_->constraint_type->" << constraint_type; + str_buf << "_->groups("; + for (auto &group: group_values_) + str_buf << group << ","; + str_buf << ")"; + + str_buf << "_score_threshold->" << score_threshold_; + str_buf << "_fpr_threshold->" << fpr_threshold_; + str_buf << "_fnr_threshold->" << fnr_threshold_; */ + return str_buf.str(); + } + + // implement custom average to boost from (if enabled among options) + double BoostFromScore(int) const override { + double suml = 0.0f; + double sumw = 0.0f; + if (weights_ != nullptr) { + + #pragma omp parallel for schedule(static) reduction(+:suml, sumw) if (!deterministic_) + for (data_size_t i = 0; i < num_data_; ++i) { + suml += label_[i] * weights_[i]; + sumw += weights_[i]; + } + } else { + sumw = static_cast(num_data_); + + #pragma omp parallel for schedule(static) reduction(+:suml) if (!deterministic_) + for (data_size_t i = 0; i < num_data_; ++i) { + suml += label_[i]; + } + } + double pavg = sumw > 0.0f ? suml / sumw : 0.0f; + pavg = std::min(pavg, 1.0 - kEpsilon); + pavg = std::max(pavg, kEpsilon); + double initscore = std::log(pavg / (1.0f - pavg)); + Log::Info("[%s:%s]: pavg = %f -> initscore = %f", GetName(), __func__, pavg, initscore); + return initscore; + } + +private: + const bool deterministic_; + +}; + +} // end namespace LightGBM + +#endif // end #ifndef LIGHTGBM_OBJECTIVE_CONSTRAINED_XENTROPY_OBJECTIVE_HPP_ + +#pragma clang diagnostic pop \ No newline at end of file diff --git a/src/objective/objective_function.cpp b/src/objective/objective_function.cpp index 193353d93..dcfbd4901 100644 --- a/src/objective/objective_function.cpp +++ b/src/objective/objective_function.cpp @@ -9,6 +9,9 @@ #include "rank_objective.hpp" #include "regression_objective.hpp" #include "xentropy_objective.hpp" +#include "constrained_xentropy_objective.hpp" +#include "constrained_recall_objective.hpp" + namespace LightGBM { @@ -37,6 +40,10 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& return new MulticlassOVA(config); } else if (type == std::string("cross_entropy")) { return new CrossEntropy(config); + } else if (type == std::string("constrained_cross_entropy")) { // Entry-point for FairGBM code! + return new ConstrainedCrossEntropy(config); + } else if (type == std::string("constrained_recall_objective")) { // Entry-point for FairGBM code! 
+ return new ConstrainedRecallObjective(config); } else if (type == std::string("cross_entropy_lambda")) { return new CrossEntropyLambda(config); } else if (type == std::string("mape")) { @@ -79,6 +86,10 @@ ObjectiveFunction* ObjectiveFunction::CreateObjectiveFunction(const std::string& return new MulticlassOVA(strs); } else if (type == std::string("cross_entropy")) { return new CrossEntropy(strs); + } else if (type == std::string("constrained_cross_entropy")) { + return new ConstrainedCrossEntropy(strs); + } else if (type == std::string("constrained_recall_objective")) { + return new ConstrainedRecallObjective(strs); } else if (type == std::string("cross_entropy_lambda")) { return new CrossEntropyLambda(strs); } else if (type == std::string("mape")) {