From 9e0d7d859ee7b3bd7cc06fb289534959f502fddc Mon Sep 17 00:00:00 2001
From: Roman Lutz
Date: Fri, 25 Oct 2019 08:03:39 -0700
Subject: [PATCH] Move exponentiated gradient logic into class and reformat
 existing documentation (#108)

* initial set of changes to move expgrad code into expgrad class,
  documentation adjustments
* minor correction to previous changes
* flake8
* remove posterior_predict and posterior_predict_proba
---
 .../exponentiated_gradient.py | 356 +++++++++---------
 1 file changed, 171 insertions(+), 185 deletions(-)

diff --git a/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py b/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py
index 9d8becd7..12587856 100644
--- a/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py
+++ b/fairlearn/reductions/exponentiated_gradient/exponentiated_gradient.py
@@ -29,12 +29,39 @@ def _mean_pred(X, hs, weights):
 class ExponentiatedGradientResult:
     def __init__(self, best_classifier, best_gap, classifiers, weights, last_t, best_t,
                  n_oracle_calls):
+        """ Result object for the exponentiated gradient reduction operation.
+        """
+
+        """ A function that maps a DataFrame X containing covariates to a Series containing the
+        corresponding probabilistic decisions in [0,1]
+        """
         self._best_classifier = best_classifier
+
+        """ The quality of best_classifier; if the algorithm has converged then best_gap <= nu;
+        the solution best_classifier is guaranteed to have the classification error within
+        2*best_gap of the best error under constraint eps; the constraint violation is at most
+        2*(eps+best_gap)
+        """
         self._best_gap = best_gap
+
+        """ The base classifiers generated (instances of estimator).
+        """
         self._classifiers = classifiers
+
+        """ The weights of those classifiers within best_classifier.
+        """
         self._weights = weights
+
+        """ The last executed iteration; always last_t < T.
+        """
         self._last_t = last_t
+
+        """ The iteration in which best_classifier was obtained.
+        """
         self._best_t = best_t
+
+        """ The number of times the estimator was called.
+        """
         self._n_oracle_calls = n_oracle_calls

     def _as_dict(self):
@@ -52,26 +79,29 @@ class ExponentiatedGradient(Reduction):
     """An Estimator which implements the exponentiated gradient approach to reductions described
     by `Agarwal et al. (2018) <https://arxiv.org/abs/1803.02453>`_.
-
-    :param estimator: The underlying estimator to be used. Must provide a
-        fit(X, y, sample_weights) method
-
-    :param constraints: Object describing the parity constraints
-    :type constraints: fairlearn.moments.Moment
-
-    :param eps: ?
-    :type eps: float
-
-    :param T: Maximum number of iterations
-    :type T: int
-
-    :param nu: ?
-    :type nu: ?
-
-    :param eta_mul: ?
-    :type eta_mul: float
     """

     def __init__(self, estimator, constraints, eps=0.01, T=50, nu=None, eta_mul=2.0):
+        """ The constructor for a mitigator object that applies the exponentiated gradient
+        reduction to a provided estimator to achieve the given constraints.
+
+        :param estimator: an estimator implementing methods fit(X, y, sample_weight) and
+            predict(X), where X is the set of features, y is the set of labels, and
+            sample_weight is a set of weights; labels y and predictions returned by predict(X)
+            are either 0 or 1.
+        :type estimator: an estimator
+        :param constraints: the disparity constraints expressed as moments
+        :type constraints: fairlearn.reductions.moments.Moment
+        :param eps: allowed fairness constraint violation (default 0.01)
+        :type eps: float
+        :param T: max number of iterations (default 50)
+        :type T: int
+        :param nu: convergence threshold for the duality gap (default None), corresponding to a
+            conservative automatic setting based on the statistical uncertainty in measuring
+            classification error
+        :type nu: float
+        :param eta_mul: initial setting of the learning rate (default 2.0)
+        :type eta_mul: float
+        """
         self._estimator = estimator
         self._constraints = constraints
         self._eps = eps
@@ -82,8 +112,10 @@ def __init__(self, estimator, constraints, eps=0.01, T=50, nu=None, eta_mul=2.0)
         self._classifiers = None

     def fit(self, X, y, **kwargs):
+        """ Return a fair classifier under specified fairness constraints via
+        exponentiated-gradient reduction.
+        """
         # TODO: validate input data; unify between grid search and expgrad?
-
         if type(X) in [np.ndarray, list]:
             X_train = pd.DataFrame(X)
         else:
@@ -94,13 +126,95 @@ def fit(self, X, y, **kwargs):
         else:
             y_train = y

-        self._expgrad_result = exponentiated_gradient(X_train,
-                                                       kwargs[_KW_SENSITIVE_FEATURES],
-                                                       y_train,
-                                                       self._estimator,
-                                                       constraints=self._constraints,
-                                                       eps=self._eps, T=self._T, nu=self._nu,
-                                                       eta_mul=self._eta_mul)
+        A = kwargs[_KW_SENSITIVE_FEATURES]
+
+        n = X_train.shape[0]
+
+        logger.debug("...Exponentiated Gradient STARTING")
+
+        B = 1 / self._eps
+        lagrangian = _Lagrangian(X_train, A, y_train, self._estimator, self._constraints,
+                                 self._eps, B)
+
+        theta = pd.Series(0, lagrangian.constraints.index)
+        Qsum = pd.Series()
+        lambdas = pd.DataFrame()
+        gaps_EG = []
+        gaps = []
+        Qs = []
+
+        last_regret_checked = _REGRET_CHECK_START_T
+        last_gap = np.PINF
+        for t in range(0, self._T):
+            logger.debug("...iter=%03d" % t)
+
+            # set lambdas for every constraint
+            lambda_vec = B * np.exp(theta) / (1 + np.exp(theta).sum())
+            lambdas[t] = lambda_vec
+            lambda_EG = lambdas.mean(axis=1)
+
+            # select classifier according to best_h method
+            h, h_idx = lagrangian.best_h(lambda_vec)
+            pred_h = h(X_train)
+
+            if t == 0:
+                if self._nu is None:
+                    self._nu = _ACCURACY_MUL * (pred_h - y_train).abs().std() / np.sqrt(n)
+                eta_min = self._nu / (2 * B)
+                eta = self._eta_mul / B
+                logger.debug("...eps=%.3f, B=%.1f, nu=%.6f, T=%d, eta_min=%.6f"
+                             % (self._eps, B, self._nu, self._T, eta_min))
+
+            if h_idx not in Qsum.index:
+                Qsum.at[h_idx] = 0.0
+            Qsum[h_idx] += 1.0
+            gamma = lagrangian.gammas[h_idx]
+            Q_EG = Qsum / Qsum.sum()
+            result_EG = lagrangian.eval_gap(Q_EG, lambda_EG, self._nu)
+            gap_EG = result_EG.gap()
+            gaps_EG.append(gap_EG)
+
+            if t == 0 or not _RUN_LP_STEP:
+                gap_LP = np.PINF
+            else:
+                # saddle point optimization over the convex hull of
+                # classifiers returned so far
+                Q_LP, _, result_LP = lagrangian.solve_linprog(self._nu)
+                gap_LP = result_LP.gap()
+
+            # keep values from exponentiated gradient or linear programming
+            if gap_EG < gap_LP:
+                Qs.append(Q_EG)
+                gaps.append(gap_EG)
+            else:
+                Qs.append(Q_LP)
+                gaps.append(gap_LP)
+
+            logger.debug("%seta=%.6f, L_low=%.3f, L=%.3f, L_high=%.3f"
+                         ", gap=%.6f, disp=%.3f, err=%.3f, gap_LP=%.6f"
+                         % (_INDENTATION, eta, result_EG.L_low,
+                            result_EG.L, result_EG.L_high,
+                            gap_EG, result_EG.gamma.max(),
+                            result_EG.error, gap_LP))
+
+            if (gaps[t] < self._nu) and (t >= _MIN_T):
+                # solution found
+                break
+
+            # update regret
+            if t >= last_regret_checked * _REGRET_CHECK_INCREASE_T:
+                best_gap = min(gaps_EG)
+
+                if best_gap > last_gap * _SHRINK_REGRET:
+                    eta *= _SHRINK_ETA
+                last_regret_checked = t
+                last_gap = best_gap
+
+            # update theta based on learning rate
+            theta += eta * (gamma - self._eps)
+
+        self._expgrad_result = self._format_results(gaps, Qs, lagrangian, B, eta_min)
+
         self._best_classifier = self._expgrad_result._best_classifier
         self._classifiers = self._expgrad_result._classifiers
         # TODO: figure out whether we should keep the remaining data of the result object
@@ -113,162 +227,34 @@ def _pmf_predict(self, X):
         positive_probs = self._best_classifier(X)
         return np.concatenate((1-positive_probs, positive_probs), axis=1)

-
-def exponentiated_gradient(X, A, y, estimator,
-                           constraints,
-                           eps=0.01,
-                           T=50,
-                           nu=None,
-                           eta_mul=2.0):
-    """
-    Return a fair classifier under specified fairness constraints via exponentiated-gradient
-    reduction.
-
-    :param X: a DataFrame containing covariates
-    :param A: a Series containing the protected attribute
-    :param y: a Series containing labels in {0,1}
-    :param estimator: an estimator implementing methods fit(X,Y,W) and predict(X), where X is the
-        DataFrame of covariates, and Y and W are the Series containing the labels and weights,
-        respectively; labels Y and predictions returned by predict(X) are in {0,1}
-    :param constraints: the disparity constraints expressed as moments
-    :param eps: allowed fairness constraint violation (default 0.01)
-    :param T: max number of iterations (default 50)
-    :param nu: convergence threshold for the duality gap (default None), corresponding to a
-        conservative automatic setting based on the statistical uncertainty in measuring
-        classification error)
-    :param eta_mul: initial setting of the learning rate (default 2.0)
-    :param debug: if True, then debugging output is produced (default False)
-
-    :return: Returns named tuple with fields:
-      best_classifier -- a function that maps a DataFrame X containing
-                         covariates to a Series containing the corresponding
-                         probabilistic decisions in [0,1]
-      best_gap -- the quality of best_classifier; if the algorithm has
-                  converged then best_gap <= nu; the solution best_classifier
-                  is guaranteed to have the classification error within
-                  2*best_gap of the best error under constraint eps; the
-                  constraint violation is at most 2*(eps+best_gap)
-      classifiers -- the base classifiers generated (instances of estimator)
-      weights -- the weights of those classifiers within best_classifier
-      last_t -- the last executed iteration; always last_t < T
-      best_t -- the iteration in which best_classifier was obtained
-      n_oracle_calls -- how many times the estimator was called
-    """
-    n = X.shape[0]
-
-    logger.debug("...Exponentiated Gradient STARTING")
-
-    B = 1 / eps
-    lagrangian = _Lagrangian(X, A, y, estimator, constraints, eps, B)
-
-    theta = pd.Series(0, lagrangian.constraints.index)
-    Qsum = pd.Series()
-    lambdas = pd.DataFrame()
-    gaps_EG = []
-    gaps = []
-    Qs = []
-
-    last_regret_checked = _REGRET_CHECK_START_T
-    last_gap = np.PINF
-    for t in range(0, T):
-        logger.debug("...iter=%03d" % t)
-
-        # set lambdas for every constraint
-        lambda_vec = B * np.exp(theta) / (1 + np.exp(theta).sum())
-        lambdas[t] = lambda_vec
-        lambda_EG = lambdas.mean(axis=1)
-
-        # select classifier according to best_h method
-        h, h_idx = lagrangian.best_h(lambda_vec)
-        pred_h = h(X)
-
-        if t == 0:
-            if nu is None:
-                nu = _ACCURACY_MUL * (pred_h - y).abs().std() / np.sqrt(n)
-            eta_min = nu / (2 * B)
-            eta = eta_mul / B
-            logger.debug("...eps=%.3f, B=%.1f, nu=%.6f, T=%d, eta_min=%.6f"
-                         % (eps, B, nu, T, eta_min))
-
-        if h_idx not in Qsum.index:
-            Qsum.at[h_idx] = 0.0
-        Qsum[h_idx] += 1.0
-        gamma = lagrangian.gammas[h_idx]
-        Q_EG = Qsum / Qsum.sum()
-        result_EG = lagrangian.eval_gap(Q_EG, lambda_EG, nu)
-        gap_EG = result_EG.gap()
-        gaps_EG.append(gap_EG)
-
-        if t == 0 or not _RUN_LP_STEP:
-            gap_LP = np.PINF
-        else:
-            # saddle point optimization over the convex hull of
-            # classifiers returned so far
-            Q_LP, _, result_LP = lagrangian.solve_linprog(nu)
-            gap_LP = result_LP.gap()
-
-        # keep values from exponentiated gradient or linear programming
-        if gap_EG < gap_LP:
-            Qs.append(Q_EG)
-            gaps.append(gap_EG)
-        else:
-            Qs.append(Q_LP)
-            gaps.append(gap_LP)
-
-        logger.debug("%seta=%.6f, L_low=%.3f, L=%.3f, L_high=%.3f"
-                     ", gap=%.6f, disp=%.3f, err=%.3f, gap_LP=%.6f"
-                     % (_INDENTATION, eta, result_EG.L_low,
-                        result_EG.L, result_EG.L_high,
-                        gap_EG, result_EG.gamma.max(),
-                        result_EG.error, gap_LP))
-
-        if (gaps[t] < nu) and (t >= _MIN_T):
-            # solution found
-            break
-
-        # update regret
-        if t >= last_regret_checked * _REGRET_CHECK_INCREASE_T:
-            best_gap = min(gaps_EG)
-
-            if best_gap > last_gap * _SHRINK_REGRET:
-                eta *= _SHRINK_ETA
-            last_regret_checked = t
-            last_gap = best_gap
-
-        # update theta based on learning rate
-        theta += eta * (gamma - eps)
-
-    return _format_results(gaps, Qs, lagrangian, eps, B, nu, T, eta_min)
-
-
-def _format_results(gaps, Qs, lagrangian, eps, B, nu, T, eta_min):
-    gaps_series = pd.Series(gaps)
-    gaps_best = gaps_series[gaps_series <= gaps_series.min() + _PRECISION]
-    best_t = gaps_best.index[-1]
-    weights = Qs[best_t]
-    hs = lagrangian.hs
-    for h_idx in hs.index:
-        if h_idx not in weights.index:
-            weights.at[h_idx] = 0.0
-
-    def best_classifier(X): return _mean_pred(X, hs, weights)
-    best_gap = gaps[best_t]
-
-    last_t = len(Qs) - 1
-
-    result = ExponentiatedGradientResult(
-        best_classifier,
-        best_gap,
-        lagrangian.classifiers,
-        weights,
-        last_t,
-        best_t,
-        lagrangian.n_oracle_calls)
-
-    logger.debug("...eps=%.3f, B=%.1f, nu=%.6f, T=%d, eta_min=%.6f"
-                 % (eps, B, nu, T, eta_min))
-    logger.debug("...last_t=%d, best_t=%d, best_gap=%.6f, n_oracle_calls=%d, n_hs=%d"
-                 % (last_t, best_t, best_gap, lagrangian.n_oracle_calls,
-                    len(lagrangian.classifiers)))
-
-    return result
+    def _format_results(self, gaps, Qs, lagrangian, B, eta_min):
+        gaps_series = pd.Series(gaps)
+        gaps_best = gaps_series[gaps_series <= gaps_series.min() + _PRECISION]
+        best_t = gaps_best.index[-1]
+        weights = Qs[best_t]
+        hs = lagrangian.hs
+        for h_idx in hs.index:
+            if h_idx not in weights.index:
+                weights.at[h_idx] = 0.0
+
+        def best_classifier(X): return _mean_pred(X, hs, weights)
+        best_gap = gaps[best_t]
+
+        last_t = len(Qs) - 1
+
+        result = ExponentiatedGradientResult(
+            best_classifier,
+            best_gap,
+            lagrangian.classifiers,
+            weights,
+            last_t,
+            best_t,
+            lagrangian.n_oracle_calls)
+
+        logger.debug("...eps=%.3f, B=%.1f, nu=%.6f, T=%d, eta_min=%.6f"
+                     % (self._eps, B, self._nu, self._T, eta_min))
+        logger.debug("...last_t=%d, best_t=%d, best_gap=%.6f, n_oracle_calls=%d, n_hs=%d"
+                     % (last_t, best_t, best_gap, lagrangian.n_oracle_calls,
+                        len(lagrangian.classifiers)))
+
+        return result
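A minimal usage sketch of the class-based API introduced by this patch (illustration only, not part of the diff). It assumes that the keyword read from kwargs via _KW_SENSITIVE_FEATURES is named "sensitive_features", that DemographicParity is importable from fairlearn.reductions, and that scikit-learn's LogisticRegression satisfies the documented estimator contract (fit(X, y, sample_weight) and 0/1 predictions); the exact import paths may differ in the released package layout.

    import pandas as pd
    from sklearn.linear_model import LogisticRegression

    from fairlearn.reductions import DemographicParity  # assumed import path
    from fairlearn.reductions.exponentiated_gradient.exponentiated_gradient import (
        ExponentiatedGradient,
    )

    # Toy data: two covariates, binary labels, and a binary sensitive feature.
    X = pd.DataFrame({"x1": [0, 1, 2, 3, 4, 5, 6, 7],
                      "x2": [1, 1, 0, 0, 1, 1, 0, 0]})
    y = pd.Series([0, 0, 1, 1, 0, 1, 1, 1])
    A = pd.Series(["a", "a", "a", "a", "b", "b", "b", "b"])

    # Constructor arguments mirror the __init__ documented above; T, nu and
    # eta_mul keep their defaults (50, None, 2.0).
    mitigator = ExponentiatedGradient(LogisticRegression(solver="liblinear"),
                                      constraints=DemographicParity(),
                                      eps=0.01)

    # fit() reads the sensitive feature from kwargs[_KW_SENSITIVE_FEATURES];
    # "sensitive_features" is assumed to be that keyword name here.
    mitigator.fit(X, y, sensitive_features=A)

    # Per the ExponentiatedGradientResult documentation, _best_classifier maps a
    # DataFrame of covariates to probabilistic decisions in [0, 1].
    scores = mitigator._best_classifier(X)
    print(scores)
    print(mitigator._expgrad_result._best_gap)  # <= nu once the algorithm has converged

Compared with the removed module-level exponentiated_gradient function, the estimator, constraints, and solver settings now flow through the constructor and the data through fit(), which is the interface change this patch makes.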