diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
index 756720490a..581113dbcb 100644
--- a/river/linear_model/__init__.py
+++ b/river/linear_model/__init__.py
@@ -5,10 +5,12 @@
 from . import base
 from .alma import ALMAClassifier
 from .bayesian_lin_reg import BayesianLinearRegression
+from .incrementalAUC import IncrementalAUC
 from .lin_reg import LinearRegression
 from .log_reg import LogisticRegression
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
+from .rls import RLS
 from .softmax import SoftmaxRegression
 
 __all__ = [
@@ -21,4 +23,6 @@
     "PARegressor",
     "Perceptron",
     "SoftmaxRegression",
+    "RLS",
+    "IncrementalAUC",
 ]
diff --git a/river/linear_model/incrementalAUC.py b/river/linear_model/incrementalAUC.py
new file mode 100644
index 0000000000..5b89911d58
--- /dev/null
+++ b/river/linear_model/incrementalAUC.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+import numpy as np
+
+from river import metrics
+
+
+class IncrementalAUC(metrics.base.BinaryMetric):
+    """Calculates AUC incrementally."""
+
+    def __init__(self):
+        super().__init__()
+        self.positive_scores = []
+        self.negative_scores = []
+
+    def update(self, y_true, y_pred):
+        """Updates the metric with the new prediction and true label."""
+        if y_true == 1:
+            self.positive_scores.append(y_pred)
+        else:
+            self.negative_scores.append(y_pred)
+        return self
+
+    def get(
+        self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01
+    ):
+        """
+        Runs stochastic gradient ascent to optimize theta, then computes the AUC
+        from the accumulated scores.
+
+        Parameters:
+        - X_train: Training feature matrix.
+        - y_train: Training labels.
+        - X_test: Test feature matrix.
+        - y_test: Test labels.
+        - epochs: Number of training epochs.
+        - lr: Initial learning rate.
+        - n_mc: Number of Monte Carlo samples for gradient estimation.
+        - gamma: Learning rate discount factor.
+        - eps: Smoothing parameter for the sigmoid function.
+
+        Returns:
+        - auc: Final AUC score based on the accumulated scores.
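+
+        Example (an illustrative sketch on synthetic data; the shapes and
+        hyperparameters below are arbitrary choices, not recommended settings):
+
+        >>> import numpy as np
+        >>> from river import linear_model
+        >>> rng = np.random.RandomState(0)
+        >>> X_train, y_train = rng.randn(20, 2), np.array([0, 1] * 10)
+        >>> X_test, y_test = rng.randn(10, 2), np.array([0, 1] * 5)
+        >>> metric = linear_model.IncrementalAUC()
+        >>> auc = metric.get(X_train, y_train, X_test, y_test, epochs=10, n_mc=50)
+        >>> 0.0 <= auc <= 1.0
+        True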
+ """ + from sklearn.metrics import roc_auc_score + + # Separate the classes + X1 = X_train[y_train == 1] + X0 = X_train[y_train == 0] + + # Initialize parameters + np.random.seed(123) + theta = np.random.randn(X_train.shape[1]) + current_lr = lr + + # Reset accumulated scores + self.positive_scores = [] + self.negative_scores = [] + + # Optimization loop + for seed, epoch in enumerate(range(epochs)): + # Update learning rate + current_lr = current_lr / (1 + gamma) + + # Update theta using stochastic gradient ascent + theta -= current_lr * self.stochastic_gradient( + theta, X1, X0, N=n_mc, eps=eps, random_state=seed + ) + + # After training, compute the scores on the test set + y_scores = np.dot(X_test, theta) + + # Update accumulated scores using y_test and y_scores + for y_true_sample, y_score in zip(y_test, y_scores): + self.update(y_true_sample, y_score) + + # Compute AUC based on accumulated scores + y_scores_accumulated = self.positive_scores + self.negative_scores + y_true_accumulated = [1] * len(self.positive_scores) + [0] * len(self.negative_scores) + + auc = roc_auc_score(y_true_accumulated, y_scores_accumulated) + return auc + + def sigma_eps(self, x, eps=0.01): + z = x / eps + if z > 35: + return 1 + elif z < -35: + return 0 + else: + return 1.0 / (1.0 + np.exp(-z)) + + def reg_u_statistic(self, y_true, y_probs, eps=0.01): + p = y_probs[y_true == 1] + q = y_probs[y_true == 0] + + aux = [] + for pp in p: + for qq in q: + aux.append(self.sigma_eps(pp - qq, eps=eps)) + + u = np.array(aux).mean() + return u + + def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1): + np.random.seed(random_state) + + indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N) + indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N) + + X1_, X0_ = X1[indices_1], X0[indices_0] + + avg = np.zeros_like(theta) + for xi, xj in zip(X1_, X0_): + dx = xj - xi + sig = self.sigma_eps(theta @ dx, eps=eps) + avg = avg + sig * (1 - sig) * dx + + return avg / (N * eps) diff --git a/river/linear_model/rls.py b/river/linear_model/rls.py new file mode 100644 index 0000000000..fbdc40e54f --- /dev/null +++ b/river/linear_model/rls.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +import numpy as np + + +class RLS: + """ + Recursive Least Squares (RLS) + + The Recursive Least Squares (RLS) algorithm is an adaptive filtering method that adjusts filter coefficients + to minimize the weighted least squares error between the desired and predicted outputs. It is widely used + in signal processing and control systems for applications requiring fast adaptation to changes in input signals. + + Parameters + ---------- + p : int + The order of the filter (number of coefficients to be estimated). + forgetting_factor : float, optional, default=0.99 + Forgetting factor (0 < l ≤ 1). Controls how quickly the algorithm forgets past data. + A smaller value makes the algorithm more responsive to recent data. + delta : float, optional, default=1000000 + Initial value for the inverse correlation matrix (P(0)). A large value ensures numerical stability at the start. + + Attributes + ---------- + p : int + Filter order. + forgetting_factor : float + Forgetting factor. + delta : float + Initialization value for P(0). + currentStep : int + The current iteration step of the RLS algorithm. + x : numpy.ndarray + Input vector of size (p+1, 1). Stores the most recent inputs. + P : numpy.ndarray + Inverse correlation matrix, initialized to a scaled identity matrix. 
+    estimates : list of numpy.ndarray
+        List of estimated weight vectors (filter coefficients) at each step.
+    Pks : list of numpy.ndarray
+        List of inverse correlation matrices (P) at each step.
+
+    Methods
+    -------
+    estimate(xn, dn)
+        Updates the filter coefficients using the current input (`xn`) and desired output (`dn`).
+
+    Examples
+    --------
+    >>> from river import linear_model
+    >>> import numpy as np
+
+    >>> # Initialize the RLS filter with order 2, forgetting factor 0.98, and delta 1e6
+    >>> rls = linear_model.RLS(p=2, forgetting_factor=0.98, delta=1e6)
+
+    >>> # Simulate some data
+    >>> np.random.seed(42)
+    >>> num_samples = 100
+    >>> x_data = np.sin(np.linspace(0, 10, num_samples))  # Input signal
+    >>> noise = np.random.normal(0, 0.1, num_samples)  # Add some noise
+    >>> d_data = 0.5 * x_data + 0.3 + noise  # Desired output
+
+    >>> # Apply the RLS algorithm
+    >>> for xn, dn in zip(x_data, d_data):
+    ...     weights = rls.estimate(xn, dn)
+    >>> print("Final Weights:", rls.estimates[-1].flatten())
+    Final Weights: [ 3.48065382 -6.15301727 3.3361416 ]
+    """
+
+    def __init__(self, p: int, forgetting_factor=0.99, delta=1000000):
+        """
+        Initializes the Recursive Least Squares (RLS) filter.
+
+        Parameters
+        ----------
+        p : int
+            Filter order (number of coefficients).
+        forgetting_factor : float, optional
+            Forgetting factor (default is 0.99).
+        delta : float, optional
+            Initial value for the inverse correlation matrix (default is 1,000,000).
+        """
+        self.p = p  # Filter order
+        self.forgetting_factor = forgetting_factor  # Forgetting factor
+        self.delta = delta  # Value used to initialise P(0)
+
+        self.currentStep = 0
+
+        self.x = np.zeros((p + 1, 1))  # Column vector of the most recent inputs
+        self.P = np.identity(p + 1) * self.delta
+
+        self.estimates = []
+        self.estimates.append(np.zeros((p + 1, 1)))  # Weight vector initialized to zeros
+
+        self.Pks = []
+        self.Pks.append(self.P)
+
+    def estimate(self, xn: float, dn: float):
+        """
+        Performs one iteration of the RLS algorithm to update the filter coefficients.
+
+        Parameters
+        ----------
+        xn : float
+            The current input sample.
+        dn : float
+            The desired output corresponding to the current input.
+
+        Returns
+        -------
+        numpy.ndarray
+            Updated weight vector (filter coefficients) after the current iteration.
+        """
+        # Slide the input window and append the newest sample
+        self.x = np.roll(self.x, -1)
+        self.x[-1, 0] = xn
+
+        # Get the previous weight vector
+        wn_prev = self.estimates[-1]
+
+        # Gain vector: g(n) = P(n-1) x(n) / (lambda + x(n)' P(n-1) x(n))
+        denominator = self.forgetting_factor + self.x.T @ self.Pks[-1] @ self.x
+        gn = (self.Pks[-1] @ self.x) / denominator
+
+        # A priori error: alpha(n) = d(n) - x(n)' w(n-1)
+        alpha = dn - (self.x.T @ wn_prev)
+
+        # Inverse correlation matrix update: P(n) = (P(n-1) - g(n) x(n)' P(n-1)) / lambda
+        Pn = (self.Pks[-1] - gn @ self.x.T @ self.Pks[-1]) / self.forgetting_factor
+        self.Pks.append(Pn)
+
+        # Weight update: w(n) = w(n-1) + alpha(n) g(n)
+        wn = wn_prev + gn * alpha
+        self.estimates.append(wn)
+
+        self.currentStep += 1
+
+        return wn