danon6868 · glitchheadgit · Feb 11, 2024 · Feb 11, 2024 · Mar 11, 2024 · Mar 11, 2024
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+lecture_5_ensemples/homework/data/churn.csv
+lecture_5_ensemples/homework/data/heart.csv
diff --git a/lecture_1_intro_knn/homework/KNN.ipynb b/lecture_1_intro_knn/homework/KNN.ipynb
diff --git a/lecture_1_intro_knn/homework/code/KNN.ipynb b/lecture_1_intro_knn/homework/code/KNN.ipynb
diff --git a/lecture_1_intro_knn/homework/knn.py → lecture_1_intro_knn/homework/code/knn.py b/lecture_1_intro_knn/homework/knn.py → lecture_1_intro_knn/homework/code/knn.py
@@ -5,20 +5,18 @@ class KNNClassifier:
     """
     K-neariest-neighbor classifier using L1 loss
     """
-    
+
     def __init__(self, k=1):
         self.k = k
-
 
     def fit(self, X, y):
         self.train_X = X
         self.train_y = y
 
-
     def predict(self, X, n_loops=0):
         """
         Uses the KNN model to predict clases for the data samples provided
-        
+
         Arguments:
         X, np array (num_samples, num_features) - samples to run
            through the model
@@ -28,38 +26,41 @@ def predict(self, X, n_loops=0):
         predictions, np array of ints (num_samples) - predicted class
            for each sample
         """
-        
+        # n_loops picks the distance routine: 0, 1, or (any other value) 2.
         if n_loops == 0:
             distances = self.compute_distances_no_loops(X)
         elif n_loops == 1:
-            distances = self.compute_distances_one_loops(X)
+            distances = self.compute_distances_one_loop(X)
         else:
             distances = self.compute_distances_two_loops(X)
-        
+        # Exactly two distinct training labels -> binary vote, else multiclass.
         if len(np.unique(self.train_y)) == 2:
             return self.predict_labels_binary(distances)
         else:
             return self.predict_labels_multiclass(distances)
 
-
     def compute_distances_two_loops(self, X):
         """
         Computes L1 distance from every sample of X to every training sample
         Uses simplest implementation with 2 Python loops
 
         Arguments:
         X, np array (num_test_samples, num_features) - samples to run
-        
+
         Returns:
         distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
         """
-
-        """
-        YOUR CODE IS HERE
-        """
-        pass
 
+        # Allocate the (num_test, num_train) result, then fill each cell.
+        distances = np.zeros((X.shape[0], self.train_X.shape[0]))
+        for test_idx, test_sample in enumerate(X):
+            for train_idx, train_sample in enumerate(self.train_X):
+                # L1 distance: sum of absolute coordinate differences.
+                l1_distance = np.abs(test_sample - train_sample).sum()
+                distances[test_idx, train_idx] = l1_distance
+
+        return distances
 
     def compute_distances_one_loop(self, X):
         """
@@ -68,17 +69,19 @@ def compute_distances_one_loop(self, X):
 
         Arguments:
         X, np array (num_test_samples, num_features) - samples to run
-        
+
         Returns:
         distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
         """
 
-        """
-        YOUR CODE IS HERE
-        """
-        pass
+        # Flatten features; axis=(1, 2) only worked for 3-D sample arrays.
+        train_flat = self.train_X.reshape(self.train_X.shape[0], -1)
+        distances = np.zeros((X.shape[0], train_flat.shape[0]))
+        for i in range(X.shape[0]):
+            distances[i, :] = np.abs(train_flat - X[i].ravel()).sum(axis=1)
 
+        return distances
 
     def compute_distances_no_loops(self, X):
         """
@@ -87,57 +90,65 @@ def compute_distances_no_loops(self, X):
 
         Arguments:
         X, np array (num_test_samples, num_features) - samples to run
-        
+
         Returns:
         distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
         """
 
-        """
-        YOUR CODE IS HERE
-        """
-        pass
+        # Flatten each sample and add broadcast axes: test -> (T, 1, F),
+        # train -> (1, N, F), so their difference has shape (T, N, F).
 
+        test_samples = X.reshape(X.shape[0], 1, -1)
+        train_samples = self.train_X.reshape(1, self.train_X.shape[0], -1)
+        distances = np.abs(test_samples - train_samples).sum(axis=2)
+
+        return distances
 
     def predict_labels_binary(self, distances):
         """
         Returns model predictions for binary classification case
-        
+
         Arguments:
         distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
         Returns:
-        pred, np array of bool (num_test_samples) - binary predictions 
+        pred, np array of bool (num_test_samples) - binary predictions
            for every test sample
         """
 
-        n_train = distances.shape[1]
         n_test = distances.shape[0]
         prediction = np.zeros(n_test)
 
-        """
-        YOUR CODE IS HERE
-        """
-        pass
+        # kth = k - 1 keeps the k nearest first and allows k == num_train.
+        nearest_neighbours_idx = np.argpartition(distances, self.k - 1, axis=1)
+        for sample in range(n_test):
+            votes = self.train_y[nearest_neighbours_idx[sample, : self.k]]
+            prediction[sample] = votes.sum() / self.k >= 0.5
 
+        return prediction
 
     def predict_labels_multiclass(self, distances):
         """
         Returns model predictions for multi-class classification case
-        
+
         Arguments:
         distances, np array (num_test_samples, num_train_samples) - array
            with distances between each test and each train sample
         Returns:
-        pred, np array of int (num_test_samples) - predicted class index 
+        pred, np array of int (num_test_samples) - predicted class index
            for every test sample
         """
 
-        n_train = distances.shape[0]
         n_test = distances.shape[0]
-        prediction = np.zeros(n_test, np.int)
+        prediction = np.zeros(n_test, dtype=int)
 
-        """
-        YOUR CODE IS HERE
-        """
-        pass
+        # kth = k - 1 keeps the k nearest first and allows k == num_train.
+        nearest_neighbours_idx = np.argpartition(distances, self.k - 1, axis=1)
+        # Cast the labels once (not per sample); np.bincount needs 1-D ints.
+        train_labels = self.train_y.astype(int).ravel()
+        for sample in range(n_test):
+            votes = train_labels[nearest_neighbours_idx[sample, : self.k]]
+            prediction[sample] = np.bincount(votes).argmax()
+
+        return prediction
diff --git a/lecture_1_intro_knn/homework/code/metrics.py b/lecture_1_intro_knn/homework/code/metrics.py
@@ -0,0 +1,123 @@
+import numpy as np
+
+
+def binary_classification_metrics(y_pred, y_true):
+    """
+    Computes metrics for binary classification
+    Arguments:
+    y_pred, np array (num_samples) - model predictions
+    y_true, np array (num_samples) - true labels
+    Returns:
+    precision, recall, f1, accuracy - classification metrics;
+        a metric whose denominator is zero is returned as None
+    """
+    # Flatten both inputs so a (num_samples, 1) column cannot broadcast
+    # against a (num_samples,) vector into a 2-D comparison.
+    y_pred = np.asarray(y_pred).ravel()
+    y_true = np.asarray(y_true).ravel()
+
+    # Python ints: NumPy integers give nan + a RuntimeWarning on x / 0.
+    tp = int(np.sum((y_pred == 1) & (y_true == 1)))
+    tn = int(np.sum((y_pred == 0) & (y_true == 0)))
+    fp = int(np.sum((y_pred == 1) & (y_true == 0)))
+    fn = int(np.sum((y_pred == 0) & (y_true == 1)))
+
+    precision = tp / (tp + fp) if tp + fp else None
+    if precision is None:
+        print("Precision contains 0 division")
+
+    recall = tp / (tp + fn) if tp + fn else None
+    if recall is None:
+        print("Recall contains 0 division")
+
+    # F1 is undefined if either input is undefined or both are zero.
+    f1 = None
+    if precision is not None and recall is not None and precision + recall:
+        f1 = 2 * (precision * recall) / (precision + recall)
+    if f1 is None:
+        print("F1 contains 0 division")
+
+    total = tp + tn + fp + fn
+    accuracy = (tp + tn) / total if total else None
+    if accuracy is None:
+        print("Accuracy calculations contain zero division")
+
+    return precision, recall, f1, accuracy
+
+
+def multiclass_accuracy(y_pred, y_true):
+    """
+    Computes metrics for multiclass classification
+    Arguments:
+    y_pred, np array of int (num_samples) - model predictions
+    y_true, np array of int (num_samples) - true labels
+    Returns:
+    accuracy - ratio of accurate predictions to total samples,
+        or None when there are no samples
+    """
+    # Flatten so a (num_samples, 1) label column lines up element-wise
+    # with a (num_samples,) prediction vector instead of broadcasting.
+    y_pred = np.asarray(y_pred).ravel()
+    y_true = np.asarray(y_true).astype(int).ravel()
+
+    if y_true.size == 0:
+        accuracy = None
+        print("Accuracy calculations contain zero division")
+    else:
+        # Summing one-vs-rest TP + TN over classes credits even misclassified
+        # samples as true negatives, inflating the score; compare directly.
+        accuracy = np.sum(y_pred == y_true) / y_true.size
+
+    return accuracy
+
+
+def r_squared(y_pred, y_true):
+    """
+    Computes r-squared for regression (inputs are flattened to 1-D)
+    Arguments:
+    y_pred, array-like (num_samples) - model predictions
+    y_true, array-like (num_samples) - true values
+    Returns:
+    r2 - r-squared value; for constant y_true, 1.0 if exact else 0.0
+    """
+    y_pred = np.asarray(y_pred, dtype=float).ravel()
+    y_true = np.asarray(y_true, dtype=float).ravel()
+    ss_res = np.sum((y_true - y_pred) ** 2)
+    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
+    if ss_tot == 0:  # constant target; same convention as sklearn
+        return 1.0 if ss_res == 0 else 0.0
+    return 1 - ss_res / ss_tot
+
+
+def mse(y_pred, y_true):
+    """
+    Computes mean squared error
+    Arguments:
+    y_pred, array-like (num_samples) - model predictions
+    y_true, array-like (num_samples) - true values
+    Returns:
+    mse - mean squared error
+    """
+    # Flatten both inputs: subtracting a (n,) vector from a (n, 1) column
+    # would silently broadcast to an (n, n) matrix and skew the mean.
+    y_pred = np.asarray(y_pred, dtype=float).ravel()
+    y_true = np.asarray(y_true, dtype=float).ravel()
+
+    return ((y_true - y_pred) ** 2).mean()
+
+
+def mae(y_pred, y_true):
+    """
+    Computes mean absolute error
+    Arguments:
+    y_pred, array-like (num_samples) - model predictions
+    y_true, array-like (num_samples) - true values
+    Returns:
+    mae - mean absolute error
+    """
+    # Flatten both inputs: subtracting a (n,) vector from a (n, 1) column
+    # would silently broadcast to an (n, n) matrix and skew the mean.
+    y_pred = np.asarray(y_pred, dtype=float).ravel()
+    y_true = np.asarray(y_true, dtype=float).ravel()
+
+    return np.abs(y_true - y_pred).mean()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		lecture_5_ensemples/homework/data/churn.csv
		lecture_5_ensemples/homework/data/heart.csv