Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete hw 5 #10

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lecture_5_ensemples/homework/data/churn.csv
lecture_5_ensemples/homework/data/heart.csv
778 changes: 0 additions & 778 deletions lecture_1_intro_knn/homework/KNN.ipynb

This file was deleted.

1,554 changes: 1,554 additions & 0 deletions lecture_1_intro_knn/homework/code/KNN.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
class KNNClassifier:
    """
    K-nearest-neighbor classifier using the L1 (Manhattan) distance
    """

    def __init__(self, k=1):
        """
        Arguments:
        k, int - number of nearest training samples that vote on each
           prediction
        """
        self.k = k

    def fit(self, X, y):
        """
        Memorizes the training set; all the work happens in predict

        Arguments:
        X, np array (num_samples, num_features) - training samples
        y, np array (num_samples) - training labels
        """
        self.train_X = X
        self.train_y = y

    def predict(self, X, n_loops=0):
        """
        Uses the KNN model to predict classes for the data samples provided

        Arguments:
        X, np array (num_samples, num_features) - samples to run
           through the model
        n_loops, int - which distance implementation to use: 0 for the
           fully vectorized one, 1 for the one-loop one, any other value
           for the two-loop one

        Returns:
        predictions, np array (num_samples) - predicted class
           for each sample
        """
        if n_loops == 0:
            distances = self.compute_distances_no_loops(X)
        elif n_loops == 1:
            distances = self.compute_distances_one_loop(X)
        else:
            distances = self.compute_distances_two_loops(X)

        # Exactly two distinct training labels -> binary voting rule.
        if len(np.unique(self.train_y)) == 2:
            return self.predict_labels_binary(distances)
        return self.predict_labels_multiclass(distances)

    def compute_distances_two_loops(self, X):
        """
        Computes L1 distance from every sample of X to every training sample
        Uses simplest implementation with 2 Python loops

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
           with distances between each test and each train sample
        """
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]
        distances = np.zeros((num_test, num_train))

        for i in range(num_test):
            for j in range(num_train):
                distances[i, j] = np.sum(np.abs(X[i] - self.train_X[j]))

        return distances

    def compute_distances_one_loop(self, X):
        """
        Computes L1 distance from every sample of X to every training sample
        Vectorizes over the training samples, loops over the test samples

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
           with distances between each test and each train sample
        """
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]

        # Flatten every sample to a single feature vector so the same code
        # handles the documented 2-D input as well as higher-rank samples
        # (summing over axis=(1, 2) only worked for 3-D input).
        train_flat = self.train_X.reshape(num_train, -1)
        test_flat = X.reshape(num_test, -1)

        distances = np.zeros((num_test, num_train))
        for i in range(num_test):
            distances[i, :] = np.sum(np.abs(train_flat - test_flat[i]), axis=1)

        return distances

    def compute_distances_no_loops(self, X):
        """
        Computes L1 distance from every sample of X to every training sample
        Fully vectorized with numpy broadcasting, no Python loops

        Arguments:
        X, np array (num_test_samples, num_features) - samples to run

        Returns:
        distances, np array (num_test_samples, num_train_samples) - array
           with distances between each test and each train sample
        """
        num_test = X.shape[0]
        num_train = self.train_X.shape[0]

        # Broadcasting (num_test, 1, F) against (1, num_train, F) builds a
        # (num_test, num_train, F) temporary before the feature axis is
        # summed away.
        train_samples = self.train_X.reshape(1, num_train, -1)
        test_samples = X.reshape(num_test, 1, -1)

        return np.sum(np.abs(test_samples - train_samples), axis=2)

    def _nearest_neighbours(self, distances):
        """
        Returns indices of the k closest training samples per test sample

        Arguments:
        distances, np array (num_test_samples, num_train_samples)

        Returns:
        idx, np array of int (num_test_samples, k) - indices of the k
           nearest training samples, in no particular order
        """
        # Pivot on k - 1: after partitioning, the first k columns hold the
        # k smallest distances. Pivoting on k is out of bounds when k equals
        # the number of training samples.
        partitioned = np.argpartition(distances, self.k - 1, axis=1)
        return partitioned[:, : self.k]

    def predict_labels_binary(self, distances):
        """
        Returns model predictions for binary classification case

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
           with distances between each test and each train sample
        Returns:
        pred, np array of float (num_test_samples) - binary predictions
           (0.0 or 1.0) for every test sample
        """
        n_test = distances.shape[0]
        prediction = np.zeros(n_test)

        nearest_neighbours_idx = self._nearest_neighbours(distances)
        for sample in range(n_test):
            # Share of the neighbours' labels that are positive; a tie
            # resolves to the positive class.
            # NOTE(review): assumes labels are encoded as 0 and 1 - confirm.
            votes = self.train_y[nearest_neighbours_idx[sample]].sum() / self.k
            prediction[sample] = votes >= 0.5

        return prediction

    def predict_labels_multiclass(self, distances):
        """
        Returns model predictions for multi-class classification case

        Arguments:
        distances, np array (num_test_samples, num_train_samples) - array
           with distances between each test and each train sample
        Returns:
        pred, np array of int (num_test_samples) - predicted class index
           for every test sample
        """
        n_test = distances.shape[0]
        # Builtin int dtype: the np.int alias no longer exists in current
        # numpy, and the result is documented as an int array.
        prediction = np.zeros(n_test, dtype=int)

        # np.bincount needs non-negative integers; cast once, outside the loop.
        labels = self.train_y.astype(int)
        nearest_neighbours_idx = self._nearest_neighbours(distances)
        for sample in range(n_test):
            neighbour_labels = labels[nearest_neighbours_idx[sample]].flatten()
            # Most frequent label wins; argmax picks the smallest label
            # among equally frequent ones.
            prediction[sample] = np.bincount(neighbour_labels).argmax()

        return prediction
123 changes: 123 additions & 0 deletions lecture_1_intro_knn/homework/code/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import numpy as np


def _ratio_or_none(numerator, denominator, message):
    """Return numerator / denominator, or print message and return None when
    the denominator is zero."""
    if denominator == 0:
        print(message)
        return None
    return numerator / denominator


def binary_classification_metrics(y_pred, y_true):
    """
    Computes metrics for binary classification
    Arguments:
    y_pred, np array (num_samples) - model predictions (0 or 1)
    y_true, np array (num_samples) - true labels (0 or 1)
    Returns:
    precision, recall, f1, accuracy - classification metrics; a metric
       whose denominator is zero is reported as None and a message is printed
    """
    # Squeeze both inputs so a column vector (num_samples, 1) on either side
    # is compared element-wise instead of being broadcast to a matrix.
    y_pred = np.asarray(y_pred).squeeze()
    y_true = np.asarray(y_true).squeeze()

    # Plain Python ints: numpy integer scalars do not raise
    # ZeroDivisionError on division by zero (they yield nan with a warning),
    # so the zero checks are made explicitly instead.
    tp = int(np.sum((y_pred == 1) & (y_true == 1)))
    tn = int(np.sum((y_pred == 0) & (y_true == 0)))
    fp = int(np.sum((y_pred == 1) & (y_true == 0)))
    fn = int(np.sum((y_pred == 0) & (y_true == 1)))

    precision = _ratio_or_none(tp, tp + fp, "Precision contains 0 division")
    recall = _ratio_or_none(tp, tp + fn, "Recall contains 0 division")

    # F1 is undefined when either component is undefined or both are zero.
    if precision is None or recall is None:
        f1 = None
        print("F1 contains 0 division")
    else:
        f1 = _ratio_or_none(
            2 * precision * recall,
            precision + recall,
            "F1 contains 0 division",
        )

    accuracy = _ratio_or_none(
        tp + tn,
        tp + tn + fn + fp,
        "Accuracy calculations contain zero division",
    )

    return precision, recall, f1, accuracy


def multiclass_accuracy(y_pred, y_true):
    """
    Computes metrics for multiclass classification
    Arguments:
    y_pred, np array of int (num_samples) - model predictions
    y_true, np array of int (num_samples) - true labels
    Returns:
    accuracy - ratio of accurate predictions to total samples, or None
       (with a printed message) when there are no samples
    """
    # Squeeze both sides so a column vector is compared element-wise rather
    # than broadcast against a flat array.
    y_pred = np.asarray(y_pred).squeeze()
    y_true = np.asarray(y_true).astype(int).squeeze()

    if y_true.size == 0:
        print("Accuracy calculations contain zero division")
        return None

    # Accuracy is simply the share of exact matches. Summing tp + tn over
    # one-vs-rest splits counts every sample once per class and inflates
    # the score.
    return float(np.mean(y_pred == y_true))


def r_squared(y_pred, y_true):
    """
    Computes r-squared for regression
    Arguments:
    y_pred, np array (num_samples) - model predictions
    y_true, np array (num_samples) - true values
    Returns:
    r2 - r-squared value, 1 - residual sum of squares / total sum of
       squares; not finite when y_true is constant (total sum is zero)
    """
    # Squeeze both inputs: a (num_samples, 1) column against a flat
    # (num_samples,) array would otherwise broadcast to a square matrix and
    # silently produce a wrong score.
    y_pred = np.asarray(y_pred).squeeze()
    y_true = np.asarray(y_true).squeeze()

    y_mean = np.mean(y_true)
    total_sum_squares = np.sum((y_true - y_mean) ** 2)
    residual_sum_squares = np.sum((y_true - y_pred) ** 2)

    return 1 - (residual_sum_squares / total_sum_squares)


def mse(y_pred, y_true):
    """
    Computes mean squared error
    Arguments:
    y_pred, np array (num_samples) - model predictions
    y_true, np array (num_samples) - true values
    Returns:
    mse - mean squared error
    """
    # Squeeze both inputs so a (num_samples, 1) column and a flat
    # (num_samples,) array are subtracted element-wise instead of being
    # broadcast to a (num_samples, num_samples) matrix.
    y_pred = np.asarray(y_pred).squeeze()
    y_true = np.asarray(y_true).squeeze()

    return ((y_true - y_pred) ** 2).mean()


def mae(y_pred, y_true):
    """
    Computes mean absolute error
    Arguments:
    y_pred, np array (num_samples) - model predictions
    y_true, np array (num_samples) - true values
    Returns:
    mae - mean absolute error
    """
    # Squeeze both inputs so a (num_samples, 1) column and a flat
    # (num_samples,) array are subtracted element-wise instead of being
    # broadcast to a (num_samples, num_samples) matrix.
    y_pred = np.asarray(y_pred).squeeze()
    y_true = np.asarray(y_true).squeeze()

    return np.abs(y_true - y_pred).mean()
Loading