#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 24 01:56:39 2023
A demonstration-only implementation of both linear (weight-based) and kernelized SVMs in PyTorch.
Inspired by: https://gist.github.com/mlaves/c98cd4e6bcb9dbd4d0c03b34bacb0f65
@author: Fayyaz Minhas
"""
#%% Just some visualization functions (you can ignore)
import itertools
import warnings
import numpy as np
import matplotlib.pyplot as plt
def plotit(X,Y=None,clf=None, conts = None, ccolors = ('b','k','r'), colors = ('c','y'), markers = ('s','o'), hold = False, transform = None,extent = None,**kwargs):
"""
A function for showing data scatter plot and classification boundary
of a classifier for 2D data
X: nxd matrix of data points
Y: (optional) n vector of class labels
clf: (optional) classification/discriminant function handle
conts: (optional) contours (if None, contours are drawn for each class boundary)
ccolors: (optional) colors for contours
colors: (optional) colors for each class (sorted wrt class id)
can be 'scaled' or 'random' or a list/tuple of color ids
markers: (optional) markers for each class (sorted wrt class id)
hold: Whether to hold the plot or not for overlay (default: False).
transform: (optional) a function handle for transforming data before passing to clf
kwargs: any keyword arguments to be passed to clf (if any)
"""
if clf is not None and X.shape[1]!=2:
warnings.warn("Data Dimensionality is not 2. Unable to plot.")
return
if markers is None:
markers = ('.',)
eps=1e-6
d0,d1 = (0,1)
if extent is None:
minx, maxx = np.min(X[:,d0])-eps, np.max(X[:,d0])+eps
miny, maxy = np.min(X[:,d1])-eps, np.max(X[:,d1])+eps
extent = [minx,maxx,miny,maxy]
else:
[minx,maxx,miny,maxy] = extent
if Y is not None:
classes = sorted(set(Y))
if conts is None:
conts = list(classes)
vmin,vmax = classes[0]-eps,classes[-1]+eps
else:
vmin,vmax=-2-eps,2+eps
if conts is None:
conts = sorted([-1+eps,0,1-eps])
if clf is not None:
npts = 150
x = np.linspace(minx,maxx,npts)
y = np.linspace(miny,maxy,npts)
t = np.array(list(itertools.product(x,y)))
if transform is not None:
t = transform(t)
z = clf(t,**kwargs)
z = np.reshape(z,(npts,npts)).T
        plt.contour(x,y,z,conts,linewidths = [2],colors=ccolors) #decision boundary contours (f(x)=0 and margins)
#plt.imshow(np.flipud(z), extent = extent, cmap=plt.cm.Purples, vmin = -2, vmax = +2); plt.colorbar()
plt.pcolormesh(x, y, z,cmap=plt.cm.Purples,vmin=vmin,vmax=vmax);plt.colorbar()
plt.axis(extent)
if Y is not None:
for i,y in enumerate(classes):
if colors is None or colors=='scaled':
cc = np.array([[i,i,i]])/float(len(classes))
elif colors =='random':
cc = np.array([[np.random.rand(),np.random.rand(),np.random.rand()]])
else:
cc = colors[i%len(colors)]
mm = markers[i%len(markers)]
plt.scatter(X[Y==y,d0],X[Y==y,d1], marker = mm,c = cc, s = 30)
else:
plt.scatter(X[:,d0],X[:,d1],marker = markers[0], c = 'k', s = 5)
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
if not hold:
plt.grid()
plt.show()
return extent
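# Illustrative usage (the decision function handle below is hypothetical):
#   plotit(X, Y, clf=lambda t: my_decision_function(t), conts=[0])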
#%% Importing torch and setting up functions for GPU access and transfer
import torch
USE_CUDA = False # torch.cuda.is_available() #can enable GPU execution
def cuda(v):
    if USE_CUDA:
        return v.cuda()
    return v
def toTensor(v, dtype=torch.float, requires_grad=False):
    #torch.autograd.Variable is deprecated (a no-op wrapper); plain tensors carry gradients
    return cuda(torch.tensor(v).type(dtype).requires_grad_(requires_grad))
def toNumpy(v):
if USE_CUDA:
return v.detach().cpu().numpy()
return v.detach().numpy()
print('Using CUDA:',USE_CUDA)
#%% This is the implementation of Both Linear (weight based) and Kernelized SVM
def hinge_loss(x, y): #implements (1/N)*sum_{i=1}^N max(0,1-y_i*x_i)
return torch.max(torch.zeros_like(y), 1-y*x).mean()
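# A quick sanity check with illustrative values (not part of the original script):
#   hinge_loss(torch.tensor([2.0, -0.5]), torch.tensor([1.0, 1.0]))
#   margins y*x = [2.0, -0.5] -> per-example losses [0.0, 1.5] -> mean = 0.75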
class KernelSVM(torch.nn.Module):
def __init__(self, train_data_x, kernel='poly', degree = 2, gamma = 1.0):
super().__init__()
assert kernel in ['linear', 'rbf','poly']
self._train_data_x = train_data_x #save training data
self.kernel = kernel #kernel string
        self._n_trainable = train_data_x.size(0) #by default, one trainable parameter (alpha) per training example for the kernelized implementation
self._gamma = torch.nn.Parameter(torch.FloatTensor([gamma]),
requires_grad=True) #set this to true if you want to train the gamma (see poly and rbf)
self.degree = degree #save degree of polynomial kernel
#note both gamma and degree remain unused if kernel is 'linear'
if kernel == 'linear':
self._kernel = self.linear
            self._n_trainable = train_data_x.shape[1] #a linear SVM needs one weight per input dimension instead
elif kernel == 'poly':
self._kernel = self.poly
elif kernel == 'rbf':
self._kernel = self.rbf
else: #not implemented
assert False
self._walpha = torch.nn.Linear(in_features=self._n_trainable, out_features=1,bias = True) #most important line
        # _walpha holds _n_trainable trainable parameters (weights for the linear SVM, alphas for the kernelized SVM) plus a bias term
def poly(self,x): #implements k(xa,xb)=(gamma+xa.T*xb)^degree in vectorized code
return (self._gamma+x@self._train_data_x.T)**self.degree
def rbf(self, x): #implements k(xa,xb)=exp(-gamma*||xa-xb||^2) in vectorized code
d = torch.cdist(x,self._train_data_x)
return torch.exp(-self._gamma*d**2)
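    # Illustrative sanity checks (assumed values): for identical points d=0, so rbf gives exp(0)=1;
    # poly with gamma=1, degree=2 on x=[1,0] against itself gives (1+1)**2=4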
def getTrainingKernelMatrix(self):#return NxN Kernel matrix with K_ij = k(xi,xj)
if self.kernel !='linear':
return self._kernel(self._train_data_x)
else:
return self._train_data_x@self._train_data_x.T
    def getRegularizationPenalty(self): #return the regularization penalty
if self.kernel!='linear': #implements alphas.T*K*alphas = sum_{i=1}^N sum_{j=1}^N alpha_i * K(xi,xj) * alpha_j for kernelized SVM in vectorized code
alphas = self._walpha.weight.T
K = self.getTrainingKernelMatrix()
return alphas.T@K@alphas
else:
w = self._walpha.weight.T #implements w.T*w for linear SVM
return w.T@w
@staticmethod
def linear(x): #note this is not a kernel function per se
return x
def forward(self, x): #returns f(x) ## REPRESENTATION
        k = self._kernel(x) #for each row x: the kernelized SVM gives k(x,:) = [k(x,x_1), k(x,x_2), ..., k(x,x_N)], whereas the linear SVM simply gives x
z = self._walpha(k) #for linear SVM this would be f(x) = w.T@x+b whereas for kernelized SVM this would be f(x) = b+alphas.T*k(x,:) = b + sum_{i=1}^N alpha_i * k(x,x_i)
return z #return f(x)
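# Shape sketch (illustrative): for an input x of shape (M, d) and N training examples,
# poly/rbf map x to a kernel block k of shape (M, N) and _walpha maps k to f(x) of shape (M, 1);
# with the 'linear' kernel, k stays (M, d) and _walpha maps it to (M, 1).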
if __name__=='__main__':
#loading data
#x0 = np.array([[0,0],[0,1],[1,0],[1,1]]); y0 = np.array([-1,-1,-1,1]) #AND
x0 = np.array([[0,0],[0,1],[1,0],[1,1]]); y0 = np.array([-1,1,1,-1]) #XOR
#from sklearn.datasets import make_moons; x0, y0 = make_moons(20, noise=0.1); y0[np.where(y0==0)] = -1 #Moons
    #porting data to torch tensors
x = toTensor(x0)
y = toTensor(y0)
#creating SVM
model = KernelSVM(x,kernel='rbf',degree = 4, gamma = 1)
model=cuda(model)
C = 100 #C Hyperparameter in SVM formulation
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01) #Adam optimizer (full-batch updates, so this is gradient descent rather than stochastic gradient descent)
History = [] #Just for plotting the convergence
    for i in range(10000): #far more iterations than strictly needed; around 1000 is usually enough for these toy problems
z = model(x) #calculate f(x) #Representation
avg_loss = hinge_loss(z, y.unsqueeze(1)) #calculate error #Evaluation
reg_penalty = model.getRegularizationPenalty() #calculate penalty term for regularization
objective = 0.5*reg_penalty+C*avg_loss #Implement Structural Risk Minimization Objective for SVM
        #optimization steps that will update the alphas for the kernelized SVM and the weights for the linear one
History.append((reg_penalty.item(),avg_loss.item(),objective.item()))
optimizer.zero_grad()
objective.backward()
optimizer.step()
#just plotting
plt.figure();plt.plot(np.array(History)); plt.legend(['Regularization penalty','Average Loss','Overall Objective']);plt.xlabel('Iterations')
plt.figure()
    plotit(x0, y0, clf=lambda t: toNumpy(model(toTensor(t))), conts=[0], extent=[-1.5,+1.5,-1.5,+2])
print("Final Objective Value", objective.item())
print("Final weights or alphas:",model._walpha.weight)