"""
Loss Function and Fast Linear
=============================
A couple of learners expose a parameter called *loss_function*
which changes the error the model tries to minimize.
Let's see with a short example how this parameter impacts
training. We illustrate it with a linear classifier;
the impact is higher for linear learners when the data
contains outliers.

.. contents::
    :local:

.. index:: classification, loss_function

Dummy data
----------

We prepare some dummy data with two classes that are
quite well separated. We choose a line and assign each point
:math:`(x, y)` a class depending on which side of the line
it falls on.
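Concretely, with a noise term :math:`e`, the signed quantity
:math:`2x + y - 1.75 + e` is computed and the class is
:math:`clas = \mathbf{1}_{2x + y - 1.75 + e > 0}`.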
"""
import matplotlib.pyplot as plt
import pandas
import numpy
import numpy.random as rand
def formula(x, y, e):
    # signed side of the line 2x + y = 1.75, with noise e
    return x * 2 + y - 1.75 + e
N = 200
x = rand.rand(N)
y = rand.rand(N) * 2
e = (rand.rand(N)-0.5)
data = pandas.DataFrame(dict(x=x, y=y, line=formula(x, y, e)))
data["clas"] = data.line.apply(lambda z: 1 if z > 0 else 0).astype(float)
data = data.drop("line", axis=1).copy()
print(data.groupby("clas").count())
ax = data[data.clas==0].plot(x="x", y="y", color="red", label="clas=0", kind="scatter")
data[data.clas==1].plot(x="x", y="y", color="blue", label="clas=1", ax=ax, kind="scatter")
ax.plot([0, 1], [1.75, -0.25], "--")
ax.set_title("Initial cloud of points")
#########################################
# Learn a model
# -------------
#
# Let's see how a fast linear model performs.
from microsoftml import rx_fast_linear, rx_predict
model = rx_fast_linear("clas ~ x + y", data=data)
pred = rx_predict(model, data, extra_vars_to_write=["x", "y"])
print(pred.head())
#####################################
# We plot the model decision by filling the background of the graph.
def plot_color_class(data, model, ax, fig, side=True):
    # Predict the probability of class 1 on a grid covering the data
    # and draw it as a filled contour behind the scatter plot.
    X = data[["x", "y"]].values
    xmin, xmax = numpy.min(X[:, 0]), numpy.max(X[:, 0])
    ymin, ymax = numpy.min(X[:, 1]), numpy.max(X[:, 1])
    dx, dy = (xmax - xmin) / 10, (ymax - ymin) / 10
    xmin -= dx
    xmax += dx
    ymin -= dy
    ymax += dy
    hx = (xmax - xmin) / 100
    hy = (ymax - ymin) / 100
    xx, yy = numpy.mgrid[xmin:xmax:hx, ymin:ymax:hy]
    grid = numpy.c_[xx.ravel(), yy.ravel()]
    dfgrid = pandas.DataFrame(data=grid, columns=["x", "y"])
    probs = rx_predict(model, dfgrid).values[:, 1].reshape(xx.shape)
    contour = ax.contourf(xx, yy, probs, 25, cmap="RdBu", vmin=0, vmax=1)
    if side:
        ax_c = fig.colorbar(contour)
        ax_c.set_label("$P(y = 1)$")
        ax_c.set_ticks([0, .25, .5, .75, 1])
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])
    data[data.clas == 0].plot(x="x", y="y", color="red", label="clas=0", kind="scatter", ax=ax)
    data[data.clas == 1].plot(x="x", y="y", color="blue", label="clas=1", ax=ax, kind="scatter")
    ax.plot([0, 1], [1.75, -0.25], "--")
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
plot_color_class(data, model, ax, fig)
ax.set_title("Initial cloud of points\nDefault settings for logisitic regression")
########################################
# Let's add outliers
# ------------------
#
# This problem was designed to be linear. Let's
# make it more difficult by adding outliers far from
# the original linear separation.
xo = numpy.arange(0.3, 0.4, 0.02)
yo = [2.2] * len(xo)
claso = [0] * len(xo)
outlier = pandas.DataFrame(dict(x=xo, y=yo, clas=claso))
new_data = pandas.concat([data, outlier])
print(outlier.tail())
ax = new_data[new_data.clas==0].plot(x="x", y="y", color="red", label="clas=0", kind="scatter")
new_data[new_data.clas==1].plot(x="x", y="y", color="blue", label="clas=1", ax=ax, kind="scatter")
ax.plot([0, 1], [1.75, -0.25], "--")
ax.set_title("Cloud of points with outliers")
################################
# These points are obviously outliers. Let's see how the model
# behaves with them.
model = rx_fast_linear("clas ~ x + y", data=new_data)
pred = rx_predict(model, new_data, extra_vars_to_write=["x", "y"])
fig, ax = plt.subplots(1, 1, figsize=(7, 5))
plot_color_class(new_data, model, ax, fig)
ax.set_title("Cloud of points with outliers\nDefault settings for fast linear")
##########################
# The prediction is significantly impacted
# by the new points. We switch to another loss function,
# :epkg:`microsoftml.hinge_loss`.
# It is a linear error and not a log loss anymore.
# It is less sensitive to high values
# and, as a consequence, less sensitive to outliers.
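# As a quick illustrative sketch (using only :epkg:`numpy` and
# :epkg:`matplotlib`, already imported above, nothing from
# :epkg:`microsoftml`), we compare the shape of the log loss
# :math:`\log(1 + e^{-m})` with the hinge loss :math:`\max(0, 1 - m)`
# as a function of the raw margin :math:`m`. The exact formulas used
# internally by the learner may differ; this only shows how the two
# penalties behave.
margins = numpy.linspace(-3, 3, 100)
log_loss_curve = numpy.log1p(numpy.exp(-margins))  # log loss shape
hinge_curve = numpy.maximum(0, 1 - margins)        # hinge loss shape
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
ax.plot(margins, log_loss_curve, label="log loss")
ax.plot(margins, hinge_curve, label="hinge loss")
ax.set_xlabel("margin")
ax.set_ylabel("penalty")
ax.legend()
ax.set_title("Log loss and hinge loss (illustrative shapes)")

##########################
# Now let's retrain with :epkg:`microsoftml.hinge_loss` for a few
# values of its margin parameter.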
from microsoftml import hinge_loss
consts = [0, 0.1, 1, 10]
fig, ax = plt.subplots(2, len(consts) // 2, figsize=(15, 15))
for i, const in enumerate(consts):
    a = ax[i // 2, i % 2]
    model = rx_fast_linear("clas ~ x + y", data=new_data,
                           loss_function=hinge_loss(const))
    pred = rx_predict(model, new_data, extra_vars_to_write=["x", "y"])
    plot_color_class(new_data, model, a, fig, side=False)
    a.set_title("Cloud of points with outliers\nHinge Loss\nmargin={0}".format(const))
##########################
# We can also use :epkg:`microsoftml.smoothed_hinge_loss`.
from microsoftml import smoothed_hinge_loss
consts = [0, 0.1, 1, 10]
fig, ax = plt.subplots(2, len(consts) // 2, figsize=(15, 15))
for i, smooth_const in enumerate(consts):
    a = ax[i // 2, i % 2]
    model = rx_fast_linear("clas ~ x + y", data=new_data,
                           loss_function=smoothed_hinge_loss(smooth_const))
    pred = rx_predict(model, new_data, extra_vars_to_write=["x", "y"])
    plot_color_class(new_data, model, a, fig, side=False)
    a.set_title("Cloud of points with outliers\nSmoothed Hinge Loss\nconst={0}".format(smooth_const))