forked from microsoft/ML-Server-Python-Samples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_iris.py
126 lines (104 loc) · 3.88 KB
/
plot_iris.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# -*- coding: utf-8 -*-
"""
Iris Dataset - multi-class classification problem
=================================================
The `Iris DataSet <http://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html>`_
has become quite popular in machine learning. It is a short and simple classification
problem with three classes. We first see how to handle it with
:epkg:`scikit-learn` and then with :epkg:`MicrosoftML`.

.. contents::
    :local:

.. index:: classification, multi-class, iris
"""
########################################
#
# scikit-learn
# ------------
#
###########################################
# We retrieve the data with scikit-learn.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
import sphinx_gallery
iris = datasets.load_iris()
# Only the first two columns of the features are kept; they are the
# two coordinates used for the grid and the plots below.
X, Y = iris.data[:, :2], iris.target

###########################################
# We train a logistic regression model
# with :epkg:`scikit-learn`.

from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()
logreg.fit(X, Y)

###########################################
# We build a regular grid of step *h* which covers
# the data, padded by about 0.5 in every direction.

h = 0.02
x_min, y_min = X.min(axis=0) - 0.5
x_max, y_max = X.max(axis=0) + 0.5
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h))
# One row per grid node, one column per feature.
gridX = np.column_stack((xx.ravel(), yy.ravel()))

###########################
# We ask the model for the class of every point of the grid.

grid = logreg.predict(gridX)
######################
# We plot the predictions.
plt.figure(figsize=(4, 3))

# The flat vector of predictions is folded back into the shape of the grid
# so that it can be drawn as a coloured mesh.
zgrid = np.reshape(grid, xx.shape)
plt.pcolormesh(xx, yy, zgrid, cmap=plt.cm.Paired)

# The training points are drawn on top of the mesh, coloured by their label.
plt.scatter(*X.T, c=Y, cmap=plt.cm.Paired, edgecolors='k')
plt.title("scikit-learn")

# Horizontal axis: label, limits of the grid, no tick.
plt.xlabel('Sepal length')
plt.xlim(xx.min(), xx.max())
plt.xticks(())

# Vertical axis: label, limits of the grid, no tick.
plt.ylabel('Sepal width')
plt.ylim(yy.min(), yy.max())
plt.yticks(())
########################################
#
# microsoftml
# -----------
#
# We use :epkg:`microsoftml` to do the same.
# The main difference is that the features and the label
# must appear in the same dataframe. A formula specifies
# which role each column plays during the training.
# We also modify the label type, which must be
# a boolean, a float or a category for this kind of problem.
import pandas
# Features and label live in the same dataframe, the label is stored as float.
df = pandas.DataFrame(X, columns=["X1", "X2"]).assign(Label=Y.astype(float))

###########################################################################
# :epkg:`microsoftml` must be told that it is a multi-class classification
# problem. Compared to :epkg:`scikit-learn`, this may seem like a regression.
# However, because :epkg:`microsoftml` can deal with out-of-memory datasets,
# the third class could appear at the end of the training dataset.
# The parameter *verbose* can take the values 0, 1 or 2.
# If > 0, :epkg:`microsoftml` displays information about the training
# on the standard output.

from microsoftml import rx_logistic_regression, rx_predict
logregml = rx_logistic_regression(
    "Label ~ X1 + X2",
    data=df,
    method="multiClass",
    verbose=1)

###################################
# The grid (a numpy array) is converted into a dataframe
# with the same column names as the training data.

dfgrid = pandas.DataFrame(gridX, columns=["X1", "X2"])
gridml = rx_predict(logregml, dfgrid)

##################################
# :epkg:`microsoftml` returns three scores.
# We display the first rows.

print(gridml.head(3))
##################################
# We need to pick the best one: the predicted class
# is the position of the highest score on each row.

# ``DataFrame.as_matrix`` was deprecated in pandas 0.23 and removed in
# pandas 1.0; ``DataFrame.values`` returns the same numpy array and is
# available in both old and recent versions of pandas.
predicted_classes = np.argmax(gridml.values, axis=1)
#####################
# We plot.
plt.figure(figsize=(4, 3))

# The flat vector of predicted classes is folded back into the shape
# of the grid so that it can be drawn as a coloured mesh.
zgrid = np.reshape(predicted_classes, xx.shape)
plt.pcolormesh(xx, yy, zgrid, cmap=plt.cm.Paired)

# The training points are drawn on top of the mesh, coloured by their label.
plt.scatter(*X.T, c=Y, cmap=plt.cm.Paired, edgecolors='k')
plt.title("microsoftml")

# Horizontal axis: label, limits of the grid, no tick.
plt.xlabel('Sepal length')
plt.xlim(xx.min(), xx.max())
plt.xticks(())

# Vertical axis: label, limits of the grid, no tick.
plt.ylabel('Sepal width')
plt.ylim(yy.min(), yy.max())
plt.yticks(())
# plt.show()