forked from OmkarPathak/ScriptsDump
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathk_means_clusturing.py
55 lines (44 loc) · 1.95 KB
/
k_means_clusturing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import numpy as np
import os
def compute_euclidean_distance(point, centroid):
    """Return the Euclidean (L2) distance between ``point`` and ``centroid``.

    Args:
        point: Coordinates of a data point (NumPy array or plain sequence).
        centroid: Coordinates of a centroid, broadcastable against ``point``.

    Returns:
        A NumPy scalar: the square root of the sum of the squared
        element-wise differences (summed over every element).
    """
    # Coerce to arrays so plain lists/tuples subtract element-wise instead
    # of raising TypeError; ndarray inputs pass through unchanged.
    difference = np.asarray(point) - np.asarray(centroid)
    return np.sqrt(np.sum(difference ** 2))
def assign_label_cluster(distance, data_point, centroids):
    """Select the closest centroid for ``data_point``.

    Args:
        distance: Mapping of centroid index -> distance from ``data_point``.
        data_point: The point being labelled; returned untouched.
        centroids: Indexable collection of centroids.

    Returns:
        A three-item list ``[index, data_point, centroids[index]]`` where
        ``index`` is the key of the smallest value in ``distance`` (the
        first such key in iteration order when several are tied).
    """
    nearest_index, _ = min(distance.items(), key=lambda entry: entry[1])
    return [nearest_index, data_point, centroids[nearest_index]]
def compute_new_centroids(cluster_label, centroids):
    """Return the midpoint between a data point and a centroid.

    Args:
        cluster_label: Coordinates of the data point. (Despite the name,
            ``iterate_k_means`` passes the point itself here, not a label.)
        centroids: Coordinates of the single centroid the point was
            assigned to.

    Returns:
        A NumPy array holding the element-wise average of the two inputs.
    """
    # Convert first: with plain lists, ``+`` would concatenate the two
    # coordinate lists instead of adding them element-wise.
    return (np.asarray(cluster_label) + np.asarray(centroids)) / 2
def iterate_k_means(data_points, centroids, total_iteration):
    """Run ``total_iteration`` passes of an online k-means style update.

    In every pass each data point is visited in order: its distance to
    every centroid is computed, the nearest centroid is chosen, and that
    centroid is immediately moved to the midpoint between itself and the
    point (see ``compute_new_centroids``).  Because centroids move after
    every single point, this is a sequential update rather than the batch
    "mean of all assigned points" step.

    Args:
        data_points: Iterable of points (e.g. the rows of a 2-D array).
        centroids: Initial centroid coordinates, one row per centroid.
            The caller's object is not modified.
        total_iteration: Number of full passes over ``data_points``.

    Returns:
        ``[cluster_label, centroids]`` where ``cluster_label`` holds the
        ``[index, data_point, centroid]`` label produced for every point
        during the final pass (empty when ``total_iteration`` is 0) and
        ``centroids`` is the array of final centroid positions.
    """
    # Work on a float copy: the input array is left untouched for the
    # caller, and integer centroid arrays no longer truncate each update.
    centroids = np.array(centroids, dtype=float)
    cluster_label = []

    for iteration in range(total_iteration):
        is_last_pass = iteration == total_iteration - 1
        for data_point in data_points:
            distance = {
                index_centroid: compute_euclidean_distance(data_point, centroid)
                for index_centroid, centroid in enumerate(centroids)
            }
            label = assign_label_cluster(distance, data_point, centroids)
            # Pull the winning centroid halfway towards the current point.
            centroids[label[0]] = compute_new_centroids(label[1], centroids[label[0]])
            # Only the assignments made during the last pass are reported.
            if is_last_pass:
                cluster_label.append(label)

    return [cluster_label, centroids]
def print_label_data(result):
    """Print the clustering outcome to standard output.

    Args:
        result: Indexable pair where ``result[0]`` is an iterable of labels
            (each label exposes the cluster number at index 0 and the data
            point at index 1) and ``result[1]`` is the final centroids.
    """
    point_template = "data point: {}"
    cluster_template = "cluster number: {} \n"

    print("Result of k-Means Clustering: \n")
    labelled_points = result[0]
    for entry in labelled_points:
        print(point_template.format(entry[1]))
        print(cluster_template.format(entry[0]))

    final_centroids = result[1]
    print("Last centroids position: \n {}".format(final_centroids))
def create_centroids():
    """Build the fixed starting centroids used by the script.

    Returns:
        A NumPy array with three rows of two float coordinates each.
    """
    initial_positions = (
        (5.0, 0.0),
        (45.0, 70.0),
        (50.0, 90.0),
    )
    return np.array(initial_positions)
if __name__ == "__main__":
    # Load data.csv from the directory that contains this script.  The path
    # is built with os.path.join instead of a hard-coded "\data.csv": the
    # backslash form is an invalid escape sequence and only resolves
    # correctly on Windows.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    filename = os.path.join(script_dir, "data.csv")
    data_points = np.genfromtxt(filename, delimiter=",")
    centroids = create_centroids()
    total_iteration = 100

    cluster_label, new_centroids = iterate_k_means(data_points, centroids, total_iteration)
    print_label_data([cluster_label, new_centroids])
    print()