| import random
|
| import math
|
| import sys
|
|
|
| def distance(x1, x2):
|
| return ((abs(x1[0] - x2[0]))**2 + (abs(x1[1] - x2[1])**2)) **(1/2)
|
|
|
| data_points = [(1,7),(1,11),(3,17),(7,18),(8,4),(8,12),(11,7),(12,14),(13,17),(16,11)]
|
|
|
|
|
| for K in range(1,len(data_points)+1):
|
| print("==============================")
|
| print("K: {0}".format(K))
|
|
|
|
|
| C = random.sample(data_points,K)
|
|
|
|
|
| iteration = 1000000
|
|
|
|
|
| min_decrease_sse = 1e-5
|
|
|
|
|
| previous_sse = float("inf")
|
| current_iteration = 0
|
|
|
| while(True):
|
| current_iteration +=1
|
| current_sse = 0.0
|
|
|
|
|
| cms_dict = {}
|
| for c in C:
|
| cms_dict[c] = []
|
|
|
| for x in data_points:
|
| max = float("inf")
|
| closest = ()
|
|
|
| for c in C:
|
| d= distance(x,c)
|
| if(d <= max):
|
| max = d
|
| closest = c
|
|
|
|
|
| cms_dict[closest].append(x)
|
|
|
|
|
| C = []
|
| for cm in cms_dict:
|
| cm_total_distance = 0.0
|
| new_c_x = 0.0
|
| new_c_y = 0.0
|
|
|
|
|
| for x in cms_dict[cm]:
|
| new_c_x += x[0]
|
| new_c_y += x[1]
|
| new_c_x /= len(cms_dict[cm])
|
| new_c_y /= len(cms_dict[cm])
|
| C.append((new_c_x,new_c_y))
|
|
|
|
|
| for x in cms_dict[cm]:
|
| cm_total_distance += distance(x,(new_c_x,new_c_y))**2
|
| current_sse += cm_total_distance
|
|
|
|
|
| if(previous_sse - current_sse <= min_decrease_sse or current_iteration > iteration):
|
| print("Final SSE: {0}".format(current_sse))
|
| print("Final Iteration: {0}".format(current_iteration))
|
| i = 0
|
| for cm in cms_dict:
|
| i+=1
|
| print("Cluster {0}: {1}".format(i,cms_dict[cm]))
|
| break
|
| else:
|
| print("Current SSE: {0}".format(current_sse))
|
| previous_sse = current_sse |