发布于2019-08-15 12:07 阅读(644) 评论(0) 点赞(3) 收藏(2)
import numpy as np
def load_data(file_name):
    """Load a tab-separated numeric text file into a matrix.

    Args:
        file_name: Path of a text file holding one sample per line, with
            the values on each line separated by tab characters.

    Returns:
        An ``np.matrix`` with one row per non-blank line and one column
        per tab-separated field.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    data = []
    # The context manager closes the file even when parsing raises.
    with open(file_name) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing empty line at the
                # end of the file) instead of failing on float('').
                continue
            data.append([float(x) for x in stripped.split('\t')])
    # np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
    return np.asmatrix(data)
def distance(vecA, vecB):
    """Return the squared Euclidean distance between two row vectors.

    Args:
        vecA: First vector; expected to be a 1 x n ``np.matrix`` row.
        vecB: Second vector with the same shape as ``vecA``.

    Returns:
        ``(vecA - vecB) * (vecA - vecB).T``. For 1 x n ``np.matrix``
        inputs ``*`` is a matrix product, so the result is a 1 x 1 matrix
        holding the sum of squared differences (no square root is taken).
    """
    delta = vecA - vecB
    return delta * delta.T
def randCent(data, k):
    """Randomly initialise k cluster centres inside the data's bounding box.

    Each coordinate of each centre is drawn uniformly from the range
    ``[min, max)`` of the corresponding feature column of ``data``.

    Args:
        data: Samples to cluster, one row per sample (2-D matrix/array).
        k: Number of cluster centres to generate (int).

    Returns:
        A ``k x n`` ``np.matrix`` of centre coordinates, where ``n`` is the
        number of columns of ``data``.
    """
    data = np.asmatrix(data)
    n = np.shape(data)[1]
    # Per-feature lower bound and extent, flattened to 1-D so they
    # broadcast across the k rows of random offsets below.
    low = np.asarray(data.min(axis=0), dtype=float).ravel()
    span = np.asarray(data.max(axis=0), dtype=float).ravel() - low
    # The random offset must be part of the same expression as the minimum;
    # written as a separate statement it is silently discarded and every
    # centre collapses onto the column minimum.
    return np.asmatrix(low + np.random.rand(k, n) * span)
def kmeans(data, k, centroids):
    """Cluster the rows of ``data`` into ``k`` groups with k-means.

    Alternates between assigning every sample to its nearest centroid and
    moving every centroid to the mean of its assigned samples, until a
    full pass changes no assignment.

    Args:
        data: Samples to cluster, one row per sample (2-D matrix/array).
        k: Number of clusters (int).
        centroids: Initial ``k x n`` centroid coordinates. The input is
            copied, so the caller's object is not modified.

    Returns:
        A tuple ``(subCenter, centroids)``:
        ``subCenter`` is an ``m x 2`` ``np.matrix`` whose first column is
        the index of the cluster assigned to each sample and whose second
        column is the value returned by ``distance`` for that sample and
        its centroid; ``centroids`` is the final ``k x n`` ``np.matrix``.
    """
    data = np.asmatrix(data)
    # Float copy: keeps the caller's initial centroids intact and avoids
    # truncating the means if integer centroids were passed in.
    centroids = np.asmatrix(np.array(centroids, dtype=float))
    m, n = np.shape(data)  # m: number of samples, n: number of features
    samples = np.asarray(data)

    # Two columns only (cluster index, distance), independent of n.
    subCenter = np.asmatrix(np.zeros((m, 2)))
    # -1 marks "not assigned yet", so samples that land in cluster 0 on the
    # first pass are still counted as a change.
    subCenter[:, 0] = -1

    change = True
    while change:
        # Reset once per pass; resetting per sample would let the last
        # sample alone decide whether the algorithm has converged.
        change = False
        for i in range(m):
            minDist = np.inf
            minIndex = 0
            for j in range(k):
                # distance() yields a 1 x 1 matrix; reduce it to a scalar.
                dist = np.asarray(distance(data[i, :], centroids[j, :])).item()
                if dist < minDist:
                    minDist = dist
                    minIndex = j
            if subCenter[i, 0] != minIndex:
                change = True
            # Always refresh the row so the stored distance follows the
            # centroid even when the assignment itself is unchanged.
            subCenter[i, 0] = minIndex
            subCenter[i, 1] = minDist

        # Assignment finished: recompute each centroid as the mean of the
        # samples assigned to it.
        labels = np.asarray(subCenter[:, 0]).ravel()
        for j in range(k):
            members = labels == j
            if not members.any():
                # Empty cluster: keep the previous centroid rather than
                # dividing by zero (which would produce NaN coordinates).
                print('r is zero')
                continue
            centroids[j, :] = samples[members].mean(axis=0)
    return subCenter, centroids
def save_model(file_name, source):
    """Write a 2-D matrix to a text file as tab-separated rows.

    Args:
        file_name: Path of the output file; it is created or overwritten.
        source: 2-D matrix/array to save. Each element is written with
            ``str()``, columns are joined by tabs and every row ends with
            a newline.
    """
    # Validate the shape before opening so a non-2-D input does not
    # truncate an existing file.
    m, n = np.shape(source)
    # The context manager closes the file even if a write fails.
    with open(file_name, 'w') as f:
        for i in range(m):
            f.write('\t'.join(str(source[i, j]) for j in range(n)) + '\n')
# Script entry point: load the samples, pick random initial centroids,
# run k-means and write the assignments and final centroids to text files.
if __name__ == '__main__':
    k = 4  # number of clusters
    file_path = 'kmeans.txt'
    # Step 1: load the data.
    print('---------------1. 载入数据---------------')
    data = load_data(file_path)
    # Step 2: randomly initialise the cluster centres.
    print('---------------2. 随机初始化聚类中心---------------')
    init_centroids = randCent(data, k)
    print('The centroids: %s' % init_centroids)
    # Step 3: run the k-means clustering.
    print('---------------3. 执行kmeans分类---------------')
    subcen, fin_centroids = kmeans(data, k, init_centroids)
    # Step 4: write the results to text files.
    print('---------------4. 输出分类结果到文本---------------')
    save_model('sub.txt', subcen)
    save_model('center.txt', fin_centroids)
作者:dfh8374
链接:https://www.pythonheidong.com/blog/article/36085/a3ec60896d669d7835ed/
来源:python黑洞网
任何形式的转载都请注明出处,如有侵权 一经发现 必将追究其法律责任
昵称:
评论内容:(最多支持255个字符)
---无人问津也好,技不如人也罢,你都要试着安静下来,去做自己该做的事,而不是让内心的烦躁、焦虑,坏掉你本来就不多的热情和定力
Copyright © 2018-2021 python黑洞网 All Rights Reserved 版权所有,并保留所有权利。 京ICP备18063182号-1
投诉与举报,广告合作请联系vgs_info@163.com或QQ3083709327
免责声明:网站文章均由用户上传,仅供读者学习交流使用,禁止用做商业用途。若文章涉及色情,反动,侵权等违法信息,请向我们举报,一经核实我们会立即删除!