高维聚类结果可视化

利用 sklearn 包中的 BIRCH 算法对 iris 数据集进行聚类,并将聚类结果可视化。

代码如下:

 

import sys
import urllib.request

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the "3d" projection on matplotlib < 3.2)
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.cluster import Birch
from sklearn.manifold import TSNE

try:
    # make_blobs lives in sklearn.datasets; the samples_generator module
    # was deprecated in scikit-learn 0.22 and removed in 0.24.
    from sklearn.datasets import make_blobs
except ImportError:
    from sklearn.datasets.samples_generator import make_blobs
# import ybirch

# Location of the comma-separated data file. A raw string keeps the Windows
# backslashes literal instead of relying on invalid escape sequences.
DATA_PATH = r"F:\Tabtad\Downloads\iris\Iris.txt"


def parse_feature_rows(lines):
    """Convert comma-separated text lines into rows of float features.

    Every non-blank line is split on commas, each field is converted to
    ``float``, and the first field is dropped so that only the remaining
    columns are kept as features.

    Args:
        lines: Iterable of text lines, with or without trailing newlines.

    Returns:
        A list containing one ``list[float]`` per non-blank input line.

    Raises:
        ValueError: If a field of a non-blank line cannot be parsed as float.
    """
    rows = []
    for raw in lines:
        text = raw.strip()
        if not text:
            # A blank line (e.g. a trailing newline at end of file) holds no
            # sample; float('') would otherwise abort the whole run.
            continue
        values = [float(field) for field in text.split(",")]
        rows.append(values[1:])  # column 0 is not used as a feature
    return rows


def plot_first_three_features(X, labels=None):
    """Show a 3-D scatter plot of the first three columns of ``X``.

    ``pyplot.scatter(x, y, z)`` would treat its third positional argument as
    the marker size, so a real 3-D axes is used to give the third feature its
    own axis.

    Args:
        X: 2-D array with at least three columns.
        labels: Optional per-sample values used to colour the points.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels, marker="o")
    plt.show()


def main():
    """Load the data file, cluster it with BIRCH and visualise the result.

    Prints the parsed data, its dimensions, the predicted labels, the number
    of samples per label and the Calinski-Harabasz score, and shows one
    scatter plot before clustering and one coloured by predicted label.

    Raises:
        ValueError: If the file has no data rows, or the rows do not form a
            rectangular table with at least three feature columns.
    """
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        rows = parse_feature_rows(f)
    if not rows:
        raise ValueError("no data rows found in " + DATA_PATH)

    X = np.array(rows)  # convert to a numpy matrix
    if X.ndim != 2 or X.shape[1] < 3:
        raise ValueError(
            "expected a rectangular table with at least 3 feature columns"
        )
    print(rows)
    print(X)

    # State of the data before applying Birch.
    plot_first_three_features(X)

    n_rows, n_cols = X.shape
    print("Number of Rows of Data = " + str(n_rows) + '\n')
    sys.stdout.write("Number of Columns of Data = " + str(n_cols) + '\n')

    # Configure the Birch estimator and fit it on the data.
    model = Birch(n_clusters=3, threshold=0.4)
    y_pred = model.fit_predict(X)
    print(y_pred)

    # Number of samples assigned to each label.
    print(pd.Series(model.labels_).value_counts())

    # Plot again, coloured by predicted label.
    plot_first_three_features(X, y_pred)
    print("Calinski_Harabasz Score", metrics.calinski_harabasz_score(X, y_pred))


if __name__ == "__main__":
    main()