无监督学习之K-Means算法python实现

2022-08-04,,,,

1.定义

  • 无监督学习:训练样本没有标注,也没有明确的奖励或惩罚信号,算法需要自行从数据中发现内在结构(这一点不同于以回报激励决策、以奖励最大化为目标的强化学习),典型代表有K-Means聚类与主成分分析,被视为人工智能的一种实现方法。
  • K-Means聚类:将无标注的样本通过迭代聚类成为k个簇。

2.K-Means算法实现步骤

  1. 选定k个簇心(可随意)

  2. 分别计算每个样本到簇心距离,并进行归属

  3. 重新计算簇重心,更新k个簇心

  4. 反复迭代直到达到终止条件

3.Python实现代码

import numpy as np
import matplotlib.pyplot as plt
import math
import os 

def gene_save_data(f):
    """Generate three random 2-D point clouds and write them to ``f``.

    Each cloud holds 40 points drawn uniformly from a 4x4 square centred on
    (2, 3), (5, 8) and (7, 4) respectively.  One cloud is written per line;
    every point is written as the ``str()`` of its NumPy array followed by
    a comma.

    Args:
        f: A writable text file object.

    Returns:
        Always ``0``.
    """
    centers = (np.array([2, 3]), np.array([5, 8]), np.array([7, 4]))

    # Draw the offsets in centre order so the random stream is consumed
    # in the same sequence for all three clouds.
    clouds = [
        list(np.add(center, np.random.uniform(-2, 2, (40, 1, 2))))
        for center in centers
    ]

    for cloud in clouds:
        for sample in cloud:
            # Each sample has shape (1, 2); row 0 is the actual (x, y) pair.
            f.write(str(sample[0]))
            f.write(',')
        f.write('\n')

    return 0
    
def draw_scatter(g1, g2, g3, h, num):
    """Draw one clustering snapshot into a cell of a 2x10 subplot grid.

    Args:
        g1, g2, g3: The three clusters, each a sequence of ``[x, y]`` points.
        h: Flat sequence ``[x1, y1, x2, y2, x3, y3]`` with the three current
            centroids.
        num: Zero-based snapshot index; selects subplot ``num + 1`` and is
            shown in the title.
    """
    centroid_colors = ['c', 'g', 'm']  # one colour per current centroid
    reference_color = 'r'

    plt.figure(1)
    plt.subplot(2, 10, num + 1)

    # Cluster members: one translucent pastel colour per cluster.
    for members, shade in ((g1, '#AFEEEE'), (g2, '#98FB98'), (g3, '#EE82EE')):
        xs = [pt[0] for pt in members]
        ys = [float(pt[1]) for pt in members]
        plt.scatter(x=xs, y=ys, c=shade, alpha=0.4)

    # Current centroids, unpacked from the flat coordinate sequence.
    centroid_xs = [h[idx] for idx in (0, 2, 4)]
    centroid_ys = [h[idx] for idx in (1, 3, 5)]
    plt.scatter(x=centroid_xs, y=centroid_ys, c=centroid_colors, marker='o')

    # Fixed reference crosses at (2, 3), (5, 8) and (7, 4).
    plt.scatter(x=[2, 5, 7], y=[3, 8, 4], c=reference_color, marker='x')

    # Decorations
    plt.gca().set(xlim=(-1.0, 12.0), ylim=(-1.0, 12.0))
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.title("no.%d clustering" % num, fontsize=8)

def distance(point, heart):
    """Return the Euclidean distance between two 2-D points.

    Args:
        point: Indexable with the sample's x at ``[0]`` and y at ``[1]``.
        heart: Indexable with the centroid's x at ``[0]`` and y at ``[1]``.

    Returns:
        The straight-line distance as a float.
    """
    dx = point[0] - heart[0]
    dy = point[1] - heart[1]
    return math.sqrt(dx ** 2 + dy ** 2)

def list_string2float(g):
    """Convert the x and y entries of every point in ``g`` to float, in place.

    The whole list is processed regardless of its length, so lists that do
    not hold exactly 40 points are handled as well.

    Args:
        g: A mutable sequence of mutable ``[x, y]`` pairs whose entries are
            accepted by ``float()`` (for example numeric strings).

    Returns:
        Always ``0``; the result is delivered by mutating ``g``.
    """
    for pair in g:
        pair[0] = float(pair[0])
        pair[1] = float(pair[1])
    return 0

def cal_heart(heart1, heart2, heart3, g1, g2, g3):
    """Recompute the three centroids as the mean of their clusters.

    A cluster with no samples keeps its previous centroid.  Taking the mean
    of an empty array would instead yield a scalar ``nan``, which cannot be
    indexed or concatenated like an ``[x, y]`` list.

    Args:
        heart1, heart2, heart3: Previous centroids as ``[x, y]`` lists; used
            only as the fallback for an empty cluster.
        g1, g2, g3: The clusters, each a list of ``[x, y]`` points.

    Returns:
        A tuple ``(heart1, heart2, heart3)`` of ``[x, y]`` lists.
    """
    def _centroid(previous, members):
        # Keep the old centroid when nothing was assigned to this cluster.
        if len(members) == 0:
            return list(previous)
        return np.mean(np.array(members), axis=0).tolist()

    return (
        _centroid(heart1, g1),
        _centroid(heart2, g2),
        _centroid(heart3, g3),
    )


def belong_group(all, h1, h2, h3, g1, g2, g3):
    """Reassign every sample to the cluster of its nearest centroid.

    The three cluster lists are emptied and refilled in place.  Every
    sample in ``all`` is processed, whatever the length of the sequence.
    On a distance tie the lowest-numbered cluster wins.

    Args:
        all: Sequence of ``[x, y]`` samples.  The name shadows the builtin
            but is kept because it is part of the existing signature.
        h1, h2, h3: Current centroids as ``[x, y]`` sequences.
        g1, g2, g3: Lists receiving the samples of cluster 1, 2 and 3.

    Returns:
        Always ``0``; the result is delivered through ``g1``..``g3``.
    """
    # Snapshot first so that clearing the groups cannot empty the input
    # when the caller passes one of the group lists as ``all``.
    samples = list(all)

    groups = (g1, g2, g3)
    hearts = (h1, h2, h3)
    for group in groups:
        group.clear()

    for sample in samples:
        dis = [distance(sample, heart) for heart in hearts]
        # index(min(...)) returns the first minimum, i.e. lowest cluster wins.
        groups[dis.index(min(dis))].append(sample)

    return 0

def _read_heart(prompt):
    """Prompt for one initial centroid and return it as ``[x, y]`` floats."""
    # Split on whitespace (commas tolerated) instead of picking characters
    # by position, so multi-digit, negative and decimal values parse.
    fields = input(prompt).replace(',', ' ').split()
    if len(fields) != 2:
        raise ValueError(
            'expected two numbers separated by a space, got %d value(s)'
            % len(fields)
        )
    return [float(fields[0]), float(fields[1])]


def _parse_group(line):
    """Parse one line of the data file into a list of ``[x, y]`` floats."""
    points = []
    for cell in line.strip().strip(',').split(','):
        parts = cell.strip().strip('[').strip(']').split()
        if not parts:
            continue  # skip empty cells such as a blank line
        points.append([float(parts[0]), float(parts[1])])
    return points


def kmeans():
    """Run the interactive three-cluster K-Means demo.

    Steps:
        1. Create ``data.txt`` with random samples unless a non-empty file
           already exists.
        2. Read three initial centroids from standard input.
        3. Load the samples and plot the initial state.
        4. Run 19 assign/update iterations, plotting each into the next
           cell of the 2x10 subplot grid, then show the figure.

    Raises:
        ValueError: If a centroid is not entered as two numbers, or if the
            data file holds fewer than three lines.
    """
    # Generate test data only when no usable data file is present.
    if os.path.exists('data.txt') and os.path.getsize('data.txt'):
        print('data already exist and not empty, pass!')
    else:
        with open('data.txt', 'w') as f:
            gene_save_data(f)

    # Choose the initial centroids.
    heart1 = _read_heart('输入第一个初始簇点,格式如: 1 1\n')
    heart2 = _read_heart('输入第二个初始簇点,格式如: 1 1\n')
    heart3 = _read_heart('输入第三个初始簇点,格式如: 1 1\n')

    # Load the samples; each of the first three lines is one initial group.
    with open('data.txt', 'r') as f:
        lines = f.readlines()
    if len(lines) < 3:
        raise ValueError('data.txt must contain at least three lines of samples')
    g1 = _parse_group(lines[0])
    g2 = _parse_group(lines[1])
    g3 = _parse_group(lines[2])

    draw_scatter(g1, g2, g3, (heart1 + heart2 + heart3), 0)

    # Concatenation builds a new list, so clearing g1..g3 during the
    # reassignment step leaves the sample set intact.
    samples = g1 + g2 + g3
    for i in range(19):
        print('第%d次的簇点:' % (i + 1))
        print(heart1, heart2, heart3)
        belong_group(samples, heart1, heart2, heart3, g1, g2, g3)
        heart1, heart2, heart3 = cal_heart(heart1, heart2, heart3, g1, g2, g3)
        draw_scatter(g1, g2, g3, (heart1 + heart2 + heart3), i + 1)
    plt.show()

# Start the interactive K-Means demo only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    kmeans()

4.实现效果 

  •  控制台输出

  • 输出图像

本文地址:https://blog.csdn.net/weixin_46318945/article/details/107326010

《无监督学习之K-Means算法python实现.doc》

下载本文的Word格式文档,以方便收藏与打印。