/// <summary>
/// Sets up a k-means run over <paramref name="data"/> using <paramref name="K"/> clusters.
/// </summary>
/// <param name="data">
/// The data points, one inner array per point. The outer array is stored by
/// reference, not copied.
/// </param>
/// <param name="K">Number of clusters to allocate.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public WawaKMeans(double[][] data, int K)
{
    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    _coordinates = data;
    _coordCount = data.Length;
    _k = K;
    _clusters = new WawaCluster[K];
    _clusterAssignments = new int[_coordCount];
    _nearestCluster = new int[_coordCount];

    // One row per data point, one column per cluster. The cache is indexed as
    // [point, cluster], so the second dimension must be K; sizing it by
    // data.Length overflowed when K > data.Length and wasted memory otherwise.
    _distanceCache = new double[_coordCount, K];

    // InitRandom is defined elsewhere in the class; presumably it populates the
    // initial clusters -- confirm against its definition.
    InitRandom();
}
/// <summary>
/// Repeats the clustering passes until every data point's nearest cluster is
/// the cluster it is already assigned to. Writes one progress line to the
/// console per pass.
/// </summary>
public void Start()
{
    for (int pass = 0; ; pass++)
    {
        Console.WriteLine("Iteration " + pass + "...");

        // Step 1: recompute the mean of every cluster.
        for (int c = 0; c < _k; c++)
        {
            _clusters[c].UpdateMean(_coordinates);
        }

        // Step 2: cache the distance from every data point to every cluster mean.
        for (int p = 0; p < _coordCount; p++)
        {
            for (int c = 0; c < _k; c++)
            {
                double distance = getDistance(_coordinates[p], _clusters[c].Mean);
                _distanceCache[p, c] = distance;
            }
        }

        // Step 3: work out which cluster each data point is closest to.
        for (int p = 0; p < _coordCount; p++)
        {
            _nearestCluster[p] = nearestCluster(p);
        }

        // Step 4: if every point's nearest cluster is already the cluster it
        // belongs to, nothing would move and the loop is finished.
        bool stable = true;
        for (int p = 0; p < _coordCount; p++)
        {
            if (_nearestCluster[p] != _clusterAssignments[p])
            {
                stable = false;
                break;
            }
        }

        if (stable)
        {
            return;
        }

        // Step 5: otherwise rebuild the membership of every cluster and the
        // per-point assignment table, then go around again.
        for (int c = 0; c < _k; c++)
        {
            _clusters[c].CurrentMembership.Clear();
        }

        for (int p = 0; p < _coordCount; p++)
        {
            int target = _nearestCluster[p];
            _clusters[target].CurrentMembership.Add(p);
            _clusterAssignments[p] = target;
        }
    }
}
/// <summary>
/// Determines which cluster a given data point is nearest to.
/// </summary>
/// <param name="ndx">Index of the data point (row in the distance cache).</param>
/// <returns></returns>
int nearestCluster(int ndx)
{
int nearest = -1;
double min = Double.MaxValue;
for (int c = 0; c < _k; c++)
{
double d = _distanceCache[ndx,c];
if (d < min)
{
min = d;
nearest = c;
}