Skip to content

Commit b417b4d

Browse files
committed
KMeans improvement
- fixed returned compactness value - added centers drawing to the example app - added compactness test
1 parent 74defef commit b417b4d

File tree

3 files changed

+68
-28
lines changed

3 files changed

+68
-28
lines changed

modules/core/src/kmeans.cpp

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,13 @@ class KMeansDistanceComputer : public ParallelLoopBody
165165
KMeansDistanceComputer( double *_distances,
166166
int *_labels,
167167
const Mat& _data,
168-
const Mat& _centers )
168+
const Mat& _centers,
169+
bool _onlyDistance = false )
169170
: distances(_distances),
170171
labels(_labels),
171172
data(_data),
172-
centers(_centers)
173+
centers(_centers),
174+
onlyDistance(_onlyDistance)
173175
{
174176
}
175177

@@ -183,6 +185,12 @@ class KMeansDistanceComputer : public ParallelLoopBody
183185
for( int i = begin; i<end; ++i)
184186
{
185187
const float *sample = data.ptr<float>(i);
188+
if (onlyDistance)
189+
{
190+
const float* center = centers.ptr<float>(labels[i]);
191+
distances[i] = normL2Sqr(sample, center, dims);
192+
continue;
193+
}
186194
int k_best = 0;
187195
double min_dist = DBL_MAX;
188196

@@ -210,6 +218,7 @@ class KMeansDistanceComputer : public ParallelLoopBody
210218
int *labels;
211219
const Mat& data;
212220
const Mat& centers;
221+
bool onlyDistance;
213222
};
214223

215224
}
@@ -259,6 +268,7 @@ double cv::kmeans( InputArray _data, int K,
259268
Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
260269
std::vector<int> counters(K);
261270
std::vector<Vec2f> _box(dims);
271+
Mat dists(1, N, CV_64F);
262272
Vec2f* box = &_box[0];
263273
double best_compactness = DBL_MAX, compactness = 0;
264274
RNG& rng = theRNG();
@@ -430,19 +440,16 @@ double cv::kmeans( InputArray _data, int K,
430440
}
431441
}
432442

433-
if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
434-
break;
443+
bool isLastIter = (++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon);
435444

436445
// assign labels
437-
Mat dists(1, N, CV_64F);
446+
dists = 0;
438447
double* dist = dists.ptr<double>(0);
439-
parallel_for_(Range(0, N),
440-
KMeansDistanceComputer(dist, labels, data, centers));
441-
compactness = 0;
442-
for( i = 0; i < N; i++ )
443-
{
444-
compactness += dist[i];
445-
}
448+
parallel_for_(Range(0, N), KMeansDistanceComputer(dist, labels, data, centers, isLastIter));
449+
compactness = sum(dists)[0];
450+
451+
if (isLastIter)
452+
break;
446453
}
447454

448455
if( compactness < best_compactness )

modules/core/test/test_math.cpp

Lines changed: 42 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2748,21 +2748,23 @@ class CV_KMeansSingularTest : public cvtest::BaseTest
27482748
protected:
27492749
void run(int inVariant)
27502750
{
2751+
RNG& rng = ts->get_rng();
27512752
int i, iter = 0, N = 0, N0 = 0, K = 0, dims = 0;
27522753
Mat labels;
2753-
try
2754+
27542755
{
2755-
RNG& rng = theRNG();
27562756
const int MAX_DIM=5;
27572757
int MAX_POINTS = 100, maxIter = 100;
27582758
for( iter = 0; iter < maxIter; iter++ )
27592759
{
27602760
ts->update_context(this, iter, true);
27612761
dims = rng.uniform(inVariant == MAT_1_N_CDIM ? 2 : 1, MAX_DIM+1);
2762-
N = rng.uniform(1, MAX_POINTS+1);
2762+
N = rng.uniform(2, MAX_POINTS+1);
27632763
N0 = rng.uniform(1, MAX(N/10, 2));
27642764
K = rng.uniform(1, N+1);
27652765

2766+
Mat centers;
2767+
27662768
if (inVariant == VECTOR)
27672769
{
27682770
dims = 2;
@@ -2775,7 +2777,7 @@ class CV_KMeansSingularTest : public cvtest::BaseTest
27752777
data[i] = data0[rng.uniform(0, N0)];
27762778

27772779
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
2778-
5, KMEANS_PP_CENTERS);
2780+
5, KMEANS_PP_CENTERS, centers);
27792781
}
27802782
else
27812783
{
@@ -2820,28 +2822,24 @@ class CV_KMeansSingularTest : public cvtest::BaseTest
28202822
}
28212823

28222824
kmeans(data, K, labels, TermCriteria(TermCriteria::MAX_ITER+TermCriteria::EPS, 30, 0),
2823-
5, KMEANS_PP_CENTERS);
2825+
5, KMEANS_PP_CENTERS, centers);
28242826
}
28252827

2828+
ASSERT_EQ(centers.rows, K);
2829+
ASSERT_EQ(labels.rows, N);
2830+
28262831
Mat hist(K, 1, CV_32S, Scalar(0));
28272832
for( i = 0; i < N; i++ )
28282833
{
28292834
int l = labels.at<int>(i);
2830-
CV_Assert(0 <= l && l < K);
2835+
ASSERT_GE(l, 0);
2836+
ASSERT_LT(l, K);
28312837
hist.at<int>(l)++;
28322838
}
28332839
for( i = 0; i < K; i++ )
2834-
CV_Assert( hist.at<int>(i) != 0 );
2840+
ASSERT_GT(hist.at<int>(i), 0);
28352841
}
28362842
}
2837-
catch(...)
2838-
{
2839-
ts->printf(cvtest::TS::LOG,
2840-
"context: iteration=%d, N=%d, N0=%d, K=%d\n",
2841-
iter, N, N0, K);
2842-
std::cout << labels << std::endl;
2843-
ts->set_failed_test_info(cvtest::TS::FAIL_MISMATCH);
2844-
}
28452843
}
28462844
};
28472845

@@ -2859,6 +2857,35 @@ TEST_P(Core_KMeans_InputVariants, singular)
28592857

28602858
INSTANTIATE_TEST_CASE_P(AllVariants, Core_KMeans_InputVariants, KMeansInputVariant::all());
28612859

2860+
TEST(Core_KMeans, compactness)
2861+
{
2862+
const int N = 1024;
2863+
const int attempts = 4;
2864+
const TermCriteria crit = TermCriteria(TermCriteria::COUNT, 5, 0); // low number of iterations
2865+
cvtest::TS& ts = *cvtest::TS::ptr();
2866+
for (int K = 1; K <= N; K *= 2)
2867+
{
2868+
Mat data(N, 1, CV_32FC2);
2869+
cvtest::randUni(ts.get_rng(), data, Scalar(-200, -200), Scalar(200, 200));
2870+
Mat labels, centers;
2871+
double compactness = kmeans(data, K, labels, crit, attempts, KMEANS_PP_CENTERS, centers);
2872+
centers = centers.reshape(2);
2873+
EXPECT_EQ(labels.rows, N);
2874+
EXPECT_EQ(centers.rows, K);
2875+
EXPECT_GE(compactness, 0.0);
2876+
double expected = 0.0;
2877+
for (int i = 0; i < N; ++i)
2878+
{
2879+
int l = labels.at<int>(i);
2880+
Point2f d = data.at<Point2f>(i) - centers.at<Point2f>(l);
2881+
expected += d.x * d.x + d.y * d.y;
2882+
}
2883+
EXPECT_NEAR(expected, compactness, expected * 1e-8);
2884+
if (K == N)
2885+
EXPECT_DOUBLE_EQ(compactness, 0.0);
2886+
}
2887+
}
2888+
28622889
TEST(CovariationMatrixVectorOfMat, accuracy)
28632890
{
28642891
unsigned int col_problem_size = 8, row_problem_size = 8, vector_size = 16;

samples/cpp/kmeans.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ int main( int /*argc*/, char** /*argv*/ )
5353

5454
randShuffle(points, 1, &rng);
5555

56-
kmeans(points, clusterCount, labels,
56+
double compactness = kmeans(points, clusterCount, labels,
5757
TermCriteria( TermCriteria::EPS+TermCriteria::COUNT, 10, 1.0),
5858
3, KMEANS_PP_CENTERS, centers);
5959

@@ -65,6 +65,12 @@ int main( int /*argc*/, char** /*argv*/ )
6565
Point ipt = points.at<Point2f>(i);
6666
circle( img, ipt, 2, colorTab[clusterIdx], FILLED, LINE_AA );
6767
}
68+
for (i = 0; i < centers.rows; ++i)
69+
{
70+
Point2f c = centers.at<Point2f>(i);
71+
circle( img, c, 40, colorTab[i], 1, LINE_AA );
72+
}
73+
cout << "Compactness: " << compactness << endl;
6874

6975
imshow("clusters", img);
7076

0 commit comments

Comments
 (0)