TPCCLIB
Loading...
Searching...
No Matches
fcmc.c File Reference

Fuzzy C means clustering algorithm. More...

#include "tpcclibConfig.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include "tpcfcmc.h"

Go to the source code of this file.

Functions

void fcmcInit (FCMC *fcmc)
 
void fcmcFree (FCMC *fcmc)
 
int fcmcAllocate (FCMC *fcmc, unsigned int sampleNr, unsigned int dimNr, unsigned int clusterNr)
 
void fcmcPrint (FCMC *fcmc, FILE *fp)
 
double fcmcEuclideanDistances (FCMC *fcmc, unsigned int si, unsigned int ci)
 
int fcmcClusterInitialize (FCMC *fcmc, const int cinit, int verbose)
 
int fcmclustering (FCMC *fcmc, const int cinit, int verbose)
 

Detailed Description

Fuzzy C means clustering algorithm.

Definition in file fcmc.c.

Function Documentation

◆ fcmcAllocate()

int fcmcAllocate ( FCMC * fcmc,
unsigned int sampleNr,
unsigned int dimNr,
unsigned int clusterNr )

Free memory allocated for FCMC data. All contents are destroyed.

See also
fcmcInit, fcmcFree
Author
Vesa Oikonen
Returns
Returns 0 if ok.
Parameters
fcmcPointer to FCMC struct; any pre-existing contents are deleted.
Precondition
Before first use initialize the FCMC struct with fcmcInit().
Postcondition
After last use free allocated memory with fcmcFree().
Parameters
sampleNrNumber of data samples.
dimNrNumber of dimensions for each sample.
clusterNrNumber of clusters.

Definition at line 69 of file fcmc.c.

81 {
82 if(fcmc==NULL) return(1);
83 if(sampleNr<1) return(2);
84 if(dimNr<1) return(3);
85 if(clusterNr<1) return(4);
86 fcmcFree(fcmc);
87
88 /* Allocate memory for the sample data */
89 fcmc->d=(double**)malloc(sampleNr*sizeof(double*));
90 if(fcmc->d==NULL) return(11);
91 for(unsigned i=0; i<sampleNr; i++) {
92 fcmc->d[i]=(double*)malloc(dimNr*sizeof(double));
93 if(fcmc->d[i]==NULL) {
94 for(unsigned j=0; j<i; j++) free(fcmc->d[j]);
95 free(fcmc->d);
96 return(12);
97 }
98 }
99 fcmc->sampleNr=sampleNr;
100 fcmc->dimNr=dimNr;
101
102 /* Allocate memory for the cluster data */
103 fcmc->cc=(double**)malloc(clusterNr*sizeof(double*));
104 if(fcmc->cc==NULL) {
105 fcmcFree(fcmc);
106 return(21);
107 }
108 for(unsigned i=0; i<clusterNr; i++) {
109 fcmc->cc[i]=(double*)malloc(dimNr*sizeof(double));
110 if(fcmc->cc[i]==NULL) {
111 for(unsigned j=0; j<i; j++) free(fcmc->cc[j]);
112 free(fcmc->cc);
113 fcmcFree(fcmc);
114 return(22);
115 }
116 }
117 fcmc->clusterNr=clusterNr;
118
119 /* Allocate memory for the selected cluster for each sample */
120 fcmc->sc=(unsigned int*)malloc(sampleNr*sizeof(unsigned int));
121 if(fcmc->sc==NULL) {
122 fcmcFree(fcmc);
123 return(31);
124 }
125
126 return(0);
127}
void fcmcFree(FCMC *fcmc)
Definition fcmc.c:43
double ** d
Definition tpcfcmc.h:39
unsigned int * sc
Definition tpcfcmc.h:43
unsigned int dimNr
Definition tpcfcmc.h:34
double ** cc
Definition tpcfcmc.h:41
unsigned int clusterNr
Definition tpcfcmc.h:36
unsigned int sampleNr
Definition tpcfcmc.h:32

◆ fcmcClusterInitialize()

int fcmcClusterInitialize ( FCMC * fcmc,
const int cinit,
int verbose )

Initialize cluster centres. You probably want to replace this function with one tailored for your data; this works only if sample numbers are reasonably well balanced between clusters.

Reference: Yedla M, Pathakota SR, Srinivasa TM. Enhancing K-means clustering algorithm with improved initial center. Int J Comp Sci Inform Technol. 2010;1(2): 121-125.

See also
fcmclustering, fcmcInit, fcmcFree
Returns
Returns 0 if ok.
Parameters
fcmcPointer to FCMC struct.
cinitInitialization method:
  • 0 and 1: sample distances to sample means,
  • 2: sample distances to zero.
verboseVerbose level; if zero, then nothing is printed to stderr or stdout.

Definition at line 220 of file fcmc.c.

230 {
231 if(verbose>0) {printf("fcmcClusterInitialize(*FCMC, %d)\n", cinit); fflush(stdout);}
232 if(fcmc==NULL) return(1);
233 if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);
234
235 double mns[fcmc->dimNr];
236 if(cinit==2) {
237 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]=0.0;
238 } else { // default
239 /* Calculate sample means in each dimension */
240 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]=0.0;
241 for(unsigned int si=0; si<fcmc->sampleNr; si++)
242 for(unsigned int di=0; di<fcmc->dimNr; di++)
243 mns[di]+=fcmc->d[si][di];
244 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]/=(double)fcmc->sampleNr;
245 if(verbose>2) {
246 printf("\nmeans:\n");
247 for(unsigned int di=0; di<fcmc->dimNr; di++) printf(" %g\n", mns[di]);
248 }
249 }
250
251 /* Calculate Euclidean distances of samples to the above set 'cluster centres' */
252 FCMC_ED *ed=malloc(fcmc->sampleNr*sizeof(FCMC_ED));
253 if(ed==NULL) return(10);
254 double v;
255 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
256 ed[si].s=si;
257 ed[si].d=0.0;
258 for(unsigned int di=0; di<fcmc->dimNr; di++) {
259 v=mns[di]-fcmc->d[si][di];
260 ed[si].d+=v*v;
261 }
262 /* sqrt(d) is not done now because only the order is used later */
263 } // next sample
264
265 /* Sort by increasing distance */
266 FCMC_ED *edptr=ed;
267 qsort(edptr, fcmc->sampleNr, sizeof(FCMC_ED), fcmcQSortSamplesByDistance);
268 if(verbose>2) {
269 printf("min_distance := %g\n", sqrt(ed[0].d));
270 printf("max_distance := %g\n", sqrt(ed[fcmc->sampleNr-1].d));
271 }
272
273 /* How many samples per cluster? */
274 unsigned int sperc=fcmc->sampleNr/fcmc->clusterNr;
275 if(verbose>1) printf("sperc := %u\n", sperc);
276 if(sperc<1) {free(ed); return(11);}
277
278 /* Calculate cluster centres */
279 /* Centre of each cluster, cc[0..clusterNr-1][0..dimNr-1]. */
280
281 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
282 for(unsigned int di=0; di<fcmc->dimNr; di++) fcmc->cc[ci][di]=0.0;
283 for(unsigned int i=ci*sperc; i<(ci+1)*sperc; i++) {
284 unsigned int ii=ed[i].s;
285 if(verbose>100) printf("ci=%u i=%u ii=%u d=%g\n", ci, i, ii, ed[i].d);
286 for(unsigned int di=0; di<fcmc->dimNr; di++)
287 fcmc->cc[ci][di]+=fcmc->d[ii][di];
288 }
289 for(unsigned int di=0; di<fcmc->dimNr; di++)
290 fcmc->cc[ci][di]/=(double)sperc;
291 }
292
293 free(ed);
294 return(0);
295}

Referenced by fcmclustering().

◆ fcmcEuclideanDistances()

double fcmcEuclideanDistances ( FCMC * fcmc,
unsigned int si,
unsigned int ci )

Euclidean distance for fuzzy C means clustering.

See also
fcmclustering, fcmcInit, fcmcFree
Returns
Returns the Euclidean distance, or NaN in case of an error.
Parameters
fcmcPointer to FCMC struct.
siSample index [0..sampleNr-1].
ciCluster index [0..clusterNr-1].

Definition at line 176 of file fcmc.c.

183 {
184 if(fcmc==NULL) return(nan(""));
185 if(si>=fcmc->sampleNr || ci>=fcmc->clusterNr || fcmc->dimNr<1) return(nan(""));
186
187 double distance=0.0;
188 for(unsigned int di=0; di<fcmc->dimNr; di++)
189 distance+=pow( (fcmc->cc[ci][di] - fcmc->d[si][di]), 2.0);
190 distance = sqrt(distance);
191 return(distance);
192}

Referenced by fcmclustering().

◆ fcmcFree()

void fcmcFree ( FCMC * fcmc)

Free memory allocated for FCMC data. All contents are destroyed.

See also
fcmcInit, fcmcAllocate
Author
Vesa Oikonen
Parameters
fcmcPointer to FCMC struct;
Precondition
Before first use initialize the FCMC struct with fcmcInit().

Definition at line 43 of file fcmc.c.

48 {
49 if(fcmc==NULL) return;
50 for(unsigned i=0; i<fcmc->sampleNr; i++) {
51 free(fcmc->d[i]);
52 }
53 free(fcmc->d);
54 for(unsigned i=0; i<fcmc->clusterNr; i++) {
55 free(fcmc->cc[i]);
56 }
57 free(fcmc->cc);
58 free(fcmc->sc);
59 fcmcInit(fcmc);
60}
void fcmcInit(FCMC *fcmc)
Definition fcmc.c:21

Referenced by fcmcAllocate().

◆ fcmcInit()

void fcmcInit ( FCMC * fcmc)

Initiate the FCMC struct before any use.

See also
fcmcFree, fcmcAllocate
Author
Vesa Oikonen
Parameters
fcmcPointer to FCMC struct.

Definition at line 21 of file fcmc.c.

24 {
25 if(fcmc==NULL) return;
26 fcmc->sampleNr=fcmc->dimNr=fcmc->clusterNr=0;
27 fcmc->d=fcmc->cc=NULL;
28 fcmc->sc=NULL;
29 /* Max number of iterations. */
30 fcmc->maxIter=100;
31 /* Max accepted u difference. */
32 fcmc->limitMaxUDiff=1.0E-05;
33 /* Fuzziness coefficient. */
34 fcmc->fc=9;
35}
double limitMaxUDiff
Definition tpcfcmc.h:27
unsigned int fc
Definition tpcfcmc.h:29
unsigned int maxIter
Definition tpcfcmc.h:25

Referenced by fcmcFree().

◆ fcmclustering()

int fcmclustering ( FCMC * fcmc,
const int cinit,
int verbose )

Fuzzy C means clustering. Works, but code optimization still needed.

The following parameters in FCMC struct can be changed before calling this function:

  • Max number of iterations, maxIter
  • Acceptance limit for max u difference, limitMaxUDiff
  • Fuzziness coefficient, fc.

This code and its tests are based on C code in https://github.com/JRC1995/Fuzzy-C-Mean.

References:

See also
fcmcInit, fcmcFree, fcmcClusterInitialize
Author
Vesa Oikonen
Returns
Returns 0 if ok.
Parameters
fcmcPointer to filled FCMC struct.
Precondition
Before first use initialize the FCMC struct with fcmcInit().
Postcondition
After last use free allocated memory with fcmcFree().
Parameters
cinitInitialize cluster centres locally: 0=no (user has already initialized those), 1=yes (using fcmcClusterInitialize() with sample distances to mean), 2=yes (using fcmcClusterInitialize() with sample distances to zero).
verboseVerbose level; if zero, then nothing is printed to stderr or stdout.

Definition at line 326 of file fcmc.c.

340 {
341 if(verbose>0) {printf("fcmclustering()\n"); fflush(stdout);}
342 if(fcmc==NULL) return(1);
343 if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);
344
345 /* Initialize cluster centres to mean */
346 if(cinit>0) {
347 if(fcmcClusterInitialize(fcmc, cinit, verbose-2)!=0) return(2);
348 if(verbose>20) fcmcPrint(fcmc, stdout);
349 }
350
351 /* Iterations */
352 unsigned int iter=0;
353 double maxUDiff=1.0;
354 double oldJ=0.0;
355 double newJ=999999999.;
356 double minJ=9999999999999999.;
357
358 double vnum[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid numerator
359 double vden[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid denominator
360 double ccMin[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid at minimum
361 double currU[fcmc->sampleNr][fcmc->clusterNr]; // current membership
362 double oldU[fcmc->sampleNr][fcmc->clusterNr]; // previous membership
363 double minU[fcmc->sampleNr][fcmc->clusterNr];
364 double d[fcmc->sampleNr][fcmc->clusterNr];
365 double dk[fcmc->sampleNr];
366 double sumU[fcmc->sampleNr];
367
368 while(iter<fcmc->maxIter && maxUDiff>fcmc->limitMaxUDiff) {
369 iter++; if(verbose>1) {printf("iteration %d\n", iter); fflush(stdout);}
370
371 /* Initialize cluster centroid numerator and denominator */
372 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
373 for(unsigned int di=0; di<fcmc->dimNr; di++) {
374 vnum[ci][di]=vden[ci][di]=0.0;
375 }
376 }
377
378 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
379 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
380 if(iter==1) oldU[si][ci]=0.0; else oldU[si][ci]=currU[si][ci];
381 }
382 }
383
384 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
385 dk[si]=0.0;
386 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
387 d[si][ci] = fcmcEuclideanDistances(fcmc, /*fcmc->d, fcmc->cc,*/ si, ci);
388 dk[si]+=d[si][ci];
389 }
390
391 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
392 if(d[si][ci]==0.0)
393 currU[si][ci]=999.;
394 else
395 currU[si][ci] = 1.0/(d[si][ci]/dk[si]);
396 currU[si][ci]=pow(currU[si][ci], (2.0/(double)(fcmc->fc-1)) );
397 }
398 }
399
400 /* Normalize the U */
401 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
402 sumU[si]=0.0;
403 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
404 sumU[si]+=currU[si][ci];
405 }
406 }
407
408 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
409 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
410 currU[si][ci]=currU[si][ci]/sumU[si];
411 }
412 }
413
414 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
415 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
416 for(unsigned int di=0; di<fcmc->dimNr; di++) {
417 vnum[ci][di] += pow( currU[si][ci], fcmc->fc ) * fcmc->d[si][di];
418 vden[ci][di] += pow( currU[si][ci], fcmc->fc );
419 }
420 }
421 }
422
423 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
424 for(unsigned int di=0; di<fcmc->dimNr; di++) {
425 fcmc->cc[ci][di] = vnum[ci][di]/vden[ci][di];
426 }
427 }
428
429 oldJ=newJ;
430 newJ=0.0;
431
432 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
433 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
434 newJ += pow(currU[si][ci], fcmc->fc) * pow(d[si][ci], 2.0);
435 }
436 }
437 if(verbose>2) {
438 printf(" objective_func_val %g -> %g\n", oldJ, newJ);
439 }
440
441 if(newJ<minJ) {
442 minJ=newJ;
443 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
444 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
445 minU[si][ci]=currU[si][ci];
446 }
447 }
448 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
449 for(unsigned int di=0; di<fcmc->dimNr; di++) {
450 ccMin[ci][di]=fcmc->cc[ci][di];
451 }
452 }
453 }
454
455 maxUDiff=0.0;
456 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
457 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
458 double v=fabs(currU[si][ci]-oldU[si][ci]);
459 if(v>maxUDiff) maxUDiff=v;
460 }
461 }
462 if(verbose>2) {
463 printf(" max_diff_betw_prev_and_curr_membership_degree := %g\n", maxUDiff);
464 }
465
466 } // next iteration
467
468 /* Get final cluster centres */
469 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
470 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
471 currU[si][ci]=minU[si][ci];
472 }
473 }
474
475 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
476 for(unsigned int di=0; di<fcmc->dimNr; di++) {
477 fcmc->cc[ci][di]=ccMin[ci][di];
478 }
479 }
480
481 /* Set the optimal cluster for each sample */
482 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
483 double maxU=0.0;
484 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
485 if(currU[si][ci]>maxU) {
486 maxU=currU[si][ci];
487 fcmc->sc[si]=ci;
488 }
489 }
490 }
491
492 return(0);
493}
void fcmcPrint(FCMC *fcmc, FILE *fp)
Definition fcmc.c:132
double fcmcEuclideanDistances(FCMC *fcmc, unsigned int si, unsigned int ci)
Definition fcmc.c:176
int fcmcClusterInitialize(FCMC *fcmc, const int cinit, int verbose)
Definition fcmc.c:220

◆ fcmcPrint()

void fcmcPrint ( FCMC * fcmc,
FILE * fp )

Print the contents of FCMC data struct into specified file pointer.

Parameters
fcmcPointer to FCMC struct.
fpFile pointer, usually stdout.

Definition at line 132 of file fcmc.c.

137 {
138 fprintf(fp, "\nFCMC struct contents:\n");
139 if(fcmc==NULL) {fprintf(fp, "Empty data.\n"); return;}
140
141 fprintf(fp, "sampleNr := %u\n", fcmc->sampleNr);
142 fprintf(fp, "dimNr := %u\n", fcmc->dimNr);
143 fprintf(fp, "clusterNr := %u\n", fcmc->clusterNr);
144
145 if(fcmc->sampleNr>0 && fcmc->dimNr>0) {
146 fprintf(fp, "\nFCMC sample data:\n");
147 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
148 fprintf(fp, "%g", fcmc->d[si][0]);
149 for(unsigned int di=1; di<fcmc->dimNr; di++)
150 fprintf(fp, ",%g", fcmc->d[si][di]);
151 fprintf(fp, "\n");
152 }
153 fprintf(fp, "\n");
154 }
155
156 if(fcmc->clusterNr>0 && fcmc->dimNr>0) {
157 fprintf(fp, "\nFCMC cluster data:\n");
158 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
159 fprintf(fp, "%g", fcmc->cc[ci][0]);
160 for(unsigned int di=1; di<fcmc->dimNr; di++)
161 fprintf(fp, ",%g", fcmc->cc[ci][di]);
162 fprintf(fp, "\n");
163 }
164 fprintf(fp, "\n");
165 }
166
167 return;
168}

Referenced by fcmclustering().