TPCCLIB
Loading...
Searching...
No Matches
fcmc.c
Go to the documentation of this file.
1
4/*****************************************************************************/
5#include "tpcclibConfig.h"
6/*****************************************************************************/
7#include <stdio.h>
8#include <stdlib.h>
9#include <math.h>
10#include <time.h>
11#include <string.h>
12/*****************************************************************************/
13#include "tpcfcmc.h"
14/*****************************************************************************/
15
16/*****************************************************************************/
23 FCMC *fcmc
24) {
25 if(fcmc==NULL) return;
26 fcmc->sampleNr=fcmc->dimNr=fcmc->clusterNr=0;
27 fcmc->d=fcmc->cc=NULL;
28 fcmc->sc=NULL;
29 /* Max number of iterations. */
30 fcmc->maxIter=100;
31 /* Max accepted u difference. */
32 fcmc->limitMaxUDiff=1.0E-05;
33 /* Fuzziness coefficient. */
34 fcmc->fc=9;
35}
36/*****************************************************************************/
37
38/*****************************************************************************/
47 FCMC *fcmc
48) {
49 if(fcmc==NULL) return;
50 for(unsigned i=0; i<fcmc->sampleNr; i++) {
51 free(fcmc->d[i]);
52 }
53 free(fcmc->d);
54 for(unsigned i=0; i<fcmc->clusterNr; i++) {
55 free(fcmc->cc[i]);
56 }
57 free(fcmc->cc);
58 free(fcmc->sc);
59 fcmcInit(fcmc);
60}
61/*****************************************************************************/
62
63/*****************************************************************************/
74 FCMC *fcmc,
76 unsigned int sampleNr,
78 unsigned int dimNr,
80 unsigned int clusterNr
81) {
82 if(fcmc==NULL) return(1);
83 if(sampleNr<1) return(2);
84 if(dimNr<1) return(3);
85 if(clusterNr<1) return(4);
86 fcmcFree(fcmc);
87
88 /* Allocate memory for the sample data */
89 fcmc->d=(double**)malloc(sampleNr*sizeof(double*));
90 if(fcmc->d==NULL) return(11);
91 for(unsigned i=0; i<sampleNr; i++) {
92 fcmc->d[i]=(double*)malloc(dimNr*sizeof(double));
93 if(fcmc->d[i]==NULL) {
94 for(unsigned j=0; j<i; j++) free(fcmc->d[j]);
95 free(fcmc->d);
96 return(12);
97 }
98 }
99 fcmc->sampleNr=sampleNr;
100 fcmc->dimNr=dimNr;
101
102 /* Allocate memory for the cluster data */
103 fcmc->cc=(double**)malloc(clusterNr*sizeof(double*));
104 if(fcmc->cc==NULL) {
105 fcmcFree(fcmc);
106 return(21);
107 }
108 for(unsigned i=0; i<clusterNr; i++) {
109 fcmc->cc[i]=(double*)malloc(dimNr*sizeof(double));
110 if(fcmc->cc[i]==NULL) {
111 for(unsigned j=0; j<i; j++) free(fcmc->cc[j]);
112 free(fcmc->cc);
113 fcmcFree(fcmc);
114 return(22);
115 }
116 }
117 fcmc->clusterNr=clusterNr;
118
119 /* Allocate memory for the selected cluster for each sample */
120 fcmc->sc=(unsigned int*)malloc(sampleNr*sizeof(unsigned int));
121 if(fcmc->sc==NULL) {
122 fcmcFree(fcmc);
123 return(31);
124 }
125
126 return(0);
127}
128/*****************************************************************************/
129
130/*****************************************************************************/
134 FCMC *fcmc,
136 FILE *fp
137) {
138 fprintf(fp, "\nFCMC struct contents:\n");
139 if(fcmc==NULL) {fprintf(fp, "Empty data.\n"); return;}
140
141 fprintf(fp, "sampleNr := %u\n", fcmc->sampleNr);
142 fprintf(fp, "dimNr := %u\n", fcmc->dimNr);
143 fprintf(fp, "clusterNr := %u\n", fcmc->clusterNr);
144
145 if(fcmc->sampleNr>0 && fcmc->dimNr>0) {
146 fprintf(fp, "\nFCMC sample data:\n");
147 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
148 fprintf(fp, "%g", fcmc->d[si][0]);
149 for(unsigned int di=1; di<fcmc->dimNr; di++)
150 fprintf(fp, ",%g", fcmc->d[si][di]);
151 fprintf(fp, "\n");
152 }
153 fprintf(fp, "\n");
154 }
155
156 if(fcmc->clusterNr>0 && fcmc->dimNr>0) {
157 fprintf(fp, "\nFCMC cluster data:\n");
158 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
159 fprintf(fp, "%g", fcmc->cc[ci][0]);
160 for(unsigned int di=1; di<fcmc->dimNr; di++)
161 fprintf(fp, ",%g", fcmc->cc[ci][di]);
162 fprintf(fp, "\n");
163 }
164 fprintf(fp, "\n");
165 }
166
167 return;
168}
169/*****************************************************************************/
170
171/*****************************************************************************/
178 FCMC *fcmc,
180 unsigned int si,
182 unsigned int ci
183) {
184 if(fcmc==NULL) return(nan(""));
185 if(si>=fcmc->sampleNr || ci>=fcmc->clusterNr || fcmc->dimNr<1) return(nan(""));
186
187 double distance=0.0;
188 for(unsigned int di=0; di<fcmc->dimNr; di++)
189 distance+=pow( (fcmc->cc[ci][di] - fcmc->d[si][di]), 2.0);
190 distance = sqrt(distance);
191 return(distance);
192}
193/*****************************************************************************/
194
195/*****************************************************************************/
197typedef struct FCMC_ED {
198 double d;
199 unsigned int s;
200} FCMC_ED;
201
203
204static int fcmcQSortSamplesByDistance(const void *c1, const void *c2)
205{
206 return( ((FCMC_ED*)c1)->d > ((FCMC_ED*)c2)->d );
207}
209
222 FCMC *fcmc,
227 const int cinit,
229 int verbose
230) {
231 if(verbose>0) {printf("fcmcClusterInitialize(*FCMC, %d)\n", cinit); fflush(stdout);}
232 if(fcmc==NULL) return(1);
233 if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);
234
235 double mns[fcmc->dimNr];
236 if(cinit==2) {
237 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]=0.0;
238 } else { // default
239 /* Calculate sample means in each dimension */
240 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]=0.0;
241 for(unsigned int si=0; si<fcmc->sampleNr; si++)
242 for(unsigned int di=0; di<fcmc->dimNr; di++)
243 mns[di]+=fcmc->d[si][di];
244 for(unsigned int di=0; di<fcmc->dimNr; di++) mns[di]/=(double)fcmc->sampleNr;
245 if(verbose>2) {
246 printf("\nmeans:\n");
247 for(unsigned int di=0; di<fcmc->dimNr; di++) printf(" %g\n", mns[di]);
248 }
249 }
250
251 /* Calculate Euclidean distances of samples to the above set 'cluster centres' */
252 FCMC_ED *ed=malloc(fcmc->sampleNr*sizeof(FCMC_ED));
253 if(ed==NULL) return(10);
254 double v;
255 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
256 ed[si].s=si;
257 ed[si].d=0.0;
258 for(unsigned int di=0; di<fcmc->dimNr; di++) {
259 v=mns[di]-fcmc->d[si][di];
260 ed[si].d+=v*v;
261 }
262 /* sqrt(d) is not done now because only the order is used later */
263 } // next sample
264
265 /* Sort by increasing distance */
266 FCMC_ED *edptr=ed;
267 qsort(edptr, fcmc->sampleNr, sizeof(FCMC_ED), fcmcQSortSamplesByDistance);
268 if(verbose>2) {
269 printf("min_distance := %g\n", sqrt(ed[0].d));
270 printf("max_distance := %g\n", sqrt(ed[fcmc->sampleNr-1].d));
271 }
272
273 /* How many samples per cluster? */
274 unsigned int sperc=fcmc->sampleNr/fcmc->clusterNr;
275 if(verbose>1) printf("sperc := %u\n", sperc);
276 if(sperc<1) {free(ed); return(11);}
277
278 /* Calculate cluster centres */
279 /* Centre of each cluster, cc[0..clusterNr-1][0..dimNr-1]. */
280
281 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
282 for(unsigned int di=0; di<fcmc->dimNr; di++) fcmc->cc[ci][di]=0.0;
283 for(unsigned int i=ci*sperc; i<(ci+1)*sperc; i++) {
284 unsigned int ii=ed[i].s;
285 if(verbose>100) printf("ci=%u i=%u ii=%u d=%g\n", ci, i, ii, ed[i].d);
286 for(unsigned int di=0; di<fcmc->dimNr; di++)
287 fcmc->cc[ci][di]+=fcmc->d[ii][di];
288 }
289 for(unsigned int di=0; di<fcmc->dimNr; di++)
290 fcmc->cc[ci][di]/=(double)sperc;
291 }
292
293 free(ed);
294 return(0);
295}
296/*****************************************************************************/
297
298/*****************************************************************************/
331 FCMC *fcmc,
337 const int cinit,
339 int verbose
340) {
341 if(verbose>0) {printf("fcmclustering()\n"); fflush(stdout);}
342 if(fcmc==NULL) return(1);
343 if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);
344
345 /* Initialize cluster centres to mean */
346 if(cinit>0) {
347 if(fcmcClusterInitialize(fcmc, cinit, verbose-2)!=0) return(2);
348 if(verbose>20) fcmcPrint(fcmc, stdout);
349 }
350
351 /* Iterations */
352 unsigned int iter=0;
353 double maxUDiff=1.0;
354 double oldJ=0.0;
355 double newJ=999999999.;
356 double minJ=9999999999999999.;
357
358 double vnum[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid numerator
359 double vden[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid denominator
360 double ccMin[fcmc->clusterNr][fcmc->dimNr]; // cluster centroid at minimum
361 double currU[fcmc->sampleNr][fcmc->clusterNr]; // current membership
362 double oldU[fcmc->sampleNr][fcmc->clusterNr]; // previous membership
363 double minU[fcmc->sampleNr][fcmc->clusterNr];
364 double d[fcmc->sampleNr][fcmc->clusterNr];
365 double dk[fcmc->sampleNr];
366 double sumU[fcmc->sampleNr];
367
368 while(iter<fcmc->maxIter && maxUDiff>fcmc->limitMaxUDiff) {
369 iter++; if(verbose>1) {printf("iteration %d\n", iter); fflush(stdout);}
370
371 /* Initialize cluster centroid numerator and denominator */
372 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
373 for(unsigned int di=0; di<fcmc->dimNr; di++) {
374 vnum[ci][di]=vden[ci][di]=0.0;
375 }
376 }
377
378 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
379 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
380 if(iter==1) oldU[si][ci]=0.0; else oldU[si][ci]=currU[si][ci];
381 }
382 }
383
384 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
385 dk[si]=0.0;
386 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
387 d[si][ci] = fcmcEuclideanDistances(fcmc, /*fcmc->d, fcmc->cc,*/ si, ci);
388 dk[si]+=d[si][ci];
389 }
390
391 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
392 if(d[si][ci]==0.0)
393 currU[si][ci]=999.;
394 else
395 currU[si][ci] = 1.0/(d[si][ci]/dk[si]);
396 currU[si][ci]=pow(currU[si][ci], (2.0/(double)(fcmc->fc-1)) );
397 }
398 }
399
400 /* Normalize the U */
401 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
402 sumU[si]=0.0;
403 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
404 sumU[si]+=currU[si][ci];
405 }
406 }
407
408 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
409 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
410 currU[si][ci]=currU[si][ci]/sumU[si];
411 }
412 }
413
414 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
415 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
416 for(unsigned int di=0; di<fcmc->dimNr; di++) {
417 vnum[ci][di] += pow( currU[si][ci], fcmc->fc ) * fcmc->d[si][di];
418 vden[ci][di] += pow( currU[si][ci], fcmc->fc );
419 }
420 }
421 }
422
423 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
424 for(unsigned int di=0; di<fcmc->dimNr; di++) {
425 fcmc->cc[ci][di] = vnum[ci][di]/vden[ci][di];
426 }
427 }
428
429 oldJ=newJ;
430 newJ=0.0;
431
432 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
433 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
434 newJ += pow(currU[si][ci], fcmc->fc) * pow(d[si][ci], 2.0);
435 }
436 }
437 if(verbose>2) {
438 printf(" objective_func_val %g -> %g\n", oldJ, newJ);
439 }
440
441 if(newJ<minJ) {
442 minJ=newJ;
443 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
444 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
445 minU[si][ci]=currU[si][ci];
446 }
447 }
448 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
449 for(unsigned int di=0; di<fcmc->dimNr; di++) {
450 ccMin[ci][di]=fcmc->cc[ci][di];
451 }
452 }
453 }
454
455 maxUDiff=0.0;
456 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
457 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
458 double v=fabs(currU[si][ci]-oldU[si][ci]);
459 if(v>maxUDiff) maxUDiff=v;
460 }
461 }
462 if(verbose>2) {
463 printf(" max_diff_betw_prev_and_curr_membership_degree := %g\n", maxUDiff);
464 }
465
466 } // next iteration
467
468 /* Get final cluster centres */
469 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
470 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
471 currU[si][ci]=minU[si][ci];
472 }
473 }
474
475 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
476 for(unsigned int di=0; di<fcmc->dimNr; di++) {
477 fcmc->cc[ci][di]=ccMin[ci][di];
478 }
479 }
480
481 /* Set the optimal cluster for each sample */
482 for(unsigned int si=0; si<fcmc->sampleNr; si++) {
483 double maxU=0.0;
484 for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
485 if(currU[si][ci]>maxU) {
486 maxU=currU[si][ci];
487 fcmc->sc[si]=ci;
488 }
489 }
490 }
491
492 return(0);
493}
494/*****************************************************************************/
495
496/*****************************************************************************/
void fcmcInit(FCMC *fcmc)
Definition fcmc.c:21
void fcmcPrint(FCMC *fcmc, FILE *fp)
Definition fcmc.c:132
double fcmcEuclideanDistances(FCMC *fcmc, unsigned int si, unsigned int ci)
Definition fcmc.c:176
int fcmcClusterInitialize(FCMC *fcmc, const int cinit, int verbose)
Definition fcmc.c:220
int fcmcAllocate(FCMC *fcmc, unsigned int sampleNr, unsigned int dimNr, unsigned int clusterNr)
Definition fcmc.c:69
void fcmcFree(FCMC *fcmc)
Definition fcmc.c:43
int fcmclustering(FCMC *fcmc, const int cinit, int verbose)
Definition fcmc.c:326
Definition tpcfcmc.h:23
double limitMaxUDiff
Definition tpcfcmc.h:27
unsigned int fc
Definition tpcfcmc.h:29
double ** d
Definition tpcfcmc.h:39
unsigned int * sc
Definition tpcfcmc.h:43
unsigned int dimNr
Definition tpcfcmc.h:34
double ** cc
Definition tpcfcmc.h:41
unsigned int clusterNr
Definition tpcfcmc.h:36
unsigned int sampleNr
Definition tpcfcmc.h:32
unsigned int maxIter
Definition tpcfcmc.h:25
Header file for library libtpcfcmc.