/** @file fcmc.c
 *  @brief Fuzzy C means clustering algorithm.
 */
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/*****************************************************************************/
#include "tpcfcmc.h"
/*****************************************************************************/

/*****************************************************************************/
/** Reset an FCMC struct to its empty default state. Must be called once
    before the struct is used for anything else. Pointers are only overwritten
    with NULL, nothing is freed here.
    @sa fcmcFree, fcmcAllocate
    @author Vesa Oikonen
 */
void fcmcInit(
  /** Pointer to FCMC struct; NULL is silently ignored. */
  FCMC *fcmc
) {
  if(fcmc!=NULL) {
    /* No data is attached yet */
    fcmc->clusterNr=0;
    fcmc->dimNr=0;
    fcmc->sampleNr=0;
    fcmc->cc=NULL;
    fcmc->d=NULL;
    fcmc->sc=NULL;

    /* Default settings for the clustering */
    fcmc->maxIter=100;            /* max number of iterations */
    fcmc->limitMaxUDiff=1.0E-05;  /* max accepted u difference */
    /* Fuzzyness coefficient.
       NOTE(review): a fuzzifier of 2 is the commonly used value in FCM
       literature; confirm that 9 is the intended default. */
    fcmc->fc=9;
  }
}
/*****************************************************************************/

/*****************************************************************************/
/** Release all memory owned by the FCMC struct and return it to the state
    set by fcmcInit(). All contents, including settings, are lost.
    @sa fcmcInit, fcmcAllocate
    @author Vesa Oikonen
 */
void fcmcFree(
  /** Pointer to FCMC struct; NULL is silently ignored.
     @pre Before first use initialize the FCMC struct with fcmcInit().
  */
  FCMC *fcmc
) {
  if(fcmc==NULL) return;

  /* Sample data: one row per sample, then the row table itself */
  unsigned int row=0;
  while(row<fcmc->sampleNr) {
    free(fcmc->d[row]);
    row++;
  }
  free(fcmc->d);

  /* Cluster centres: one row per cluster, then the row table itself */
  row=0;
  while(row<fcmc->clusterNr) {
    free(fcmc->cc[row]);
    row++;
  }
  free(fcmc->cc);

  /* Selected cluster of each sample */
  free(fcmc->sc);

  /* Reset counts, pointers and settings to defaults */
  fcmcInit(fcmc);
}
/*****************************************************************************/

/*****************************************************************************/
/** Allocate memory for FCMC data. Any previous contents of the struct are
    destroyed (via fcmcFree()) before the new allocation.

    On failure the struct is left in a consistent state: every pointer that
    was released is reset, so that fcmcFree() or a new fcmcAllocate() can be
    called safely afterwards.

    @sa fcmcInit, fcmcFree
    @author Vesa Oikonen
    @return Returns 0 if ok; 1 if struct pointer is NULL; 2, 3 or 4 if
     sampleNr, dimNr or clusterNr is zero, respectively; 11 or 12 if memory
     for sample data, 21 or 22 if memory for cluster centres, and 31 if
     memory for the selected clusters could not be allocated.
 */
int fcmcAllocate(
  /** Pointer to FCMC struct; any pre-existing contents are deleted.
     @pre Before first use initialize the FCMC struct with fcmcInit().
     @post After last use free allocated memory with fcmcFree().
  */
  FCMC *fcmc,
  /** Number of data samples. */
  unsigned int sampleNr,
  /** Number of dimensions for each sample. */
  unsigned int dimNr,
  /** Number of clusters. */
  unsigned int clusterNr
) {
  if(fcmc==NULL) return(1);
  if(sampleNr<1) return(2);
  if(dimNr<1) return(3);
  if(clusterNr<1) return(4);
  fcmcFree(fcmc);

  /* Allocate memory for the sample data; calloc() is used because it checks
     the count*size multiplication for overflow */
  fcmc->d=calloc(sampleNr, sizeof(double*));
  if(fcmc->d==NULL) return(11);
  for(unsigned i=0; i<sampleNr; i++) {
    fcmc->d[i]=calloc(dimNr, sizeof(double));
    if(fcmc->d[i]==NULL) {
      /* sampleNr is still zero, thus fcmcFree() would not release the rows;
         release them here and reset the pointer so that it is never freed
         a second time */
      for(unsigned j=0; j<i; j++) free(fcmc->d[j]);
      free(fcmc->d);
      fcmc->d=NULL;
      return(12);
    }
  }
  fcmc->sampleNr=sampleNr;
  fcmc->dimNr=dimNr;

  /* Allocate memory for the cluster data */
  fcmc->cc=calloc(clusterNr, sizeof(double*));
  if(fcmc->cc==NULL) {
    fcmcFree(fcmc);
    return(21);
  }
  for(unsigned i=0; i<clusterNr; i++) {
    fcmc->cc[i]=calloc(dimNr, sizeof(double));
    if(fcmc->cc[i]==NULL) {
      /* clusterNr is still zero, thus the rows must be released here; reset
         the pointer before fcmcFree() so that it is not freed twice */
      for(unsigned j=0; j<i; j++) free(fcmc->cc[j]);
      free(fcmc->cc);
      fcmc->cc=NULL;
      fcmcFree(fcmc);
      return(22);
    }
  }
  fcmc->clusterNr=clusterNr;

  /* Allocate memory for the selected cluster for each sample */
  fcmc->sc=calloc(sampleNr, sizeof(unsigned int));
  if(fcmc->sc==NULL) {
    fcmcFree(fcmc);
    return(31);
  }

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/** Print the contents of FCMC data struct into specified file pointer.

    Counts are always printed. Sample data and cluster centres are printed as
    comma-separated rows, one sample or cluster per line, but only when the
    corresponding counts are non-zero and the data table exists.
    Nothing is written if the file pointer is NULL.
 */
void fcmcPrint(
  /** Pointer to FCMC struct; NULL is reported as empty data. */
  FCMC *fcmc,
  /** File pointer, usually stdout. */
  FILE *fp
) {
  /* Without an output stream there is nothing that can be done */
  if(fp==NULL) return;

  fprintf(fp, "\nFCMC struct contents:\n");
  if(fcmc==NULL) {fprintf(fp, "Empty data.\n"); return;}

  fprintf(fp, "sampleNr := %u\n", fcmc->sampleNr);
  fprintf(fp, "dimNr := %u\n", fcmc->dimNr);
  fprintf(fp, "clusterNr := %u\n", fcmc->clusterNr);

  /* Table pointers are verified too, in case the counts were set without
     allocating the data */
  if(fcmc->sampleNr>0 && fcmc->dimNr>0 && fcmc->d!=NULL) {
    fprintf(fp, "\nFCMC sample data:\n");
    for(unsigned int si=0; si<fcmc->sampleNr; si++) {
      fprintf(fp, "%g", fcmc->d[si][0]);
      for(unsigned int di=1; di<fcmc->dimNr; di++)
        fprintf(fp, ",%g", fcmc->d[si][di]);
      fprintf(fp, "\n");
    }
    fprintf(fp, "\n");
  }

  if(fcmc->clusterNr>0 && fcmc->dimNr>0 && fcmc->cc!=NULL) {
    fprintf(fp, "\nFCMC cluster data:\n");
    for(unsigned int ci=0; ci<fcmc->clusterNr; ci++) {
      fprintf(fp, "%g", fcmc->cc[ci][0]);
      for(unsigned int di=1; di<fcmc->dimNr; di++)
        fprintf(fp, ",%g", fcmc->cc[ci][di]);
      fprintf(fp, "\n");
    }
    fprintf(fp, "\n");
  }

  return;
}
/*****************************************************************************/

/*****************************************************************************/
/** Euclidean distance between one sample and one cluster centre, computed
    over all dimensions; used by fuzzy C means clustering.
    @sa fcmclustering, fcmcInit, fcmcFree
    @return Returns the Euclidean distance, or NaN in case of an error
     (NULL struct, index out of range, or no dimensions).
 */
double fcmcEuclideanDistances(
  /** Pointer to FCMC struct. */
  FCMC *fcmc,
  /** Sample index [0..sampleNr-1]. */
  unsigned int si,
  /** Cluster index [0..clusterNr-1]. */
  unsigned int ci
) {
  /* Every invalid argument gives NaN */
  if(fcmc==NULL) return(nan(""));
  if(fcmc->dimNr<1) return(nan(""));
  if(ci>=fcmc->clusterNr) return(nan(""));
  if(si>=fcmc->sampleNr) return(nan(""));

  const double *centre=fcmc->cc[ci];
  const double *sample=fcmc->d[si];

  /* Sum of squared coordinate differences */
  double sumsq=0.0;
  unsigned int k=0;
  while(k<fcmc->dimNr) {
    sumsq+=pow( (centre[k] - sample[k]), 2.0);
    k++;
  }

  sumsq = sqrt(sumsq);
  return(sumsq);
}
/*****************************************************************************/

/*****************************************************************************/
/** Data struct needed by fcmcClusterInitialize(): pairs a sample with its
    distance measure so that samples can be ordered by distance. */
typedef struct FCMC_ED {
  /** Distance measure of the sample; fcmcClusterInitialize() stores the
      squared Euclidean distance here, since only the order is needed. */
  double d;
  /** Index of the sample in the FCMC sample data. */
  unsigned int s;
} FCMC_ED;

/// @cond
/** Local function: qsort() comparator ordering FCMC_ED entries by increasing
    distance. Returns negative, zero, or positive value when the first
    distance is smaller than, equal to, or larger than the second one, as the
    qsort() contract requires; returning only 0 or 1 would tell qsort() that
    a smaller element is 'equal' and could leave the array unsorted. */
static int fcmcQSortSamplesByDistance(const void *c1, const void *c2)
{
  const double d1=((const FCMC_ED*)c1)->d;
  const double d2=((const FCMC_ED*)c2)->d;
  /* Three-way comparison without subtraction, which could lose precision or
     overflow when converted to int */
  return( (d1>d2) - (d1<d2) );
}
/// @endcond

/** Initialize cluster centers. You probably want to replace this function with
     one tailored for your data; this works only if sample numbers are reasonably
     well balanced between clusters.

    @details Samples are ordered by their distance from a reference point
     (sample mean, or zero), the ordered list is cut into clusterNr consecutive
     groups of sampleNr/clusterNr samples each, and the mean of each group
     becomes a cluster centre. Samples left over from the integer division are
     not used.

     Reference: Yedla M, Pathakota SR, Srinivasa TM. Enhancing K-means clustering
     algorithm with improved initial center. Int J Comp Sci Inform Technol. 2010;1(2): 121-125.

    @sa fcmclustering, fcmcInit, fcmcFree
    @return Returns 0 if ok; 1 if struct is NULL or empty, 10 if out of memory,
     and 11 if there are fewer samples than clusters.
 */
int fcmcClusterInitialize(
  /** Pointer to FCMC struct. */
  FCMC *fcmc,
  /** Initialization method: 
      - 0 and 1: sample distances to sample means,
      - 2: sample distances to zero.
  */
  const int cinit,
  /** Verbose level; if zero, then nothing is printed to stderr or stdout. */
  int verbose
) {
  if(verbose>0) {printf("fcmcClusterInitialize(*FCMC, %d)\n", cinit); fflush(stdout);}
  if(fcmc==NULL) return(1);
  if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);

  const unsigned int nSamples=fcmc->sampleNr;
  const unsigned int nDims=fcmc->dimNr;
  const unsigned int nClusters=fcmc->clusterNr;

  /* Reference point: zero, unless sample means are requested (the default) */
  double ref[nDims];
  for(unsigned int k=0; k<nDims; k++) ref[k]=0.0;
  if(cinit!=2) {
    for(unsigned int s=0; s<nSamples; s++) {
      const double *sample=fcmc->d[s];
      for(unsigned int k=0; k<nDims; k++) ref[k]+=sample[k];
    }
    for(unsigned int k=0; k<nDims; k++) ref[k]/=(double)nSamples;
    if(verbose>2) {
      printf("\nmeans:\n");
      for(unsigned int k=0; k<nDims; k++) printf(" %g\n", ref[k]);
    }
  }

  /* Squared distance of each sample from the reference point; square root
     is not taken because only the order matters */
  FCMC_ED *order=malloc(nSamples*sizeof(FCMC_ED));
  if(order==NULL) return(10);
  for(unsigned int s=0; s<nSamples; s++) {
    order[s].s=s;
    order[s].d=0.0;
    for(unsigned int k=0; k<nDims; k++) {
      double diff=ref[k]-fcmc->d[s][k];
      order[s].d+=diff*diff;
    }
  }

  /* Arrange samples by increasing distance */
  qsort(order, nSamples, sizeof(FCMC_ED), fcmcQSortSamplesByDistance);
  if(verbose>2) {
    printf("min_distance := %g\n", sqrt(order[0].d));
    printf("max_distance := %g\n", sqrt(order[nSamples-1].d));
  }

  /* Number of samples that go into each cluster */
  const unsigned int perCluster=nSamples/nClusters;
  if(verbose>1) printf("sperc := %u\n", perCluster);
  if(perCluster<1) {free(order); return(11);}

  /* Cluster centre is the mean of its group of consecutive sorted samples */
  for(unsigned int c=0; c<nClusters; c++) {
    double *centre=fcmc->cc[c];
    const unsigned int first=c*perCluster;
    const unsigned int stop=first+perCluster;

    for(unsigned int k=0; k<nDims; k++) centre[k]=0.0;
    for(unsigned int i=first; i<stop; i++) {
      const unsigned int ii=order[i].s;
      if(verbose>100) printf("ci=%u i=%u ii=%u d=%g\n", c, i, ii, order[i].d);
      for(unsigned int k=0; k<nDims; k++) centre[k]+=fcmc->d[ii][k];
    }
    for(unsigned int k=0; k<nDims; k++) centre[k]/=(double)perCluster;
  }

  free(order);
  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/** Fuzzy C means clustering. Works, but code optimization still needed.

   @details 
    The following parameters in FCMC struct can be changed before calling this
    function:
    - Max number of iterations, maxIter
    - Acceptance limit for max u difference, limitMaxUDiff
    - Fuzzyness coefficient, fc; must be larger than 1.

    Iterations are continued until the largest change in any membership degree
    falls to limitMaxUDiff or below, or maxIter is reached. The memberships
    and cluster centres of the iteration which gave the smallest objective
    function value are used as the result: centres are written in cc, and
    the cluster with the highest membership for each sample in sc.

    Work tables are allocated from the heap, because their size depends on
    sampleNr*clusterNr and could exceed the stack size.

    This code and its tests are based on C code in 
    https://github.com/JRC1995/Fuzzy-C-Mean.

    References:
    - https://home.deib.polimi.it/matteucc/Clustering/tutorial_html/cmeans.html
    - Dunn JC. A fuzzy relative of the ISODATA process and its use in detecting 
      compact well-separated clusters. J Cybernetics 1974;3(3):32-57.
      http://dx.doi.org/10.1080/01969727308546046
    - Bezdek JC: Pattern Recognition with Fuzzy Objective Function Algorithms.
      Plenum Press, 1981. DOI 10.1007/978-1-4757-0450-1.
    - Bezdek JC, Keller J, Krisnapuram R, Pal NR: Fuzzy Models and Algorithms
      for Pattern Recognition and Image Processing. Springer, 2005.
      ISBN 0-387-24515-4.

    @sa fcmcInit, fcmcFree, fcmcClusterInitialize
    @author Vesa Oikonen
    @return Returns 0 if ok; 1 if input is invalid (NULL or empty struct,
     missing data tables, or fc not larger than 1), 2 if initialization of
     cluster centres failed, 3 if out of memory, and 4 if no solution with
     a finite objective function value was obtained (including the case that
     not a single iteration was run).
 */
int fcmclustering(
  /** Pointer to filled FCMC struct.
     @pre Before first use initialize the FCMC struct with fcmcInit().
     @post After last use free allocated memory with fcmcFree().
  */
  FCMC *fcmc,
  /** Initialize cluster centers locally: 
      0=no (user has already initialized those),
      1=yes (using fcmcClusterInitialize() with sample distances to mean),
      2=yes (using fcmcClusterInitialize() with sample distances to zero).
  */
  const int cinit,
  /** Verbose level; if zero, then nothing is printed to stderr or stdout. */
  int verbose
) {
  if(verbose>0) {printf("fcmclustering()\n"); fflush(stdout);}
  if(fcmc==NULL) return(1);
  if(fcmc->sampleNr<1 || fcmc->dimNr<1 || fcmc->clusterNr<1) return(1);
  if(fcmc->d==NULL || fcmc->cc==NULL || fcmc->sc==NULL) return(1);
  /* Membership exponent 2/(fc-1) is defined only when fc>1 */
  if(!(fcmc->fc>1)) return(1);

  const unsigned int ns=fcmc->sampleNr;
  const unsigned int nc=fcmc->clusterNr;
  const unsigned int nd=fcmc->dimNr;

  /* Work tables; all are declared before the first goto, as is required for
     pointers to variable length arrays */
  double (*vnum)[nd]=calloc(nc, nd*sizeof(double));  // cluster centroid numerator
  double (*vden)[nd]=calloc(nc, nd*sizeof(double));  // cluster centroid denominator
  double (*ccMin)[nd]=calloc(nc, nd*sizeof(double)); // cluster centroid at minimum
  double (*currU)[nc]=calloc(ns, nc*sizeof(double)); // current membership
  double (*oldU)[nc]=calloc(ns, nc*sizeof(double));  // previous membership
  double (*minU)[nc]=calloc(ns, nc*sizeof(double));  // membership at minimum
  double (*d)[nc]=calloc(ns, nc*sizeof(double));     // sample-to-centre distances
  int ret=0;
  if(vnum==NULL || vden==NULL || ccMin==NULL || currU==NULL || oldU==NULL
     || minU==NULL || d==NULL)
  {
    ret=3; goto cleanup;
  }

  /* Initialize cluster centers, if requested */
  if(cinit>0) {
    if(fcmcClusterInitialize(fcmc, cinit, verbose-2)!=0) {ret=2; goto cleanup;}
    if(verbose>20) fcmcPrint(fcmc, stdout);
  }

  /* Iterations */
  unsigned int iter=0;
  double maxUDiff=1.0;
  double oldJ=0.0;
  double newJ=999999999.;
  double minJ=INFINITY; // any finite objective function value is smaller
  int haveMin=0;        // set when minU and ccMin contain a stored solution

  while(iter<fcmc->maxIter && maxUDiff>fcmc->limitMaxUDiff) {
    iter++; if(verbose>1) {printf("iteration %u\n", iter); fflush(stdout);}

    /* Keep the previous memberships; zeroes before the first iteration */
    for(unsigned int si=0; si<ns; si++) {
      for(unsigned int ci=0; ci<nc; ci++) {
        if(iter==1) oldU[si][ci]=0.0; else oldU[si][ci]=currU[si][ci];
      }
    }

    /* Compute new memberships from distances to the current centres */
    for(unsigned int si=0; si<ns; si++) {
      double dk=0.0; // sum of distances of this sample to all centres
      for(unsigned int ci=0; ci<nc; ci++) {
        d[si][ci] = fcmcEuclideanDistances(fcmc, si, ci);
        dk+=d[si][ci];
      }

      double sumU=0.0;
      for(unsigned int ci=0; ci<nc; ci++) {
        double u;
        /* Sample located exactly at the centre would lead to division by
           zero; a large fixed value is used instead */
        if(d[si][ci]==0.0)
          u=999.;
        else
          u=1.0/(d[si][ci]/dk);
        currU[si][ci]=pow(u, (2.0/(double)(fcmc->fc-1)) );
        sumU+=currU[si][ci];
      }

      /* Normalize the memberships of this sample to sum of one */
      for(unsigned int ci=0; ci<nc; ci++) {
        currU[si][ci]=currU[si][ci]/sumU;
      }
    }

    /* Update cluster centres as membership-weighted means of the samples */
    for(unsigned int ci=0; ci<nc; ci++) {
      for(unsigned int di=0; di<nd; di++) {
        vnum[ci][di]=vden[ci][di]=0.0;
      }
    }
    for(unsigned int si=0; si<ns; si++) {
      for(unsigned int ci=0; ci<nc; ci++) {
        /* Weight does not depend on the dimension */
        const double w=pow( currU[si][ci], fcmc->fc );
        for(unsigned int di=0; di<nd; di++) {
          vnum[ci][di] += w * fcmc->d[si][di];
          vden[ci][di] += w;
        }
      }
    }
    for(unsigned int ci=0; ci<nc; ci++) {
      for(unsigned int di=0; di<nd; di++) {
        fcmc->cc[ci][di] = vnum[ci][di]/vden[ci][di];
      }
    }

    /* Objective function value */
    oldJ=newJ;
    newJ=0.0;
    for(unsigned int si=0; si<ns; si++) {
      for(unsigned int ci=0; ci<nc; ci++) {
        newJ += pow(currU[si][ci], fcmc->fc) * pow(d[si][ci], 2.0);
      }
    }
    if(verbose>2) {
      printf(" objective_func_val %g -> %g\n", oldJ, newJ);
    }

    /* Store the solution if it is the best this far */
    if(newJ<minJ) {
      minJ=newJ;
      haveMin=1;
      memcpy(minU, currU, (size_t)ns*nc*sizeof(double));
      for(unsigned int ci=0; ci<nc; ci++) {
        memcpy(ccMin[ci], fcmc->cc[ci], nd*sizeof(double));
      }
    }

    /* Largest change in membership degrees determines the convergence */
    maxUDiff=0.0;
    for(unsigned int si=0; si<ns; si++) {
      for(unsigned int ci=0; ci<nc; ci++) {
        double v=fabs(currU[si][ci]-oldU[si][ci]);
        if(v>maxUDiff) maxUDiff=v;
      }
    }
    if(verbose>2) {
      printf(" max_diff_betw_prev_and_curr_membership_degree := %g\n", maxUDiff);
    }

  } // next iteration

  /* Without a stored solution the minimum tables have no meaningful contents */
  if(!haveMin) {ret=4; goto cleanup;}

  /* Get final cluster centres */
  for(unsigned int ci=0; ci<nc; ci++) {
    for(unsigned int di=0; di<nd; di++) {
      fcmc->cc[ci][di]=ccMin[ci][di];
    }
  }

  /* Set the optimal cluster for each sample; the first cluster is the
     default, so that the result is defined even if no membership is above
     zero */
  for(unsigned int si=0; si<ns; si++) {
    double maxU=0.0;
    fcmc->sc[si]=0;
    for(unsigned int ci=0; ci<nc; ci++) {
      if(minU[si][ci]>maxU) {
        maxU=minU[si][ci];
        fcmc->sc[si]=ci;
      }
    }
  }

cleanup:
  free(vnum);
  free(vden);
  free(ccMin);
  free(currU);
  free(oldU);
  free(minU);
  free(d);
  return(ret);
}
/*****************************************************************************/

/*****************************************************************************/
