/// @file regression.c
/// @brief Regression line fitting.
/// @copyright (c) Turku PET Centre
/// @author Vesa Oikonen
///
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
/*****************************************************************************/
#include "tpcextensions.h"
/*****************************************************************************/
#include "tpclinopt.h"
/*****************************************************************************/

/*****************************************************************************/
/** Fit a straight line y = m*x + c to the x,y data using ordinary least squares.

    Sample pairs in which either x or y is NaN or infinite are skipped. When only one valid
    sample pair is available, the line is drawn through the origin and that sample
    (slope y/x, intercept 0), provided that the resulting slope is finite.
    Requested outputs are set to NaN before the fit, so they stay NaN on error return
    (except when x or y is NULL, in which case the outputs are not touched).
    @sa fitLinePearson, statMeanSD, doubleMean, statSortDouble, highestSlope
    @return The number of samples used in the regression, or 0 in case of an error
     (missing data, no valid samples, or zero denominator n*sum(x^2)-sum(x)^2).
 */
int fitLine(
  /** Pointer to an array of x axis values. NaN's and infinite values are ignored. */
  double *x,
  /** Pointer to an array of y axis values. NaN's and infinite values are ignored. */
  double *y,
  /** The number of samples (length of x[] and y[]). */
  const int n,
  /** Pointer where calculated slope is written; enter NULL if not needed. */
  double *m,
  /** Pointer where calculated y axis intercept is written; enter NULL if not needed. */
  double *c
) {
  /* Nothing can be done without both data arrays */
  if(x==NULL || y==NULL) return(0);

  /* Results are NaN until a valid fit has been obtained */
  if(m!=NULL) *m=nan("");
  if(c!=NULL) *c=nan("");
  if(n<1) return(0);

  /* Accumulate the sums over the valid sample pairs */
  double sx=0.0, sy=0.0, sxx=0.0, sxy=0.0;
  int used=0;
  for(int i=0; i<n; i++) {
    if(!isfinite(x[i]) || !isfinite(y[i])) continue;
    sx+=x[i];
    sy+=y[i];
    sxx+=x[i]*x[i];
    sxy+=x[i]*y[i];
    used++;
  }
  if(used<1) return(0);

  /* A single sample: line through the origin and that sample */
  if(used==1) {
    double ratio=sy/sx;
    if(!isfinite(ratio)) return(0);
    if(m!=NULL) *m=ratio;
    if(c!=NULL) *c=0.0;
    return(used);
  }

  /* Two or more samples: normal equations of the least-squares line */
  double denom=(double)used*sxx - sx*sx;
  if(denom==0.0) return(0);
  if(m!=NULL) *m=((double)used*sxy - sx*sy)/denom;
  if(c!=NULL) *c=(sxx*sy - sx*sxy)/denom;
  return(used);
}
/*****************************************************************************/

/*****************************************************************************/
/** Calculate regression line slope (m), y axis intercept (c), their SDs, and
    Pearson's correlation coefficient (r).
    @sa fitLine, statMeanSD, doubleMean, statSortDouble
    @return Returns the number of samples used in the regression, 0 in case of an error.
    @todo Check SD of x axis intercept against some other program.
 */
int fitLinePearson(
  /** Pointer to an array of x axis values. NaN's and infinite values are ignored. */
  double *x,
  /** Pointer to an array of y axis values. NaN's and infinite values are ignored. */
  double *y,
  /** The number of samples (length of x[] and y[]). */
  const int n,
  /** Pointer where calculated slope is written; enter NULL if not needed. */
  double *m,
  /** Pointer where SD of slope is written; enter NULL if not needed. */
  double *msd,
  /** Pointer where calculated y axis intercept is written; enter NULL if not needed. */
  double *c,
  /** Pointer where SD of y axis intercept is written; enter NULL if not needed. */
  double *csd,
  /** Pointer where calculated x axis intercept is written; enter NULL if not needed. */
  double *d,
  /** Pointer where SD of x axis intercept is written; enter NULL if not needed. */
  double *dsd,
  /** Pointer where Pearson's correlation coefficient is written; enter NULL if not needed. */
  double *r,
  /** Pointer where residual variance of y values is written; enter NULL if not needed. */
  double *ysd
) {
  /* Check the data */
  if(x==NULL || y==NULL) return(0);
  if(m!=NULL) *m=nan("");
  if(c!=NULL) *c=nan("");
  if(d!=NULL) *d=nan("");
  if(msd!=NULL) *msd=nan("");
  if(csd!=NULL) *csd=nan("");
  if(dsd!=NULL) *dsd=nan("");
  if(r!=NULL) *r=nan("");
  if(ysd!=NULL) *ysd=nan("");
  if(n<1) return(0);

  double xsum=0.0, ysum=0.0, x2sum=0.0, y2sum=0.0, xysum=0.0;
  int nn=0;
  for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
    xsum+=x[i]; ysum+=y[i]; x2sum+=x[i]*x[i]; y2sum+=y[i]*y[i]; xysum+=x[i]*y[i]; nn++;
  }
  if(nn<1) return(0);
  if(nn==1) {
    double mm=ysum/xsum;
    if(isfinite(mm)) {
      if(m!=NULL) *m=mm;
      if(c!=NULL) *c=0.0;
      if(d!=NULL) {*d=0.0; if(fabs(mm)<1.0E-10) *d=nan("");}
      if(msd!=NULL) *msd=0.0;
      if(csd!=NULL) *csd=0.0;
      if(dsd!=NULL) {*dsd=0.0; if(fabs(mm)<1.0E-10) *dsd=nan("");}
      if(r!=NULL) *r=1.0;
      if(ysd!=NULL) *ysd=0.0;
      return(nn);
    } else
      return(0);
  }
  double xmean=xsum/(double)nn;
  double ymean=xsum/(double)nn;
  double dx2sum=0.0, /*dy2sum=0.0,*/ dxdysum=0.0;
  for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
    double f=x[i]-xmean; dx2sum+=f*f;
    double g=y[i]-ymean; /*dy2sum+=g*g;*/
    dxdysum+=f*g;
  }
  // slope and intercept
  double slope=dxdysum/dx2sum;
  double ic=(dx2sum*ysum - xsum*dxdysum)/((double)nn*dx2sum);
  if(!isfinite(slope) || !isfinite(ic)) return(0);
  if(m!=NULL) *m=slope;
  if(c!=NULL) *c=ic;
  double xic=-ic/slope; if(!isfinite(xic)) xic=nan("");
  if(d!=NULL) *d=xic;
  // Errors
  double ry2sum=0.0;
  for(int i=0; i<n; i++) if(isfinite(x[i]) && isfinite(y[i])) {
    double f=slope*x[i]+ic-y[i];
    ry2sum+=f*f;
  }
  double ye=sqrt(ry2sum/(double)(nn-2)); if(!isfinite(ye)) ye=0.0;
  if(ysd!=NULL) *ysd=ye;
  double slopee=ye/sqrt(dx2sum); if(!isfinite(slopee)) slopee=0.0;
  double ice=ye/sqrt((double)nn-xsum*xsum/x2sum); if(!isfinite(ice)) ice=0.0;
  if(msd!=NULL) *msd=slopee;
  if(csd!=NULL) *csd=ice;
  if(dsd!=NULL) { // SD of x axis intercept
    double xice=fabs(ye/slope)*sqrt((1.0/(double)nn) + ymean*ymean/(slope*slope*dx2sum));
    if(!isfinite(xice)) xice=nan("");
    *dsd=xice;
  }
  // Pearson's correlation coefficient
  double rr=(xysum-((xsum*ysum)/(double)nn)) /
            sqrt((x2sum-xsum*xsum/(double)nn)*(y2sum-ysum*ysum/(double)nn));
  // and correct for small sample size
  if(nn>4) rr*=1.0+(1.0-rr*rr)/(double)(2*(nn-4));
  if(!isfinite(rr)) rr=0.0; else rr=fabs(rr);
  if(r!=NULL) *r=rr;

  return(nn);
}
/*****************************************************************************/

/*****************************************************************************/
/** Search the x,y data for the regression line with the highest slope.

    Valid samples (finite x and y, and x not below x_start when x_start is finite) are first
    copied into local arrays. A line is then fitted with fitLine() to every run of slope_n
    consecutive valid samples, and the line with the highest slope is reported.
    @sa fitLine
    @return Return 0 if no errors were found; 1 if input arguments are invalid, 2 if there are
     fewer than slope_n valid samples, and 3 if no line with finite slope and intercept
     could be fitted.
 */
int highestSlope(
  /** Pointer to an array of x axis values. NaN's and infinite values are ignored. 
      Data must be sorted by increasing x, and overlapping x values may cause problem. */
  double *x,
  /** Pointer to an array of y axis values. NaN's and infinite values are ignored. 
      Data is not modified. */
  double *y,
  /** The number of samples (length of x[] and y[]). */
  const int n,
  /** The number of samples used to fit the line; must be at least 2. */
  const int slope_n,
  /** Estimation start x value, samples with smaller x are ignored; can usually be set to zero. */
  double x_start,
  /** Pointer where calculated max slope is written; NULL if not needed. */
  double *m,
  /** Pointer where calculated y axis intercept is written; NULL if not needed. */
  double *yi,
  /** Pointer where calculated x axis intercept is written; NULL if not needed.
      This could be used as an estimate of radioactivity appearance time in TAC data,
      but you must then check that the max slope m is positive. */
  double *xi,
  /** Pointer where the place (x) of the highest slope is written; NULL if not needed. */
  double *xh
) {
  /* Validate the input; outputs are not touched if this fails */
  if(x==NULL || y==NULL) return(1);
  if(n<2 || slope_n<2) return(1);

  /* Requested outputs are NaN until a result is available */
  if(m!=NULL) *m=nan("");
  if(yi!=NULL) *yi=nan("");
  if(xi!=NULL) *xi=nan("");
  if(xh!=NULL) *xh=nan("");

  /* Collect the usable samples into local arrays */
  double xbuf[n], ybuf[n];
  int cnt=0;
  for(int i=0; i<n; i++) {
    if(!isfinite(x[i]) || !isfinite(y[i])) continue;
    if(isfinite(x_start) && x[i]<x_start) continue;
    xbuf[cnt]=x[i];
    ybuf[cnt]=y[i];
    cnt++;
  }
  if(cnt<slope_n) return(2);

  /* Fit a line to each window of slope_n consecutive samples, keeping the steepest one */
  double best_slope=nan(""), best_ic=nan("");
  int best_first=0;
  const int last_first=cnt-slope_n;
  for(int first=0; first<=last_first; first++) {
    double win_slope, win_ic;
    int fitted=fitLine(xbuf+first, ybuf+first, slope_n, &win_slope, &win_ic);
    if(fitted<slope_n) continue;
    if(!isfinite(win_slope) || !isfinite(win_ic)) continue;
    if(isnan(best_slope) || win_slope>best_slope) {
      best_slope=win_slope;
      best_ic=win_ic;
      best_first=first;
    }
  }
  if(!isfinite(best_slope)) return(3);

  /* Report the results */
  if(m!=NULL) *m=best_slope;
  if(yi!=NULL) *yi=best_ic;
  /* x axis intercept is left as NaN for a horizontal line */
  if(xi!=NULL && best_slope!=0.0) *xi=-best_ic/best_slope;
  if(xh!=NULL) {
    /* Place of the highest slope is the mean x of the samples in the best window */
    double xtotal=0.0;
    for(int k=best_first; k<best_first+slope_n; k++) xtotal+=xbuf[k];
    *xh=xtotal/(double)slope_n;
  }

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
