/** @file avgbolus.c
 *  @brief Calculates an average curve of bolus input curves with different
           sample times.
 *  @copyright (c) Turku PET Centre
 *  @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
/*****************************************************************************/
#include "libtpcmisc.h"
#include "libtpccurveio.h"
#include "libtpcmodel.h"
#include "libtpcmodext.h"
/*****************************************************************************/

/*****************************************************************************/
/* Usage/help text of the program, one output line per array element.
   The array is passed to tpcPrintUsage() and tpcHtmlUsage() in main(), and it
   is terminated by a null pointer (the final 0).
   NOTE(review): "@P" is presumably replaced by the program name and
   " -stdoptions" by the list of standard options when the text is printed;
   confirm against the usage printing functions in libtpcmisc. */
static char *info[] = {
  "Calculate an average curve of several bolus input curves with different",
  "sample times. For simulations.",
  " ",
  "Usage: @P [Options] meanfile tacfiles",
  " ",
  "Options:",
  " -nr=<Sample nr>",
  "     Set the nr of samples to use for bolus appearance time; default is 2.",
  "     Set nr=0, if different appearance time is not to be considered.",
  " -peak",
  "     Peak time is used to align TACs instead of bolus appearance time.",
  " -ns",
  "     TACs are not scaled to a common AUC.",
  " -ne",
  "     Standard deviations are not written in output file.",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example 1:",
  "    @P apmean.kbq up????ap.kbq",
  "Example 2 (Windows OS):",
  "    dir /b *.kbq > filelist.txt",
  "    @P apmean.dat filelist.txt",
  " ",
  "TAC datafiles must contain a time column, and one or more concentration",
  "columns separated by space(s) or tabulator(s). Only the first concentration",
  "column is used in calculations.",
  "If only one input datafile is given, it is assumed to contain a list of",
  "bolus datafiles with paths if necessary. Tabs, commas and newlines can be",
  "used to separate filenames in the list file.",
  " ",
  "Output datafile will contain three columns: time, avg concentration and s.d.",
  "Program will determine the new sample times based on the shortest of input",
  "datafiles.",
  " ",
  "Detailed program description:",
  " 1) Read first curve from each datafile",
  " 2) Replace NaNs with interpolated values.",
  " 3) Determine bolus appearance time in each curve based on certain number",
  "    of samples with highest slope.",
  " 4) Move all curves in time to have a common appearance time.",
  " 5) Search the bolus curve with shortest sampling duration.",
  " 6) Calculate AUC from 0 to that time from all curves separately.",
  " 7) Scale all bolus curves to have the same average AUC.",
  " 8) Interpolate all bolus curves to common sample times.",
  " 9) Calculate the mean and s.d. curve of all bolus curves.",
  "10) Write the mean and s.d. data in a specified ASCII datafile.",
  " ",
  "See also: avgttac, avgfract, dftavg, tacadd, interpol, tac2svg, tacformat",
  " ",
  "Keywords: TAC, simulation, modelling, input",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Turn on wildcard expansion (globbing) of the command-line arguments.
   Globbing is disabled by default in mingw-w64 (_dowildcard=0), therefore it
   is enabled here by defining _dowildcard as -1.
   In MinGW32, define _CRT_glob instead, if necessary:
     #undef _CRT_glob
     #define _CRT_glob -1
   In Unix and Linux, wildcard command-line processing is enabled by default. */
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/**
 *  Main
 */
int main(int argc, char **argv)
{
  int     ai, help=0, version=0, verbose=1;
  int     ret, n, m;
  int     slopeNr=2, scaling=1, save_errors=1;
  int     usePeak=0;
  char   *cptr, ofile[FILENAME_MAX], 
          tmp[FILENAME_MAX], studynr[MAX_STUDYNR_LEN+1];
  double  f, g, h;
  STR_TOKEN_LIST filelist;


  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  ofile[0]=(char)0;
  str_token_list_init(&filelist);
  /* Options */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    cptr=argv[ai]+1; if(*cptr=='-') cptr++; if(cptr==NULL) continue;
    if(strncasecmp(cptr, "NR=", 3)==0) {
      slopeNr=atoi(cptr+3); continue;
    } else if(strncasecmp(cptr, "NS", 2)==0) {
      scaling=0; continue;
    } else if(strncasecmp(cptr, "NE", 2)==0) {
      save_errors=0; continue;
    } else if(strcasecmp(cptr, "PEAK")==0) {
      usePeak=1; continue;
    }
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    return(1);
  } else break;
  if(usePeak) slopeNr=0;
  
  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); return(0);}

  /* Process other arguments, starting from the first non-option */
  for(; ai<argc; ai++) {
    if(!ofile[0]) {
      strlcpy(ofile, argv[ai], FILENAME_MAX); continue;
    } else if(str_token_list_add(&filelist, argv[ai])) {
      fprintf(stderr, "Error: invalid argument '%s'.\n", argv[ai]);
      return(1);
    }
  }

  /* Is something missing? */
  if(!ofile[0] || filelist.token_nr<1) {
    fprintf(stderr, "Error: missing command-line argument; use option --help\n");
    return(1);
  }

  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("usePeak := %d\n", usePeak);
    if(!usePeak) printf("slopeNr := %d\n", slopeNr);
    printf("scaling := %d\n", scaling);
    printf("save_errors := %d\n", save_errors);
    printf("ofile := %s\n", ofile);
    printf("token_nr := %d\n", filelist.token_nr);
  }

  /* Check filename list; read list of files if necessary */
  if(filelist.token_nr==0) {
    tpcPrintUsage(argv[0], info, stderr);
    str_token_list_empty(&filelist);
    return(1);
  } else if(filelist.token_nr>1) {
    /* Filenames were given directly */
    if(verbose>2) printf("filenames were given on command-line\n");
  } else { /* Filenames were given in a list file */
    strcpy(tmp, filelist.tok[0]);
    if(access(tmp, 0) == -1) {
      fprintf(stderr, "Error: '%s' is not accessible.\n", tmp); 
      str_token_list_empty(&filelist); return(2);
    }
    str_token_list_empty(&filelist);
    if(verbose>1) printf("reading filenames in %s\n", tmp);
    ret=str_token_list_read(tmp, &filelist);
    if(ret) {
      fprintf(stderr, "Error %d in reading filename list %s\n", ret, tmp);
      return(2);
    }
  }

  /*
   *  Read datafiles
   */
  int bi, fi, dftNr;
  dftNr=filelist.token_nr;
  DFT *dftlist;
  if(verbose>1) printf("dftNr := %d\n", dftNr);
  /* Check that files exist */
  for(bi=0; bi<dftNr; bi++) {
    if(verbose>3) 
      fprintf(stdout, "Bolus datafile #%d: '%s'\n", bi+1, filelist.tok[bi]);
    if(access(filelist.tok[bi], 0) == -1) {
      fprintf(stderr, "Error: bolus file '%s' is not accessible.\n",
              filelist.tok[bi]);
      str_token_list_empty(&filelist); return(3);
    }
  }
  /* Allocate memory for an array of DFT data */
  dftlist=(DFT*)malloc(dftNr*sizeof(DFT));
  if(dftlist==NULL) {
    fprintf(stderr, "Error: out of memory.\n");
    str_token_list_empty(&filelist); return(4);
  }
  /* Read bolus datafiles */
  for(bi=0; bi<dftNr; bi++) {
    if(verbose>2) printf("reading %s\n", filelist.tok[bi]);
    dftInit(dftlist+bi);
    if(dftRead(filelist.tok[bi], dftlist+bi)) {
      fprintf(stderr, "Error in reading '%s': %s\n",filelist.tok[bi],dfterrmsg);
      str_token_list_empty(&filelist); return(5);
    }
    if(verbose>3) printf(" -> %d frames and %d curves\n",
      dftlist[bi].frameNr, dftlist[bi].voiNr);
    if(dftlist[bi].frameNr<2) {
      fprintf(stderr, "Error: only one sample time in '%s'.\n",filelist.tok[bi]);
      str_token_list_empty(&filelist); return(5);
    }
    /* Get studynr */
    if(bi==0) strcpy(studynr, dftlist[bi].studynr);
    else {if(strcasecmp(studynr, dftlist[bi].studynr)!=0) strcpy(studynr, "");}
    /* We use only the first TAC in case there are several */
    if(dftlist[bi].voiNr>1) {
      fprintf(stderr, "Warning: only first TAC in %s is read.\n", 
              filelist.tok[bi]);
      dftlist[bi].voiNr=1;
    }
    /* Remove NA's */
    if(dftNAfill(dftlist+bi)) {
      fprintf(stderr, "Error: cannot replace missing data in %s\n", 
              filelist.tok[bi]);
      str_token_list_empty(&filelist); return(5);
    }
  }

  /* Allocate temp memory */
  double *bolusv;
  bolusv=(double*)malloc(dftNr*sizeof(double));
  if(bolusv==NULL) {
    fprintf(stderr, "Error: out of memory.\n");
    str_token_list_empty(&filelist);
    for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
    free(dftlist);
    return(6);
  }


  if(!usePeak) {
    /*
     *  Correct the bolus appearance time to the average of it
     */
    if(verbose>1) printf("determining the bolus appearance times\n");
    double slope, ic, meanAppTime;
    /* Determine the appearance times; set to 0 if it is not possible */ 
    for(bi=0; bi<dftNr; bi++) {
      bolusv[bi]=0.0; if(slopeNr<2) continue;
      /* Find the max TAC value */
      f=dftlist[bi].voi[0].y[0]; n=0;
      for(fi=1; fi<dftlist[bi].frameNr; fi++) if(dftlist[bi].voi[0].y[fi]>f) {
        f=dftlist[bi].voi[0].y[fi]; n=fi;
      }
      if(verbose>2) printf("curve #%d: max %g at sample %d\n", bi+1, f, n+1);
      /* If the first sample is the max, then we cannot determine 
         appearance time */
      if(n==0) {bolusv[bi]=nan(""); continue;}
      /* Determine the max "derivative" */
      /* and appearance time as its intercept with x axis */
      ret=highest_slope(
        dftlist[bi].x, dftlist[bi].voi[0].y, dftlist[bi].frameNr, slopeNr,
        &slope, &ic, NULL, NULL
      );
      if(ret) {
        fprintf(stderr, "Error (%d): cannot calculate max slope.\n", ret);
        str_token_list_empty(&filelist); free(bolusv);
        for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
        free(dftlist);
        return(7);
      }
      if(slope!=0.0) bolusv[bi]=-ic/slope; else bolusv[bi]=nan("");
      if(verbose>2)
        printf("  curve #%d: slopeMax %g appTime %g\n", bi+1, slope, bolusv[bi]);
    }
    /* Calculate the mean appearance time (including only positive values) */
    meanAppTime=0; n=0;
    for(bi=0; bi<dftNr; bi++) if(!isnan(bolusv[bi]) && bolusv[bi]>0.0) {
      meanAppTime+=bolusv[bi]; n++;}
    if(n>0) {
      meanAppTime/=(double)n;
      if(verbose>0) fprintf(stdout, "Mean appearance time is %g\n", meanAppTime);
    }
    /* Correct the bolus sample times according to mean appearance time */
    if(slopeNr>0) for(bi=0; bi<dftNr; bi++) if(!isnan(bolusv[bi])) {
      g=meanAppTime-bolusv[bi];
      if(verbose>0)
        fprintf(stdout, "%s : %g change in sample times\n", filelist.tok[bi], g);
      for(fi=0; fi<dftlist[bi].frameNr; fi++) {
        dftlist[bi].x[fi]+=g; dftlist[bi].x1[fi]+=g; dftlist[bi].x2[fi]+=g;}
      if(verbose==11) dftPrint(dftlist+bi);
    }
    /* Add zero sample times with zero value if necessary */
    for(bi=0; bi<dftNr; bi++) if(dftlist[bi].x[0]>0.0) {
      dftAddnullframe(dftlist+bi);
      /* Set the "null" time closer to the mean appearance time */
      if(dftlist[bi].x[1]>meanAppTime) dftlist[bi].x[0]=meanAppTime;
    }

  } else {

    /*
     *  Correct the peak time to the average of it
     */
    if(verbose>1) printf("determining the peak times\n");
    double maxv;
    /* Determine the peak times */ 
    for(bi=0; bi<dftNr; bi++) {
      ret=dftMinMaxTAC(dftlist+bi, 0, NULL, bolusv+bi, 
                       NULL, &maxv, NULL, NULL, NULL, NULL);
      if(ret) {
        fprintf(stderr, "Error: cannot determine TAC peak.\n");
        str_token_list_empty(&filelist); free(bolusv);
        for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
        free(dftlist);
        return(6);
      }
      if(verbose>2) printf("curve #%d: max %g at %g\n", bi+1, maxv, bolusv[bi]);
    }
    /* What is the latest peak time? */
    double latestPeak;
    latestPeak=bolusv[0];
    for(bi=1; bi<dftNr; bi++) if(bolusv[bi]>latestPeak) latestPeak=bolusv[bi];
    if(verbose>2) printf("latest peak time is %g\n", latestPeak);
    /* Correct the peak times according to the latest peak time */
    for(bi=0; bi<dftNr; bi++) {
      g=latestPeak-bolusv[bi];
      if(verbose>0)
        fprintf(stdout, "%s : %g change in sample times\n", filelist.tok[bi], g);
      for(fi=0; fi<dftlist[bi].frameNr; fi++) {
        dftlist[bi].x[fi]+=g; dftlist[bi].x1[fi]+=g; dftlist[bi].x2[fi]+=g;}
      if(verbose==11) dftPrint(dftlist+bi);
    }
  }

  /*
   *  Scale the levels of curves based on AUC
   */
  double shortestTime, longestTime;
  /* Find the shortest and longest bolus curve */
  shortestTime=longestTime=dftlist[0].x[dftlist[0].frameNr-1];
  for(bi=1; bi<dftNr; bi++) {
    if(dftlist[bi].x[dftlist[bi].frameNr-1]<shortestTime)
      shortestTime=dftlist[bi].x[dftlist[bi].frameNr-1];
    else if(dftlist[bi].x[dftlist[bi].frameNr-1]>longestTime)
      longestTime=dftlist[bi].x[dftlist[bi].frameNr-1];
  }
  if(verbose>1) 
    printf("The shortest and longest bolus curve lengths are %g and %g\n",
           shortestTime, longestTime);
  if(shortestTime<=0.0) {
    fprintf(stderr, "Error: check the bolus sample times!\n");
    for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
    free(dftlist);
    str_token_list_empty(&filelist); free(bolusv);
    return(8);
  }
  /* Calculate AUC 0-shortestTime for all curves */
  double x[2], yi[2];
  x[0]=0.0; x[1]=shortestTime;
  for(bi=0; bi<dftNr; bi++) {
    ret=interpolate(dftlist[bi].x, dftlist[bi].voi[0].y, dftlist[bi].frameNr,
                    x, NULL, yi, NULL, 2);
    if(ret) {
      fprintf(stderr, "Error %d in AUC calculation: check sample times!\n", ret);
      str_token_list_empty(&filelist);
      for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
      free(dftlist);
      free(bolusv); return(8);
    }
    bolusv[bi]=yi[1]-yi[0];
    if(verbose>3) printf("Curve #%d: AUC0-%g = %g\n", bi+1, shortestTime, bolusv[bi]);
  }
  /* Calculate the mean AUC */
  f=0.0; for(bi=0; bi<dftNr; bi++) f+=bolusv[bi];
  f/=(double)dftNr;
  if(verbose>2) fprintf(stdout, "Mean AUC is %g\n", f);
  /* Correct the bolus curves according to mean AUC */
  if(scaling!=0) {
    if(verbose>1) printf("scaling the levels of curves based on AUC\n");
    for(bi=0; bi<dftNr; bi++) {
      g=f/bolusv[bi];
      if(verbose>0)
        fprintf(stdout, "%s : scaling with factor %g\n", filelist.tok[bi], g);
      for(fi=0; fi<dftlist[bi].frameNr; fi++) dftlist[bi].voi[0].y[fi]*=g;
      if(verbose==13) dftPrint(dftlist+bi);
    }
  }


  /*
   *  Interpolate bolus curves to the same sample times
   */
  DFT idft;
  dftInit(&idft); bi=0;
  /* Use autointerpolate for the first TAC just to get the times */
  f=0.5*(shortestTime+longestTime);
  if(verbose>1) printf("interpolating to time %g\n", f);
  ret=dftAutointerpolate(dftlist+bi, &idft, f, verbose-2);
  if(ret) {
    fprintf(stderr, "Error %d: cannot create interpolated curve.\n", ret);
    str_token_list_empty(&filelist);
    for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
    free(dftlist);
    free(bolusv); return(11);
  }
  /* Remove comments from interpolated data */
  strcpy(idft.comments, "");
  /* Set extrapolated values to NaN */
  f=dftlist[bi].x[dftlist[bi].frameNr-1];
  for(fi=idft.frameNr-1; fi>0; fi--)
    if(idft.x[fi-1]>=f) idft.voi[bi].y[fi]=nan(""); else break;
  /* Add places for the others */
  ret=dftAddmem(&idft, dftNr-1);
  if(ret) {
    fprintf(stderr, "Error %d in memory allocation.\n", ret);
    str_token_list_empty(&filelist);
    for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
    free(dftlist); free(bolusv); dftEmpty(&idft); return(11);
  }
  /* and interpolate those */
  for(bi=1; bi<dftNr; bi++) {
    ret=interpolate4pet(dftlist[bi].x, dftlist[bi].voi[0].y, dftlist[bi].frameNr,
          idft.x1, idft.x2, idft.voi[bi].y, NULL, NULL, idft.frameNr);
    if(ret) {
      fprintf(stderr, "Error %d in interpolation of %dth curve.\n", ret, bi+1);
      str_token_list_empty(&filelist);
      for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
      free(dftlist); free(bolusv); dftEmpty(&idft); return(12);
    }
    idft.voiNr++;
    /* and set extrapolated values to NaN */
    f=dftlist[bi].x[dftlist[bi].frameNr-1];
    for(fi=idft.frameNr-1; fi>0; fi--)
      if(idft.x[fi-1]>=f) idft.voi[bi].y[fi]=nan(""); else break;
  }
  /* Set file and time type to match original (first) file */
  idft.timetype=dftlist[0].timetype;
  idft._type=dftlist[0]._type;
  if(verbose>10) dftPrint(&idft);

  /* Original data is not needed anymore */
  for(bi=dftNr-1; bi>=0; bi--) dftEmpty(dftlist+bi);
  free(dftlist); free(bolusv);
  

  /*
   *  Calculate average and SD for each interpolated sample
   */
  if(verbose>1) printf("calculating mean and sd\n");
  for(fi=m=0; fi<idft.frameNr; fi++) {
    /* Mean */
    g=h=0.0; n=0;
    for(bi=0; bi<idft.voiNr; bi++) if(!isnan(idft.voi[bi].y[fi])) {
      g+=idft.voi[bi].y[fi];
      h+=idft.voi[bi].y[fi]*idft.voi[bi].y[fi];
      n++;
    }
    if(verbose>6) printf("  sample %d sum=%g n=%d\n", 1+fi, g, n);
    if(n==0) continue; // leave out samples where no mean could be computed
    /* mean */
    idft.voi[0].y2[m]=g/(double)n;
    /* sd */
    if(n<2) {idft.voi[1].y2[m]=0.0;} else {
    g*=g; idft.voi[1].y2[m]=sqrt( (h-g/(double)n)/(double)(n-1) );}
    /* Next sample */
    m++;
  }
  idft.frameNr=m;
  if(verbose>9) dftPrint(&idft);


  /*
   *  Write average datafile
   */
  if(verbose>1) printf("writing average data in %s\n", ofile);
  if(*studynr=='\0' || strcmp(studynr, ".")==0) strcpy(idft.studynr, "mean");
  else strcpy(idft.studynr, studynr);
  /* Set data */
  if(save_errors) idft.voiNr=2; else idft.voiNr=1;
  strcpy(idft.voi[0].voiname, "Avg");
  strcpy(idft.voi[1].voiname, "SD");
  for(fi=0; fi<idft.frameNr; fi++) {
    idft.voi[0].y[fi]=idft.voi[0].y2[fi];
    idft.voi[1].y[fi]=idft.voi[1].y2[fi];
  }
  /* Write the file */
  dftSetComments(&idft);
  ret=dftWrite(&idft, ofile);
  if(ret) {
    fprintf(stderr, "Error in writing '%s': %s\n", ofile, dfterrmsg);
    str_token_list_empty(&filelist); dftEmpty(&idft);
    return(15);
  }
  dftEmpty(&idft);
  if(verbose>0) {
    if(!save_errors) fprintf(stderr, "Average curve written in %s\n", ofile);
    else fprintf(stderr, "Average and SD curves written in %s\n", ofile);
  }

  str_token_list_empty(&filelist);

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
