/** @file imgdelay.c
 *  @brief Make a map of time delay between dynamic PET image and BTAC.
 *  @copyright (c) Turku PET Centre
 *  @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/*****************************************************************************/
#include "tpcextensions.h"
#include "tpcift.h"
#include "tpccsv.h"
#include "tpctac.h"
#include "tpcimage.h"
#include "tpcli.h"
#include "tpclinopt.h"
#include "tpctacmod.h"
/*****************************************************************************/

/*****************************************************************************/
/* Usage/help text of the program: one array element per output line, with a
   null pointer (0) as the end marker. The table is handed to tpcPrintUsage()
   and tpcHtmlUsage() in main().
   NOTE(review): "@P" and " -stdoptions" look like placeholders expanded by the
   usage printer (program name and standard option list) - confirm in tpcclib. */
static char *info[] = {
  "Make a map of time delay between dynamic PET image and BTAC.",
  "Reversible one-tissue compartment model with blood volume is applied.",
  "Positive delay time means that tissue curve is delayed as compared to",
  "the input curve, and vice versa. Thus, input curve needs to be moved",
  "by the delay time to match the tissue curve.",
  " ",
  "Currently only NIfTI format is supported.",
  " ",
  "Usage: @P [options] imgfile btacfile delaymap",
  " ",
  "Options:",
  " -min=<Time (sec)> and -max=<Time (sec)>",
  "     The range of time delays to be tested; by default -10 - +50 s.",
  "     Large range will increase computation time.",
  " -end=<Fit end time (sec)>",
  "     Use data from 0 to end time; by default, 300 s. End time may need to",
  "     be reduced so that BTAC with negative delay extends to end time.",
  " -thr=<threshold%>",
  "     Pixels with AUC less than (threshold/100 x BTAC AUC) are set to zero.",
  "     Default is 1%.",
  " -Vb=<filename>",
  "     Vb map is saved in units mL blood/mL PET volume.",
  " -K1=<filename>",
  "     K1 map is saved in units mL blood/(min*mL PET volume).",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "See also: fitdelay, tactime",
  " ",
  "Keywords: image, time delay",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Turn on wildcard expansion (globbing) of the command line. It is disabled
   by default in mingw-w64 (_dowildcard=0); in MinGW32 define _CRT_glob
   instead, if necessary. In Unix and Linux wildcard command-line processing
   is enabled by default, so this definition is not needed there. */
/* MinGW32 alternative, kept for reference and currently disabled:
#undef _CRT_glob
#define _CRT_glob -1
*/
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/**
 *  Main
 */
int main(int argc, char **argv)
{
  int ai, help=0, version=0, verbose=1;
  int ret;
  char imgfile[FILENAME_MAX], btacfile[FILENAME_MAX], mapfile[FILENAME_MAX];
  char vbfile[FILENAME_MAX], k1file[FILENAME_MAX];
  double endtime=300.0; // fit end time in seconds
  double endtimemin=120.0; // shortest fit end time allowed
  int drange[2]={-10,+50};
  float calcThreshold=0.01;


  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  imgfile[0]=btacfile[0]=mapfile[0]=vbfile[0]=k1file[0]=(char)0;
  /* Options */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    char *cptr=argv[ai]+1; if(*cptr=='-') cptr++; if(!*cptr) continue;
    if(strncasecmp(cptr, "END=", 4)==0) {
      ret=atofCheck(cptr+4, &endtime); if(!ret && endtime>0.0) continue;
    } else if(strncasecmp(cptr, "MIN=", 4)==0) {
      if(atoiCheck(cptr+4, &drange[0])==0) continue;
    } else if(strncasecmp(cptr, "MAX=", 4)==0) {
      if(atoiCheck(cptr+4, &drange[1])==0) continue;
    } else if(strncasecmp(cptr, "THR=", 4)==0) {
      double v; ret=atofCheck(cptr+4, &v);
      if(!ret && v<100.0) {calcThreshold=(float)(0.01*v); continue;}
    } else if(strncasecmp(cptr, "K1=", 3)==0) {
      strlcpy(k1file, cptr+3, FILENAME_MAX); if(strlen(k1file)) continue;
    } else if(strncasecmp(cptr, "VB=", 3)==0) {
      strlcpy(vbfile, cptr+3, FILENAME_MAX); if(strlen(vbfile)) continue;
    }
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    return(1);
  } else break;
  
  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); return(0);}

  /* Process other arguments, starting from the first non-option */
  if(ai<argc) strlcpy(imgfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(btacfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(mapfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) {fprintf(stderr, "Error: too many arguments: '%s'.\n", argv[ai]); return(1);}
  /* Is something missing? */
  if(!mapfile[0]) {tpcPrintUsage(argv[0], info, stdout); return(1);}

  /* Check options */
  if(endtime<endtimemin) {
    fprintf(stderr, "Error: too short fit time set with option -end.\n");
    return(1);
  }
  if(drange[0]>drange[1]) {int i=drange[0]; drange[0]=drange[1]; drange[1]=i;}
  if((drange[1]-drange[0])>180) {
    fprintf(stderr, "Error: too wide delay range.\n");
    return(1);
  } else if((drange[1]-drange[0])>120) {
    fprintf(stderr, "Warning: large delay range.\n");
  } else if((drange[1]-drange[0])<4) {
    fprintf(stderr, "Error: too short delay range.\n");
    return(1);
  }

  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("imgfile := %s\n", imgfile);
    printf("btacfile := %s\n", btacfile);
    printf("mapfile := %s\n", mapfile);
    printf("endtime := %g s\n", endtime);
    printf("delay_range := %d - %d s\n", drange[0], drange[1]);
    printf("threshold := %g\n", calcThreshold);
    if(vbfile[0]) printf("vbfile := %s\n", vbfile);
    if(k1file[0]) printf("k1file := %s\n", k1file);
    fflush(stdout);
  }

  TPCSTATUS status; statusInit(&status);
  statusSet(&status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  status.verbose=verbose-1;


  /*
   *  Read BTAC
   */
  if(verbose>1) printf("reading %s\n", btacfile);
  TAC tac; tacInit(&tac);
  ret=tacRead(&tac, btacfile, &status);
  if(ret!=TPCERROR_OK) {
    fprintf(stderr, "Error (%d): %s\n", ret, errorMsg(status.error));
    tacFree(&tac); return(2);
  }
  if(verbose>2) {
    printf("fileformat := %s\n", tacFormattxt(tac.format));
    printf("tacNr := %d\n", tac.tacNr);
    printf("sampleNr := %d\n", tac.sampleNr);
    printf("xunit := %s\n", unitName(tac.tunit));
    printf("yunit := %s\n", unitName(tac.cunit));
    printf("isframe := %d\n", tac.isframe);
    fflush(stdout);
  }
  /* Make sure that frame mid times are set */
  tacSetX(&tac, NULL);


  /*
   *  Read image data
   */
  if(verbose>1) {printf("reading %s\n", imgfile); fflush(stdout);}
  IMG img; imgInit(&img);
  ret=imgRead(&img, imgfile, &status);
  if(ret!=TPCERROR_OK) { // error
    tacFree(&tac); imgFree(&img);
    fprintf(stderr, "Error: %s (%s)\n", errorMsg(status.error), imgfile); fflush(stderr);
    return(2);
  }
  if(imgNaNs(&img, 1)>0)
    if(verbose>0) fprintf(stderr, "Warning: missing pixel values.\n");
  if(verbose>2) imgContents(&img, stdout);
  double ixmin, ixmax;
  if(imgXRange(&img, &ixmin, &ixmax)!=TPCERROR_OK) {
    fprintf(stderr, "Error: invalid sample times in %s\n", imgfile); fflush(stderr);
    tacFree(&tac); imgFree(&img); return(2);
  }
  if(verbose>1) printf("Image range: %g - %g\n", ixmin, ixmax);
  /* Refine fit end time */
  if(ixmax<endtimemin) {
    fprintf(stderr, "Error: image time range is too short.\n");
    tacFree(&tac); imgFree(&img); return(2);
  }
  if(ixmax>endtime) endtime=ixmax;


  /* Convert BTAC time units to same as in the image */
  if(tac.tunit==UNIT_UNKNOWN) {
    if(verbose>0) fprintf(stderr, "Warning: missing BTAC time units.\n");
    double bxmin, bxmax;
    if(tacXRange(&tac, &bxmin, &bxmax)!=TPCERROR_OK) {
      fprintf(stderr, "Error: invalid sample times in %s\n", btacfile); fflush(stderr);
      tacFree(&tac); return(2);
    }
    unit guessedUnit=img.tunit;
    if(img.tunit==UNIT_SEC && bxmax<0.2*ixmax) guessedUnit=UNIT_MIN;
    if(img.tunit==UNIT_MIN && bxmax>20.*ixmax) guessedUnit=UNIT_SEC;
    fprintf(stderr, "Warning: assuming BTAC sample times are in units %s.\n", unitName(guessedUnit));
    tac.tunit=guessedUnit;
  }
  if(tacXUnitConvert(&tac, img.tunit, &status)!=TPCERROR_OK) {
    fprintf(stderr, "Error: %s (%s)\n", errorMsg(status.error), btacfile); fflush(stderr);
    tacFree(&tac); imgFree(&img); return(2);
  }
  double bxmin, bxmax;
  if(tacXRange(&tac, &bxmin, &bxmax)!=TPCERROR_OK) {
    fprintf(stderr, "Error: invalid sample times in %s\n", btacfile); fflush(stderr);
    tacFree(&tac); return(2);
  }
  if(verbose>1) printf("BTAC range: %g - %g\n", bxmin, bxmax);


  /*
   *  Check time ranges
   */
  if(verbose>1) {printf("checking time range\n"); fflush(stdout);}
  ret=0;
  if(tac.sampleNr<5) {fprintf(stderr, "Error: BTAC has too few samples.\n"); ret++;}
  if(img.dimt<5) {fprintf(stderr, "Error: image has too few frames.\n"); ret++;}
  if(bxmax<endtimemin) {fprintf(stderr, "Error: BTAC time range is too short.\n"); ret++;}
  if(ixmax<endtimemin) {fprintf(stderr, "Error: image time range is too short.\n"); ret++;}
  if(drange[0]<0 && bxmax<(endtime+drange[0])) {
    endtime+=drange[0];
    if(endtime<endtimemin) {
      fprintf(stderr, "Error: BTAC does not extend to required end time.\n"); ret++;}
  }
  int frameNr=0;
  for(int i=0; i<img.dimt; i++) if(img.x[i]<=endtime) frameNr++;
  if(frameNr<5) {fprintf(stderr, "Error: image has too few frames in fit time range.\n"); ret++;}
  if(ret>0) {tacFree(&tac); imgFree(&img); return(2);}

  /* Calculate threshold value */
  double thrs=tacAUC(&tac, 0, 0.0, img.x[frameNr-1], NULL);
  thrs*=calcThreshold;

  /*
   *  Make BTACs moved in time, interpolated and integrated to image frames
   */
  if(verbose>1) {printf("moving BTAC\n"); fflush(stdout);}
  int moveNr=1+(drange[1]-drange[0]);
  if(verbose>2) printf("frameNr := %d\nmoveNr := %d\n", frameNr, moveNr);
  TAC dtac; tacInit(&dtac); // Delayed BTACs at PET frames
  if(tacAllocate(&dtac, frameNr, moveNr)!=TPCERROR_OK) {
    fprintf(stderr, "Error: cannot allocate memory.\n"); fflush(stderr);
    tacFree(&tac); imgFree(&img); return(3);
  }
  dtac.sampleNr=frameNr;
  dtac.tacNr=moveNr;
  dtac.isframe=1;
  for(int i=0; i<dtac.sampleNr; i++) { // x1 and x2 values are need later when integrating pixel TAC
    dtac.x1[i]=img.x1[i]; dtac.x[i]=img.x[i]; dtac.x2[i]=img.x2[i];}
  TAC ditac; tacInit(&ditac); // Delayed integrated BTACs at PET frames
  if(tacDuplicate(&dtac, &ditac)!=TPCERROR_OK) {
    fprintf(stderr, "Error: cannot allocate memory.\n"); fflush(stderr);
    tacFree(&tac); imgFree(&img); tacFree(&dtac); return(3);
  }
  ret=0;
  if(verbose>1) {printf("integrating moved BTACs\n"); fflush(stdout);}
  for(int di=0; di<dtac.tacNr; di++) {
    if(di==0) for(int i=0; i<tac.sampleNr; i++) tac.x[i]+=drange[0]; // do NOT modify x1 or x2
    else for(int i=0; i<tac.sampleNr; i++) tac.x[i]+=1.0;
    ret=liInterpolateForPET(tac.x, tac.c[0].y, tac.sampleNr, dtac.x1, dtac.x2, 
                            dtac.c[di].y, ditac.c[di].y, NULL, dtac.sampleNr, 3, 1, 0);
    if(ret) break;
  }
  if(ret) {
    fprintf(stderr, "Error: cannot interpolate delayed BTACs.\n"); fflush(stderr);
    tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); return(3);
  }
  if(verbose>3) {
    char *dtacfile="delayed_btacs.dat";
    printf("writing %s\n", dtacfile);
    FILE *fp; fp=fopen(dtacfile, "w");
    if(fp!=NULL) {
      tacWrite(&dtac, fp, TAC_FORMAT_PMOD, 1, &status);
      fclose(fp);
    }
    char *ditacfile="delayed_btac_integrals.dat";
    printf("writing %s\n", ditacfile);
    fp=fopen(ditacfile, "w");
    if(fp!=NULL) {
      tacWrite(&ditac, fp, TAC_FORMAT_PMOD, 1, &status);
      fclose(fp);
    }
  }


  /*
   *  Allocate memory for delay map
   */
  if(verbose>1) {printf("preparing delay map\n"); fflush(stdout);}
  IMG map; imgInit(&map);
  if(imgAllocate(&map, img.dimz, img.dimy, img.dimx, 1, &status)!=TPCERROR_OK) {
    fprintf(stderr, "Error: cannot allocate memory\n"); fflush(stderr);
    tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); return(4);
  }
  imgCopyHeader(&img, &map);
  map.cunit=UNIT_SEC;

  /* Allocate memory for Vb and/or K1 images, if requested */
  IMG vbimg; imgInit(&vbimg);
  if(vbfile[0]) {
    if(imgAllocate(&vbimg, map.dimz, map.dimy, map.dimx, 1, &status)!=TPCERROR_OK) {
      fprintf(stderr, "Error: cannot allocate memory\n"); fflush(stderr);
      tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); imgFree(&map); return(4);
    }
    imgCopyHeader(&map, &vbimg);
    vbimg.cunit=UNIT_ML_PER_ML;
  }
  IMG k1img; imgInit(&k1img);
  if(k1file[0]) {
    if(imgAllocate(&k1img, map.dimz, map.dimy, map.dimx, 1, &status)!=TPCERROR_OK) {
      fprintf(stderr, "Error: cannot allocate memory\n"); fflush(stderr);
      tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); imgFree(&map);
      imgFree(&vbimg); return(4);
    }
    imgCopyHeader(&map, &k1img);
    vbimg.cunit=UNIT_PER_SEC;
  }


  /* Allocate matrices for NNLS */
  if(verbose>1) {printf("allocating memory for NNLS matrices\n"); fflush(stdout);}
  int llsq_m=dtac.sampleNr;
  int llsq_n=3;
  double *llsq_mat=(double*)malloc((llsq_n*llsq_m)*sizeof(double));
  if(llsq_mat==NULL) {
    fprintf(stderr, "Error: cannot allocate memory for NNLS.\n");
    tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); imgFree(&map); 
    imgFree(&vbimg); imgFree(&k1img); return(5);
  }
  double **llsq_a=(double**)malloc(llsq_n*sizeof(double*));
  if(llsq_a==NULL) {
    tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac); imgFree(&map); free(llsq_mat);
    imgFree(&vbimg); imgFree(&k1img); return(5);
  }
  for(int ni=0; ni<llsq_n; ni++) llsq_a[ni]=llsq_mat+ni*llsq_m;
  double r2, llsq_b[llsq_m], llsq_x[llsq_n], llsq_wp[llsq_n], llsq_zz[llsq_m];
  int indexp[llsq_n];


  /*
   *  Pixel-by-pixel
   */
  long long pxlNr=img.dimx*img.dimy*img.dimz;
  if(verbose>0) {printf("processing %llu image pixels\n", pxlNr); fflush(stdout);}
  for(int zi=0; zi<img.dimz; zi++) {
    if(img.dimz>2 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
    for(int yi=0; yi<img.dimy; yi++) for(int xi=0; xi<img.dimx; xi++) {
      map.m[zi][yi][xi][0]=0.0;
      if(vbfile[0]) vbimg.m[zi][yi][xi][0]=0.0;
      if(k1file[0]) k1img.m[zi][yi][xi][0]=0.0;
      double ttac[llsq_m], ittac[llsq_m];
      for(int mi=0; mi<llsq_m; mi++) ttac[mi]=img.m[zi][yi][xi][mi];
      if(liIntegratePET(dtac.x1, dtac.x2, ttac, llsq_m, ittac, NULL, 0)!=0) continue;
      if(ittac[llsq_m-1]<thrs) continue;
      int diBest=-1;
      double r2Best=1.0E+100;
      double vbBest=0.0, k1Best=0.0;
      for(int di=0; di<dtac.tacNr; di++) {
        dtac.c[di].size=1.0E+100; // save r2 in here
        /* Setup data matrix A and vector B */
        for(int mi=0; mi<llsq_m; mi++)
          llsq_b[mi]=img.m[zi][yi][xi][mi];      // TTAC
        for(int mi=0; mi<llsq_m; mi++) {
          llsq_mat[mi]=dtac.c[di].y[mi];         // BTAC
          llsq_mat[mi+llsq_m]=ditac.c[di].y[mi]; // BTAC integral
          llsq_mat[mi+2*llsq_m]=-ittac[mi];      // TTAC integral
        }
#if(0)
        printf("\nmatrix %d\n", 1+di);
        for(int mi=0; mi<llsq_m; mi++) printf("%g	%g	%g	%g\n", 
          llsq_b[mi], llsq_mat[mi], llsq_mat[mi+llsq_m], llsq_mat[mi+2*llsq_m]);
#endif
        /* Compute NNLS */
        int ret=nnls(llsq_a, llsq_m, llsq_n, llsq_b, llsq_x, &r2, llsq_wp, llsq_zz, indexp);
        if(ret>1) continue;
        dtac.c[di].size=r2; // save r2 in here
        if(r2<r2Best) {
          r2Best=r2; diBest=di; 
          vbBest=llsq_x[0];
          k1Best=llsq_x[1]-llsq_x[0]*llsq_x[2];
        }
      } // next delay
      if(diBest<0) continue; // not successful, let delay be zero
      //printf("best r2 := %g\n", dtac.c[diBest].size);
      map.m[zi][yi][xi][0]=(float)(drange[0]+diBest);
      if(vbfile[0]) {
        if(vbBest>1.0) vbBest=1.0;
        vbimg.m[zi][yi][xi][0]=(float)vbBest;
      }
      if(k1file[0]) {
        k1Best*=60.0;  // convert to per min
        if(k1Best>6.0) k1Best=6.0; else if(k1Best<0.0) k1Best=0.0;
        k1img.m[zi][yi][xi][0]=(float)k1Best;
      }
#if(0)
      /* Try to refine delay with r2 weighted average with adjacent delays */
      if(diBest==0 || diBest==dtac.tacNr-1) continue;
      float w1=1.0/(1.0E-12+dtac.c[diBest-1].size);
      float w2=1.0/(1.0E-12+dtac.c[diBest].size);
      float w3=1.0/(1.0E-12+dtac.c[diBest+1].size);
      float d=(w1*(float)(diBest-1) + w2*(float)diBest + w3*(float)(diBest+1))/(w1+w2+w3);
      map.m[zi][yi][xi][0]=(float)drange[0] + d;
#endif
    }
  }
  if(img.dimz>2 && verbose>0) {fprintf(stdout, "\n"); fflush(stdout);}
  /* Free NNLS matrix data */
  free(llsq_a); free(llsq_mat);


  /* Dynamic data not needed after this */
  tacFree(&tac); imgFree(&img); tacFree(&dtac); tacFree(&ditac);

  /*
   *  If Vb or K1 images were requested, save them
   */
  if(vbfile[0]) {
    if(imgWrite(&vbimg, vbfile, &status)!=TPCERROR_OK)
      fprintf(stderr, "Error: %s\n", errorMsg(status.error));
    if(verbose>0) {printf("  %s written.\n", vbfile); fflush(stdout);}
  }
  if(k1file[0]) {
    if(imgWrite(&k1img, k1file, &status)!=TPCERROR_OK)
      fprintf(stderr, "Error: %s\n", errorMsg(status.error));
    if(verbose>0) {printf("  %s written.\n", k1file); fflush(stdout);}
  }
  imgFree(&vbimg); imgFree(&k1img);


  /* 
   *  Write the delay map
   */
  if(verbose>1) {printf("writing %s\n", mapfile); fflush(stdout);}
  ret=imgWrite(&map, mapfile, &status);
  if(ret!=TPCERROR_OK) {
    fprintf(stderr, "Error: %s\n", errorMsg(status.error)); fflush(stderr);
    imgFree(&map);
    return(11);
  }
  if(verbose>0) {printf("  %s written.\n", mapfile); fflush(stdout);}

  imgFree(&map);

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
