/** @file imgbfh2om.c
    @brief Estimation of perfusion from dynamic radiowater PET images
     applying basis function approach.

    @note A copy of imgbfh2o where option -mask is to be implemented. Also, tests use NIfTI format.

    @copyright (c) Turku PET Centre
    @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/*****************************************************************************/
#include "libtpccurveio.h"
#include "libtpcmodext.h"
#include "libtpcmisc.h"
#include "libtpcmodel.h"
#include "libtpcimgio.h"
#include "libtpcimgp.h"
/*****************************************************************************/
#define MAX_N 2
/*****************************************************************************/

/*****************************************************************************/
/** Usage/help text of the program, one output line per array element.
 *
 *  The array is terminated with a null pointer (the final 0) and is handed
 *  to tpcPrintUsage() and tpcHtmlUsage() in main().
 *  The token "@P" appears where the program name belongs; presumably it is
 *  substituted by the usage printing functions - verify against the library.
 *
 *  NOTE(review): option -mask is described below, but main() does not
 *  contain any parsing code for it.
 */
static char *info[] = {
  "Estimation of rate constants K1, k2 and Va from dynamic PET image in",
  "ECAT 6.3, ECAT 7.x, NIfTI-1, or Analyze 7.5 file format using",
  "one-tissue compartment model (1), solved using the basis function",
  "approach (2, 3).",
  " ",
  "When applied to dynamic [O-15]H2O studies, the resulting K1 image",
  "equals perfusion (blood flow) image. K1 image can be divided by tissue",
  "density (g/mL) (option -density) and multiplied by 100 (option -dL)",
  "to achieve the blood flow image in units (mL blood)/((100 g tissue) * min).",
  " ",
  "When applied to dynamic [O-15]O2 brain studies, the resulting K1 image",
  "can be converted to oxygen consumption image by multiplying it by",
  "arterial oxygen concentration (4) (mL O2 / dL blood) to get the",
  "parametric image in units mL O2 / ((100 ml tissue) * min).",
  "The model assumptions hold only when oxygen consumption is 1-6.7",
  "mL O2/(100g * min) and fit time is set to 300 s or less (4).",
  " ",
  "Arterial blood TAC must be corrected for decay and delay, with sample times",
  "in seconds. Dynamic PET image must be corrected for decay. Fit time must",
  "be given in seconds.",
  " ",
  "Usage: @P [Options] btacfile imgfile fittime flowfile",
  " ",
  "Options:",
  " -mL or -dL",
  "     Units in flow and Va images will be given per mL or per dL,",
  "     respectively. By default, units are per mL.",
  " -density[=<value>]",
  "     With option -density the flow is calculated per gram or 100g tissue.",
  "     Tissue density can be changed from the default 1.04 g/mL.",
  " -Vd=<filename>",
  "     Parametric K1/k2 (Vd, apparent p) image is saved.",
  " -k2=<filename>",
  "     Parametric k2 image is saved; in some situations perfusion calculation",
  "     from k2 can be more accurate than the default assumption of f=K1.",
  "     Perfusion can be calculated from k2 using equation f=k2*pH2O, where",
  "     pH2O is the physiological partition coefficient of water in tissue.",
  " -Va=<filename>",
  "     Parametric Va image is saved.",
  "     Set -Va=0, if Va=0 is assumed (pre-corrected); otherwise Va is fitted.",
  " -wss=<filename>",
  "     Weighted sum-of-squares are written in specified image file.",
  " -thr=<threshold%>",
  "     Pixels with AUC less than (threshold/100 x input AUC) are set to zero;",
  "     default is 0%",
  " -k2min=<Min k2> and -k2max=<Max k2>",
  "     Enter the minimum and maximum k2 in units 1/min, applying to decay",
  "     corrected data.",
  " -fmin=<Min K1> and -fmax=<Max K1>",
  "     Enter the minimum and maximum perfusion value; defaults are",
  "     0.005 and 4.0 mL/(mL*min), respectively.",
  " -pmin=<Min p> and -pmax=<pmax>",
  "     Enter the minimum and maximum value for apparent partition coefficient",
  "     for water; defaults are 0.3 and 1.0 mL/mL, respectively.",
  " -nr=<value>",
  "     Set number of basis functions; default is 500, minimum 100.",
  " -bf=<filename>",
  "     Basis function curves are written in specified file.",
  " -err=<filename>",
  "     Pixels with their k2 in its min or max value (calculated from min and",
  "     max K1 and p values) in the specified imagefile with values 1 and 2,",
  "     respectively, others with value 0.",
  " -mask=<filename>",
  "     Only the masked pixels are processed. If output images exist, then",
  "     pixel values outside of mask are preserved.",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example 1. Calculation of perfusion and arterial blood volume image,",
  "           stopping fit at 180 s:",
  "  @P -Va=s2345va.img s2345abfit.kbq s2345dy1.v 180 s2345flow.v",
  " ",
  "By default, the units of pixel values in the blood flow (K1) image is",
  "(mL blood)/((mL tissue) * min), in Vd image (mL blood)/(mL tissue),",
  "in k2 image 1/min, and in Va image (mL blood/mL tissue),",
  "but the blood flow and Va units can be changed with above listed options.",
  " ",
  "References:",
  "1. Lammertsma AA, Jones T. J Cereb Blood Flow Metab. 1983;3:416-424.",
  "2. Koeppe RA et al. J Cereb Blood Flow metab. 1985;5:224-234.",
  "3. Boellaard R et al. Mol Imaging Biol. 2005;7:273-285.",
  "4. Ohta S, et al. J Cereb Blood Flow Metab. 1992;12:179-192.",
  " ",
  "See also: bfmh2o, imgflow, arlkup, fit_h2o, imgunit, fitdelay, imgcbv",
  " ",
  "Keywords: image, modelling, perfusion, radiowater, basis function method",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Turn on the globbing (wildcard expansion) of the command line, since it is
   disabled by default in mingw-w64 (_dowildcard=0).
   In MinGW32 define _CRT_glob instead, if necessary:
     #undef _CRT_glob
     #define _CRT_glob -1
   In Unix and Linux wildcard command line processing is enabled by default. */
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/**
 *  Program entry point.
 *
 *  Parses the command line, reads the dynamic PET image and the arterial
 *  blood TAC, calculates the basis functions for the requested k2 range,
 *  pre-computes the QR decomposition of the coefficient matrix of each basis
 *  function, and then solves K1 (and optionally Va) for every image pixel,
 *  selecting the basis function that gives the smallest residual norm.
 *  Finally the requested parametric images are written.
 *
 *  @param argc Number of command-line arguments.
 *  @param argv Command-line arguments.
 *  @return 0 when successful; 1 for invalid command-line; 2 if input data
 *   could not be read or has too few frames; 3 if TAC memory could not be
 *   extended; 6 if basis functions could not be calculated; 8 when out of
 *   memory; 9 if QR decomposition failed; 11 if basis functions could not be
 *   written; otherwise the non-zero return value of the failed imgWrite().
 */
int main(int argc, char **argv)
{
  int      ai, help=0, version=0, verbose=1;
  int      fitdimt;
  char     inpfile[FILENAME_MAX], petfile[FILENAME_MAX], flowfile[FILENAME_MAX];
  char     vdfile[FILENAME_MAX], k2file[FILENAME_MAX], wssfile[FILENAME_MAX];
  char     errfile[FILENAME_MAX], bfsfile[FILENAME_MAX];
  char     vafile[FILENAME_MAX], maskfile[FILENAME_MAX], tmp[FILENAME_MAX+1], *cptr;
  int      bfNr=500, *bf_opt_nr=NULL;
  float    threshold, calcThreshold=0.0;
  double   fittime=0.0;
  double   flowMin=0.005, flowMax=4.0; // mL/(min*mL)
  double   pWaterMin=0.3, pWaterMax=1.0; // mL/mL
  double   k2min=-1.0, k2max=-1.0;
  int      ret, fi, pi, xi, yi;
  int      nosolution_nr=0, thresholded_nr=0;
  clock_t  fitStart, fitFinish;
  int      fitVa=1; // 1=fitted, 0=Vb assumed to be zero
  int      per_dl=0; // 0 or 1
  int      per_gram=0; // 0 or 1
  double   density=1.04;

  DFT blood, tac, bf;
  IMG img, flowimg, k2img, vdimg, vaimg, wssimg, errimg;


  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  inpfile[0]=petfile[0]=flowfile[0]=vdfile[0]=k2file[0]=wssfile[0]=(char)0;
  errfile[0]=bfsfile[0]=vafile[0]=maskfile[0]=(char)0;
  /* Get options.
     NOTE(review): option -mask is described in the usage text, but it is not
     parsed here, so maskfile always stays empty and '-mask=...' is rejected
     as an invalid option. */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    /* Skip the leading '-' or '--' */
    cptr=argv[ai]+1; if(*cptr=='-') cptr++;
    /* Each branch 'continue's when the option is valid; falling out of
       the chain means that the option or its value was not accepted */
    if(strcasecmp(cptr, "DL")==0) {
      per_dl=1; continue;
    } else if(strcasecmp(cptr, "ML")==0) {
      per_dl=0; continue;
    } else if(strcasecmp(cptr, "DENSITY")==0) {
      per_gram=1; continue; /* if plain -density, then use default density */
    } else if(strncasecmp(cptr, "DENSITY=", 8)==0) {
      per_gram=1; density=atof_dpi(cptr+8); if(density>0.0) continue;
    } else if(strncasecmp(cptr, "VA=", 3)==0 || strncasecmp(cptr, "VB=", 3)==0) {
      strlcpy(vafile, cptr+3, FILENAME_MAX); if(strlen(vafile)) continue;
    } else if(strncasecmp(cptr, "k2min=", 6)==0) {
      if(atof_with_check(cptr+6, &k2min)==0 && k2min>=0.0) continue;
    } else if(strncasecmp(cptr, "k2max=", 6)==0) {
      if(atof_with_check(cptr+6, &k2max)==0 && k2max>=0.0) continue;
    } else if(strncasecmp(cptr, "fmin=", 5)==0 && strlen(cptr)>5) {
      if(atof_with_check(cptr+5, &flowMin)==0 && flowMin>=0.0) continue;
    } else if(strncasecmp(cptr, "fmax=", 5)==0 && strlen(cptr)>5) {
      if(atof_with_check(cptr+5, &flowMax)==0 && flowMax>=0.0) continue;
    } else if(strncasecmp(cptr, "pmin=", 5)==0 && strlen(cptr)>5) {
      if(atof_with_check(cptr+5, &pWaterMin)==0 && pWaterMin>0.0) continue;
    } else if(strncasecmp(cptr, "pmax=", 5)==0 && strlen(cptr)>5) {
      if(atof_with_check(cptr+5, &pWaterMax)==0 && pWaterMax<1.25) continue;
    } else if(strncasecmp(cptr, "VD=", 3)==0) {
      strlcpy(vdfile, cptr+3, FILENAME_MAX); if(strlen(vdfile)>1) continue;
    } else if(strncasecmp(cptr, "k2=", 3)==0) {
      strlcpy(k2file, cptr+3, FILENAME_MAX); if(strlen(k2file)>1) continue;
    } else if(strncasecmp(cptr, "NR=", 3)==0) {
      /* Upper limit is applied silently; values below 100 are rejected */
      bfNr=atoi(cptr+3); if(bfNr>50000) bfNr=50000;
      if(bfNr>=100) continue;
    } else if(strncasecmp(cptr, "BF=", 3)==0) {
      strlcpy(bfsfile, cptr+3, FILENAME_MAX); if(strlen(bfsfile)>0) continue;
    } else if(strncasecmp(cptr, "WSS=", 4)==0) {
      strlcpy(wssfile, cptr+4, FILENAME_MAX); if(strlen(wssfile)>0) continue;
    } else if(strncasecmp(cptr, "ERR=", 4)==0) {
      strlcpy(errfile, cptr+4, FILENAME_MAX); if(strlen(errfile)>0) continue;
    } else if(strncasecmp(cptr, "THR=", 4)==0 && strlen(cptr)>4) {
      /* Cast is needed because passing a negative char to isdigit() is
         undefined behaviour */
      cptr+=4; if(isdigit((unsigned char)*cptr) || *cptr=='+' || *cptr=='-') {
        /* Threshold is given as percentage; store it as fraction */
        calcThreshold=0.01*atof_dpi(cptr);
        if(calcThreshold>=0.0 && calcThreshold<=2.0) continue;
      }
    }
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    return(1);
  } else break;

  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); return(0);}
  
  /* Process other arguments, starting from the first non-option */
  if(ai<argc) strlcpy(inpfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(petfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) {
    /* Fit time is given in seconds, but used internally in minutes */
    if(!atof_with_check(argv[ai], &fittime)) fittime/=60.0;
    else {
      fprintf(stderr, "Error: invalid fit time '%s'.\n", argv[ai]);
      return(1);
    }
    ai++;
  }
  if(ai<argc) strlcpy(flowfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) {
    fprintf(stderr, "Error: invalid argument '%s'.\n", argv[ai]);
    return(1);
  }
  /* Did we get all the information that we need? */
  if(!flowfile[0]) {
    fprintf(stderr, "Error: missing command-line argument; use option --help\n");
    return(1);
  }
  /* Non-positive fit time means that all available data is used */
  if(fittime<=0.0) fittime=1.0E+020;
  /* Check if Va=0 is assumed */
  if(strcasecmp(vafile, "NONE")==0 || strcasecmp(vafile, "ZERO")==0 ||
     strcasecmp(vafile, "0")==0) {vafile[0]=(char)0; fitVa=0;}
  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("petfile := %s\n", petfile);
    printf("inpfile := %s\n", inpfile);
    printf("flowfile := %s\n", flowfile);
    if(vdfile[0]) printf("vdfile := %s\n", vdfile);
    if(k2file[0]) printf("k2file := %s\n", k2file);
    if(vafile[0]) printf("vafile := %s\n", vafile);
    if(wssfile[0]) printf("wssfile := %s\n", wssfile);
    if(errfile[0]) printf("errfile := %s\n", errfile);
    if(bfsfile[0]) printf("bfsfile := %s\n", bfsfile);
    if(maskfile[0]) printf("maskfile := %s\n", maskfile);
    printf("fitVa := %d\n", fitVa);
    printf("bfNr := %d\n", bfNr);
    printf("per_dl := %d\n", per_dl);
    printf("per_gram := %d\n", per_gram);
    if(per_gram!=0) printf("density := %g\n", density);
    printf("calcThreshold := %g\n", calcThreshold);
    printf("requested_fittime := %g [min]\n", fittime);
    if(k2min>0.0) printf("k2min := %g\n", k2min);
    if(k2max>0.0) printf("k2max := %g\n", k2max);
    printf("flowMin := %g\n", flowMin);
    printf("flowMax := %g\n", flowMax);
    printf("pWaterMin := %g\n", pWaterMin);
    printf("pWaterMax := %g\n", pWaterMax);
  }
  if(verbose>8) {IMG_TEST=verbose-8; SIF_TEST=verbose-8;} else IMG_TEST=SIF_TEST=0;

  /* Check user-defined parameter ranges and calculate range of k2 */
  if(flowMin>=flowMax) {
    fprintf(stderr, "Error: invalid range for perfusion (%g - %g).\n",
    flowMin, flowMax);
    return(1);    
  }
  if(pWaterMin>=pWaterMax) {
    fprintf(stderr, "Error: invalid range for p (%g - %g).\n",
    pWaterMin, pWaterMax);
    return(1);    
  }
  /* If k2 limits were not given by user, derive them from the limits of
     perfusion and partition coefficient (k2=f/p) */
  if(k2min<=0.) {
    k2min=flowMin/pWaterMax;
    if(verbose>1) printf("k2min := %g [1/min]\n", k2min);
  }
  if(k2max<=0.) {
    k2max=flowMax/pWaterMin;
    if(verbose>1) printf("k2max := %g [1/min]\n", k2max);
  }
  if(k2max<=k2min || k2min<=0.) {
    fprintf(stderr, "Error: invalid range for k2 (%g - %g).\n", k2min, k2max);
    return(1);    
  }


  /*
   *  Read PET image and input TAC
   */
  if(verbose>1) printf("reading data files\n");
  dftInit(&blood); dftInit(&tac); imgInit(&img);
  ret=imgReadModelingData(
    petfile, NULL, inpfile, NULL, NULL, &fittime, &fitdimt, &img,
    &blood, &tac, 1, stdout, verbose-2, tmp);
  if(ret!=0) {
    fprintf(stderr, "Error: %s.\n", tmp);
    if(verbose>1) printf("  ret := %d\n", ret);
    return(2);
  }
  if(imgNaNs(&img, 1)>0)
    if(verbose>0) fprintf(stderr, "Warning: missing pixel values.\n");
  /* Set time unit to min */
  dftTimeunitConversion(&blood, TUNIT_MIN);
  dftTimeunitConversion(&tac, TUNIT_MIN);
  /* Check that the image is dynamic */
  if(fitdimt<3) {
    fprintf(stderr, "Error: too few time frames for fitting.\n");
    if(verbose>0) imgInfo(&img);
    imgEmpty(&img); dftEmpty(&blood); dftEmpty(&tac); return(2);
  }
  /* Allocate memory for tissue data and integrals */
  ret=dftAddmem(&tac, 1);
  if(ret!=0) {
    fprintf(stderr, "Error (%d) in allocating memory.\n", ret); fflush(stderr);
    imgEmpty(&img); dftEmpty(&blood); dftEmpty(&tac); return(3);
  }
  /* tac.voi[0] holds the input curve; tac.voi[1] is used as work space for
     the TAC of one pixel at a time */
  strcpy(tac.voi[0].voiname, "input");
  strcpy(tac.voi[1].voiname, "tissue");
  if(verbose>1) {
    printf("fittimeFinal := %g min\n", fittime);
    printf("fitdimt := %d\n", fitdimt);
    fflush(stdout);
  }

  /* Determine the threshold */
  if(verbose>50) dftPrint(&tac);
  if(verbose>2) printf("input_AUC[%g] := %g\n", tac.x2[tac.frameNr-1], tac.voi[0].y2[tac.frameNr-1]);
  threshold=calcThreshold*tac.voi[0].y2[tac.frameNr-1]/60.0;
  if(verbose>2) {printf("threshold_AUC = %g\n", threshold); fflush(stdout);}


  /*
   *  Determine the weights for the fit
   */
  if(verbose>1) printf("setting weights\n");
  ret=imgSetWeights(&img, 0, verbose-5);
  if(ret) {
    fprintf(stderr, "Warning: cannot calculate weights.\n"); fflush(stderr);
    /* set weights to 1 */
    for(fi=0; fi<img.dimt; fi++) img.weight[fi]=1.0;
    img.isWeight=1;
  }


  /*
   *  Calculate the basis functions
   */
  if(verbose>1) {fprintf(stdout, "calculating basis functions\n"); fflush(stdout);}
  dftInit(&bf);
  /* Frame number is temporarily limited to the frames included in the fit;
     ai is free to be used as temporary storage after argument parsing */
  ai=tac.frameNr; tac.frameNr=fitdimt;
  ret=bfRadiowater(&blood, &tac, &bf, bfNr, k2min, k2max, tmp, verbose-2);
  tac.frameNr=ai;
  if(ret) {
    fprintf(stderr, "Error: cannot calculate basis functions (%d).\n", ret); fflush(stderr);
    imgEmpty(&img); dftEmpty(&blood); dftEmpty(&tac); return(6);
  }
  /* Original sampling blood data not needed any more */
  dftEmpty(&blood);
  // Note that basis functions are to be saved later (in bfsfile),
  // after it is known in how many image pixels each basis function was found
  // to give the best fit


  /*
   *  Allocate result images and fill the header info
   */
  if(verbose>1) {printf("allocating memory for parametric images\n"); fflush(stdout);}
  imgInit(&flowimg); imgInit(&k2img); imgInit(&vdimg); imgInit(&vaimg);
  imgInit(&wssimg); imgInit(&errimg);
  ret=imgAllocateWithHeader(&flowimg, img.dimz, img.dimy, img.dimx, 1, &img);
  if(ret!=0) {
    fprintf(stderr, "Error: out of memory.\n");
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
    return(8);
  }
  flowimg.start[0]=0.0; flowimg.end[0]=fittime*60.0;
  flowimg.decayCorrection=IMG_DC_NONCORRECTED;
  flowimg.isWeight=0;
  flowimg.unit=IMGUNIT_ML_PER_ML_PER_MIN;
  /* The optional images copy their header from the flow image; allocation
     is stopped at the first failure */
  if(k2file[0]) {
    ret=imgAllocateWithHeader(&k2img, img.dimz,img.dimy,img.dimx, 1, &flowimg);
    k2img.unit=IMGUNIT_PER_MIN;
  }
  if(ret==0 && vdfile[0]) {
    ret=imgAllocateWithHeader(&vdimg, img.dimz,img.dimy,img.dimx, 1, &flowimg);
    vdimg.unit=IMGUNIT_UNITLESS;
  }
  if(ret==0 && vafile[0]) {
    ret=imgAllocateWithHeader(&vaimg, img.dimz,img.dimy,img.dimx, 1, &flowimg);
    vaimg.unit=IMGUNIT_UNITLESS;
  }
  if(ret==0 && wssfile[0]) {
    ret=imgAllocateWithHeader(&wssimg, img.dimz,img.dimy,img.dimx, 1, &flowimg);
    wssimg.unit=IMGUNIT_UNITLESS;
  }
  if(ret==0 && errfile[0]) {
    ret=imgAllocateWithHeader(&errimg, img.dimz,img.dimy,img.dimx, 1, &flowimg);
    errimg.unit=IMGUNIT_UNITLESS;
  }
  if(ret) {
    fprintf(stderr, "Error: out of memory.\n"); fflush(stderr);
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
    imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
    imgEmpty(&wssimg); imgEmpty(&errimg);
    return(8);
  }


  /*
   *  Allocate memory for QR
   */
  if(verbose>1) {fprintf(stdout, "allocating memory for QR\n"); fflush(stdout);}
  int m, n, M, N;
  int bi, bi_min;
  double rnorm_min, wss, f;
  double   p1, p2, p3;
  double **mem, **A, *B, X[MAX_N], *tau, *residual, RNORM, *chain;
  double *qrweight, **wws, *ws, *wwschain, *ct, *cti;

  /* M = nr of equations (frames), N = nr of fitted linear parameters
     (K1 and, unless Va is assumed to be zero, Va) */
  M=bf.frameNr; N=2; if(fitVa==0) N--;
  /* For each basis function, chain stores the M x N coefficient matrix
     followed by N values for vector tau */
  chain=(double*)malloc((size_t)(M+1)*N*bf.voiNr * sizeof(double));
  mem=(double**)malloc(bf.voiNr * sizeof(double*));
  A=(double**)malloc(M * sizeof(double*));
  B=(double*)malloc(M*sizeof(double));
  residual=(double*)malloc(M*sizeof(double));
  qrweight=(double*)malloc(M*sizeof(double));
  wwschain=(double*)malloc((M*N+2*M)*sizeof(double));
  wws=(double**)malloc(M * sizeof(double*));
  if(chain==NULL || mem==NULL || B==NULL || A==NULL || residual==NULL ||
     qrweight==NULL || wwschain==NULL || wws==NULL)
  {
    fprintf(stderr, "Error: out of memory.\n"); fflush(stderr);
    /* Some of the allocations may have succeeded; free(NULL) is harmless */
    free(chain); free(B); free(residual);
    free(A); free(wwschain); free(wws); free(qrweight); free(mem);
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
    imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
    imgEmpty(&wssimg); imgEmpty(&errimg);
    return(8);
  }
  for(bi=0; bi<bf.voiNr; bi++) mem[bi]=chain+bi*(M+1)*N;
  for(m=0; m<M; m++) wws[m]=wwschain+m*N;
  ws=wwschain+M*N;

  /* Pre-compute QR weights for faster execution */
  for(m=0; m<M; m++) {
    if(img.weight[m]<=1.0e-20) qrweight[m]=0.0;
    else qrweight[m]=sqrt(img.weight[m]);
  }

  /* Make A matrix, and QR decomposition for it, for all pixels
     beforehand for faster execution */
  if(verbose>1) {fprintf(stdout, "calculating QR decomposition\n"); fflush(stdout);}
  for(bi=0; bi<bf.voiNr; bi++) {

    /* Define memory site for coefficient matrix and vector tau */
    for(m=0; m<M; m++) A[m]=mem[bi]+m*N;
    tau=mem[bi]+M*N;

    /* Initiate matrix  (A = mem[bi]) */
    for(m=0; m<M; m++) {
      A[m][0]=bf.voi[bi].y[m];     
      if(N>1) A[m][1]=tac.voi[0].y[m]; // blood TAC for Va estimation
    }

    /* Apply data weights */
    for(m=0; m<M; m++) for(n=0; n<N; n++) A[m][n]*=qrweight[m];

    /* Compute QR decomposition of the coefficient matrix */
    ret=qr_decomp(A, M, N, tau, wws, ws);

    if(ret>0) { /* Decomposition failed */
      fprintf(stderr, "Error: QR decomposition failed for basis function %d.\n", bi+1);
      fflush(stderr);
      free(chain); free(B); free(residual); 
      free(A); free(wwschain); free(wws); free(qrweight); free(mem);
      imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
      imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
      imgEmpty(&wssimg); imgEmpty(&errimg);
      return (9);
    } 
  } /* next BF */


  /*
   *  Compute pixel-by-pixel
   */
  if(verbose>0) {fprintf(stdout, "computing QR pixel-by-pixel\n"); fflush(stdout);}
  thresholded_nr=0; nosolution_nr=0;
  ct=tac.voi[1].y; cti=tac.voi[1].y2;
  /* Allocate memory for BF counters on how often each BF is
     found to provide the optimal fit. The array is indexed with basis
     function index, therefore it is sized by the actual number of basis
     functions and not by the requested number; calloc sets counters to zero */
  bf_opt_nr=(int*)calloc((size_t)(bf.voiNr>0 ? bf.voiNr : 1), sizeof(int));
  if(bf_opt_nr==NULL) {
    fprintf(stderr, "Error: out of memory.\n"); fflush(stderr);
    free(chain); free(B); free(residual);
    free(A); free(wwschain); free(wws); free(qrweight); free(mem);
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
    imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
    imgEmpty(&wssimg); imgEmpty(&errimg);
    return(8);
  }
  fitStart=clock();
  for(pi=0; pi<img.dimz; pi++) {
    if(verbose>6) printf("computing plane %d\n", img.planeNumber[pi]);
    else if(img.dimz>1 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
    for(yi=0; yi<img.dimy; yi++) {
      if(verbose>6 && yi==4*img.dimy/10) printf("  computing row %d\n", yi+1);
      for(xi=0; xi<img.dimx; xi++) {
        if(verbose>6 && yi==4*img.dimy/10 && xi==4*img.dimx/10)
          printf("    computing column %d\n", xi+1);

        /* if end AUC is less than threshold value, then set values to 0 */
        /* Calculate pixel integral */
        for(m=0; m<M; m++) {ct[m]=img.m[pi][yi][xi][m];}     
        ret=petintegral(tac.x1, tac.x2, ct, tac.frameNr, cti, NULL);
        if(verbose>6 && yi==4*img.dimy/10 && xi==4*img.dimx/10) {
          printf("     Pixel (%d,%d,%d), int= %f, threshold= %g\n", pi,yi,xi,cti[M-1],threshold); 
          if(verbose>7) {
            for(m=0; m<M; m++)
              printf("     %02d:\t%g\t%g\n", m, img.m[pi][yi][xi][m], tac.voi[0].y[m]);     
          }
        }
        if(cti[M-1]<threshold) {
          flowimg.m[pi][yi][xi][0]=0.0;
          if(k2file[0]) k2img.m[pi][yi][xi][0]=0.0;
          if(vafile[0]) vaimg.m[pi][yi][xi][0]=0.0;
          if(vdfile[0]) vdimg.m[pi][yi][xi][0]=0.0;
          if(wssfile[0]) wssimg.m[pi][yi][xi][0]=0.0; 
          if(errfile[0]) errimg.m[pi][yi][xi][0]=0.0;
          thresholded_nr++;
          continue;        
        }
        
        /* Go through all basis functions */
        bi_min=-1; rnorm_min=1.0E80; p1=p2=p3=0.0;
        for(bi=0; bi<bf.voiNr; bi++) {

          /* Define memory site for present coefficient matrix and vector tau */
          for(m=0; m<M; m++) {A[m]=mem[bi]+ m*N;}
          tau=mem[bi]+M*N;

          /* Get data vector; it must be refilled for each basis function
             (presumably qr_solve() overwrites it - verify against library) */
          for(m=0; m<M; m++) {
            B[m]=img.m[pi][yi][xi][m];
            /* Apply data weights */
            B[m]*=qrweight[m];
          }

          /* Compute solution */
          ret=qr_solve(A, M, N, tau, B, X, residual, &RNORM, wws, ws);
          if(ret>0) { /* no solution is possible */
            for(n=0; n<N; n++) X[n]=0.0; 
            RNORM=1.0E80;
          }
          /* Check if this was best fit for now */
          if(RNORM<rnorm_min) {
            rnorm_min=RNORM; bi_min=bi;
            /* K1 */ p1=X[0];
            /* Va */ if(N>1) p2=X[1]; else p2=0.0;
            /* k2 */ p3=bf.voi[bi_min].size;
          }
        } /* next basis function */
        if(rnorm_min>=1.0E60) nosolution_nr++;
        else bf_opt_nr[bi_min]+=1;

        if(verbose>6 && yi==4*img.dimy/10 && xi==4*img.dimx/10) {
          printf("      Pixel (%d,%d,%d), K1=%g Va=%g k2=%g\n", pi, yi, xi, p1, p2, p3);
          if(verbose>10) dftPrint(&tac);
        }

        /* If none of the basis functions was selected, then bi_min is still
           -1 and must not be used as an index, and k2 is zero and must not be
           used as a divider; set the results of this pixel to zero */
        if(bi_min<0) {
          flowimg.m[pi][yi][xi][0]=0.0;
          if(k2file[0]) k2img.m[pi][yi][xi][0]=0.0;
          if(vafile[0]) vaimg.m[pi][yi][xi][0]=0.0;
          if(vdfile[0]) vdimg.m[pi][yi][xi][0]=0.0;
          if(wssfile[0]) wssimg.m[pi][yi][xi][0]=0.0;
          if(errfile[0]) errimg.m[pi][yi][xi][0]=0.0;
          continue;
        }

        /* Calculate WSS */
        for(m=0, wss=0.0; m<M; m++) {
          f=p1*bf.voi[bi_min].y[m];
          if(N>1) f+=p2*tac.voi[0].y[m];
          f-=img.m[pi][yi][xi][m];
          wss+=img.weight[m]*f*f;
        }
        /* Put results to output images */
        if(p1>flowMax) flowimg.m[pi][yi][xi][0]=flowMax;
        else if(p1<0.0) flowimg.m[pi][yi][xi][0]=0.0;
        else flowimg.m[pi][yi][xi][0]=p1;
        if(vafile[0]) vaimg.m[pi][yi][xi][0]=p2;
        if(k2file[0]) {
          if(p3>k2max) k2img.m[pi][yi][xi][0]=k2max;
          else k2img.m[pi][yi][xi][0]=p3;
        }
        if(vdfile[0]) {
          f=p1/p3; if(f>pWaterMax) f=pWaterMax;
          vdimg.m[pi][yi][xi][0]=f;
        }
        if(wssfile[0]) wssimg.m[pi][yi][xi][0]=wss;
        if(errfile[0]) {
          /* 1 = best fit with the first basis function, 2 = with the last */
          if(bi_min==0) ret=1; else if(bi_min==bf.voiNr-1) ret=2; else ret=0;
          errimg.m[pi][yi][xi][0]=(float)ret;
        }
      } /* next column */
    } /* next row */
  } /* next plane */
  fitFinish=clock();
  if(verbose>0) {fprintf(stdout, "done.\n"); fflush(stdout);}
  if(verbose>1 || thresholded_nr>0) {
    double pct;
    pct=(double)thresholded_nr/((double)(flowimg.dimx*flowimg.dimy*flowimg.dimz));
    pct*=100.; if(pct<3.0) printf("%g%%", pct); else printf("%.0f%%", pct);
    printf(" of pixels were not fitted due to threshold.\n");
    if(verbose>2) printf("thresholded %d pixels\n", thresholded_nr);
    fflush(stdout);
  }
  if(verbose>1 || nosolution_nr>0) {
    fprintf(stdout, "no QR solution for %d pixels.\n", nosolution_nr); fflush(stdout);}

  /* No need for dynamic image or input tac anymore */
  imgEmpty(&img); dftEmpty(&tac);
  /* Free memory of QR */
  free(chain);  free(B); free(residual); free(A); free(wwschain); 
  free(wws); free(qrweight); free(mem);


  /*
   *  Save basis functions if required;
   *  this is done not before, so that also the number of optimal fits
   *  achieved with each BF can be saved as the "size".
   */
  if(bfsfile[0]) {
    for(bi=0; bi<bf.voiNr; bi++) sprintf(bf.voi[bi].place, "%d", bf_opt_nr[bi]);
    if(dftWrite(&bf, bfsfile)) {
      fprintf(stderr, "Error in writing %s: %s\n", bfsfile, dfterrmsg); fflush(stderr);
      imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
      imgEmpty(&wssimg); imgEmpty(&errimg);
      dftEmpty(&bf); free(bf_opt_nr); return(11);
    }
    if(verbose>0) {fprintf(stdout, "basis functions were written in %s\n", bfsfile); fflush(stdout);}
  }

  /* No need for basis functions anymore */
  dftEmpty(&bf); free(bf_opt_nr);


  /*
   *  Convert units to per dL and/or gram if necessary
   */
  if(per_dl || per_gram) {
    /* K1 */
    float scale;
    if(per_dl && per_gram) {
      scale=100.0/density; flowimg.unit=IMGUNIT_ML_PER_DL_PER_MIN;
    } else if(per_dl) {
      scale=100.0; flowimg.unit=IMGUNIT_ML_PER_DL_PER_MIN;
    } else if(per_gram) {
      scale=1.0/density; flowimg.unit=IMGUNIT_ML_PER_ML_PER_MIN;
    } else
      scale=1.0;
    ret=imgArithmConst(&flowimg, scale, '*', 1.0E+06, verbose-6);
  }
  if(per_dl && vafile[0]) {
    /* Va */
    vaimg.unit=IMGUNIT_ML_PER_DL;
    ret=imgArithmConst(&vaimg, 100.0, '*', 1.0E+06, verbose-6);
  }

  /*
   *  Save parametric image(s); saving is stopped at the first failure,
   *  and the status of the last write attempt becomes the exit status
   */
  if(verbose>1) {printf("Saving parametric images\n"); fflush(stdout);}
  {
    ret=imgWrite(flowfile, &flowimg);
    if(ret) {fprintf(stderr, "Error: %s\n", flowimg.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "Flow image %s saved.\n", flowfile); fflush(stdout);}
  }
  if(ret==0 && vafile[0]) {
    ret=imgWrite(vafile, &vaimg);
    if(ret) {fprintf(stderr, "Error: %s\n", vaimg.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "Va image %s saved.\n", vafile); fflush(stdout);}
  }
  if(ret==0 && vdfile[0]) {
    ret=imgWrite(vdfile, &vdimg);
    if(ret) {fprintf(stderr, "Error: %s\n", vdimg.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "Vd image %s saved.\n", vdfile); fflush(stdout);}
  }
  if(ret==0 && k2file[0]) {
    ret=imgWrite(k2file, &k2img);
    if(ret) {fprintf(stderr, "Error: %s\n", k2img.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "k2 image %s saved.\n", k2file); fflush(stdout);}
  }
  if(ret==0 && wssfile[0]) {
    ret=imgWrite(wssfile, &wssimg);
    if(ret) {fprintf(stderr, "Error: %s\n", wssimg.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "WSS image %s saved.\n", wssfile); fflush(stdout);}
  }
  if(ret==0 && errfile[0]) {
    ret=imgWrite(errfile, &errimg);
    if(ret) {fprintf(stderr, "Error: %s\n", errimg.statmsg); fflush(stderr);}
    else if(verbose>0) {fprintf(stdout, "Error image %s saved.\n", errfile); fflush(stdout);}
  }

  imgEmpty(&flowimg); imgEmpty(&k2img); imgEmpty(&vaimg); imgEmpty(&vdimg);
  imgEmpty(&wssimg); imgEmpty(&errimg);

  /* How long did the fitting take */
  if(verbose>0) fprintf(stdout, "parameter estimation time := %.1f [s]\n",
    (double)(fitFinish-fitStart) / CLOCKS_PER_SEC );

  return(ret);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
