/** @file imgbfk3.c
    @brief Estimation of irreversible 2TCM parameters from dynamic PET images
     applying basis function approach.
    @remark Not well tested yet! 
    @copyright (c) Turku PET Centre
    @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <time.h>
/*****************************************************************************/
#include "libtpccurveio.h"
#include "libtpcmodext.h"
#include "libtpcmisc.h"
#include "libtpcmodel.h"
#include "libtpcimgio.h"
#include "libtpcimgp.h"
/*****************************************************************************/
#ifdef HAVE_OMP_H
#include <omp.h>
#endif
/*****************************************************************************/
#define MAX_N 3
/*****************************************************************************/

/*****************************************************************************/
/** Usage text of the program, one string per output line, terminated by a
 *  null pointer. It is passed to tpcPrintUsage() and tpcHtmlUsage() in main().
 *  NOTE(review): the "@P" token is presumably replaced with the program name
 *  by those functions - confirm against the library.
 *  The commented-out lines describe options (-theta1max, -theta2max, -Vbmax)
 *  that main() parses but that are not shown in the usage text.
 */
static char *info[] = {
  "Computation of parametric images from dynamic PET image in ECAT, NIfTI,",
  "or Analyze format applying irreversible two-tissue compartmental model with",
  "arterial plasma input, using the basis function method (1).",
  " ",
  "Dynamic PET image and plasma and blood time-activity curves (PTAC and BTAC)",
  "must be corrected for decay to the tracer administration time.",
  "Enter 'none' in place of the name of btacfile, if you want to assume Vb=0.",
  " ",
  "Usage: @P [Options] ptacfile btacfile imgfile k3file",
  " ",
  "Options:",
  " -thr=<threshold%>",
  "     Pixels with AUC less than (threshold/100 x PTAC AUC) are set to zero;",
  "     default is 1%.",
  " -end=<Fit end time (min)>",
  "     Use data from 0 to end time; by default, model is fitted to all frames.",
  " -dv=<filename>",
  "     Parametric K1/(k2+k3) image is saved.",
  " -K1=<filename>",
  "     Parametric K1 image is saved.",
  " -k2=<filename>",
  "     Parametric k2 image is saved.",
  " -Ki=<filename>",
  "     Parametric Ki image is saved.",
  " -Vb=<filename>",
  "     Parametric Vb image is saved.",
  " -min=<Min k2+k3> and -max=<Max k2+k3>",
  "     Enter the basis functions minimum and maximum k2+k3 (=alpha) in units 1/min;",
  "     defaults are 0.15 and 0.60, respectively.",
//  " -theta1max=<Max theta1>",
//  "     Enter the maximum theta1=(1-Vb)*Ki; default is 1.0; not applied in QR method.",
//  " -theta2max=<Max theta2>",
//  "     Enter the maximum theta2=(1-Vb)*(K1-Ki); default is 1.0; not applied in QR method.",
//  " -Vbmax=<Max Vb>",
//  "     Enter the maximum Vb; default is 1.0; not applied in QR method.",
  " -nr=<value>",
  "     Set number of basis functions; default is 200, minimum 50.",
  " -k2k3=<filename>",
  "     Parametric k2+k3 (=alpha) image is saved.",
  " -t1=<filename>",
  "     Parametric theta1 image is saved.",
  " -t2=<filename>",
  "     Parametric theta2 image is saved.",
  " -bf=<filename>",
  "     Basis function curves are written in specified TAC file.",
  " -err=<filename>",
  "     Save image where the pixels that had k2+k3 at min or max value are",
  "     set to values 1 and 2, respectively, and other pixels are set to value 0.",
  " -w1, -wf, -wfa",
  "     By default, all weights are set to 1.0 (no weighting, option -w1); option -wf",
  "     sets weights based on frame lengths, and option -wfa based on both frame lengths",
  "     and mean activity during each frame.",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example 1. Calculation of K1, Ki, k3, and Vb images:",
  "  @P -k1=s2345k1.v -ki=s2345ki.v -Vb=s2345vb.v s2345ap.kbq s2345ab.kbq s2345dy.v s2345k3.v",
  "Example 2. Calculation with assumption Vb=0:",
  "  @P -k1=s2345k1.v -ki=s2345ki.v s2345ap.kbq none s2345dy.v s2345k3.v",
  " ",
  "The units of pixel values in the parametric images are 1/min for k3,",
  "ml/(min*ml) for K1 and Ki, and ml/ml for DV and Vb.",
  " ",
  "References:",
  "1. Hong YT et al. J Cereb Blood Flow Metab. 2011;31:648-657.",
  " ",
  "See also: imglhk3, imgki, imgcbv, imgunit, fitdelay",
  " ",
  "Keywords: image, modelling, irreversible uptake, Ki, basis function method",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Turn on the globbing (wildcard expansion) of the command line, since it is
   disabled by default in mingw-w64 (_dowildcard=0); in MinGW32 define _CRT_glob
   instead, if necessary. In Unix and Linux wildcard command-line processing is
   enabled by default. */
/*
#undef _CRT_glob
#define _CRT_glob -1
*/
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/** Linear fitting methods that can be selected for the basis function fit. */
enum {
  METHOD_UNKNOWN = 0,
  METHOD_QR      = 1,
  METHOD_BVLS    = 2
};
/** Printable names of the fitting methods, indexed by the METHOD_* codes;
    the list is terminated by a null pointer. */
static char *method_str[] = {
  [METHOD_UNKNOWN] = "unknown",
  [METHOD_QR]      = "QR",
  [METHOD_BVLS]    = "BVLS",
  0
};
/*****************************************************************************/

/*****************************************************************************/
/**
 *  Program entry point: parses the command line, reads the dynamic PET image
 *  and the input TACs, computes the basis functions, fits every image pixel
 *  with each basis function using either QR or BVLS, and writes the requested
 *  parametric images.
 *
 *  @param argc Number of command-line arguments.
 *  @param argv Command-line arguments; see the usage text in info[].
 *  @return 0 when successful, otherwise a non-zero code: 1 for invalid
 *   command line or method, 2 for failures while reading or preparing the
 *   data, 3 for weighting, 4 for basis functions, 5 for out of memory,
 *   6 for failed QR decomposition, and 11 for failures in writing files.
 */
int main(int argc, char **argv)
{
  int      ai, help=0, version=0, verbose=1;
  char     ptacfile[FILENAME_MAX], btacfile[FILENAME_MAX], petfile[FILENAME_MAX];
  char     k1file[FILENAME_MAX], k2file[FILENAME_MAX], vbfile[FILENAME_MAX];
  char     dvfile[FILENAME_MAX], kifile[FILENAME_MAX], k3file[FILENAME_MAX];
  char     errfile[FILENAME_MAX], bfsfile[FILENAME_MAX], k2k3file[FILENAME_MAX];
  char     t1file[FILENAME_MAX], t2file[FILENAME_MAX];
  float    calcThreshold=0.01;
  double   fittime=-1.0;
  int      weights=2; // 0=frame lengths and activity, 1=frame lengths, 2=no weighting
  int      bfNr=200;
  double   alphamin=0.15, alphamax=0.60;
  double   theta1max=1.0, theta2max=1.0, Vbmax=1.0;
  int      fitVb=1; // 0=not fitted, 1=fitted
  int      method=METHOD_QR;
  int      ret;


  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  ptacfile[0]=btacfile[0]=petfile[0]=k3file[0]=(char)0;
  vbfile[0]=k1file[0]=k2file[0]=kifile[0]=dvfile[0]=(char)0;
  errfile[0]=bfsfile[0]=k2k3file[0]=t1file[0]=t2file[0]=(char)0;
  /* Get options; any option that is not accepted below falls through to
     the error message at the end of the loop body */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    char *cptr=argv[ai]+1; if(*cptr=='-') cptr++; if(cptr==NULL) continue;
    if(strncasecmp(cptr, "K1=", 3)==0) {
      strlcpy(k1file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "K2=", 3)==0) {
      strlcpy(k2file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "KI=", 3)==0) {
      strlcpy(kifile, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "VB=", 3)==0) {
      strlcpy(vbfile, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "DV=", 3)==0) {
      strlcpy(dvfile, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "K2K3=", 5)==0) {
      strlcpy(k2k3file, cptr+5, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "T1=", 3)==0) {
      strlcpy(t1file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "T2=", 3)==0) {
      strlcpy(t2file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "THR=", 4)==0) {
      double v; ret=atof_with_check(cptr+4, &v);
      if(!ret && v>=0.0 && v<=200.0) {calcThreshold=(float)(0.01*v); continue;}
    } else if(strncasecmp(cptr, "END=", 4)==0) {
      ret=atof_with_check(cptr+4, &fittime); if(!ret && fittime>0.0) continue;
    } else if(strcasecmp(cptr, "WFA")==0) {
      weights=0; continue;
    } else if(strcasecmp(cptr, "WF")==0) {
      weights=1; continue;
    } else if(strcasecmp(cptr, "W1")==0) {
      weights=2; continue;
    } else if(strncasecmp(cptr, "min=", 4)==0) {
      if(atof_with_check(cptr+4, &alphamin)==0 && alphamin>=0.0) continue;
    } else if(strncasecmp(cptr, "max=", 4)==0) {
      if(atof_with_check(cptr+4, &alphamax)==0 && alphamax>=0.0) continue;
    } else if(strncasecmp(cptr, "theta1max=", 10)==0) {
      if(atof_with_check(cptr+10, &theta1max)==0 && theta1max>=0.0) continue;
    } else if(strncasecmp(cptr, "theta2max=", 10)==0) {
      if(atof_with_check(cptr+10, &theta2max)==0 && theta2max>=0.0) continue;
    } else if(strncasecmp(cptr, "Vbmax=", 6)==0) {
      if(atof_with_check(cptr+6, &Vbmax)==0 && Vbmax>=0.0 && Vbmax<=1.0) continue;
    } else if(strncasecmp(cptr, "NR=", 3)==0) {
      bfNr=atoi(cptr+3); if(bfNr>5E+04) bfNr=5E+04;
      if(bfNr>=50) continue;
    } else if(strncasecmp(cptr, "BF=", 3)==0) {
      strlcpy(bfsfile, cptr+3, FILENAME_MAX); if(strlen(bfsfile)>0) continue;
    } else if(strncasecmp(cptr, "ERR=", 4)==0) {
      strlcpy(errfile, cptr+4, FILENAME_MAX); if(strlen(errfile)>0) continue;
    } else if(strcasecmp(cptr, "QR")==0) {
      method=METHOD_QR; continue;
    } else if(strcasecmp(cptr, "BVLS")==0) {
      method=METHOD_BVLS; continue;
    }
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    return(1);
  } else break;

  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); return(0);}

  /* Process other arguments, starting from the first non-option */
  if(ai<argc) strlcpy(ptacfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(btacfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(petfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(k3file, argv[ai++], FILENAME_MAX);
  if(ai<argc) {
    fprintf(stderr, "Error: invalid argument '%s'.\n", argv[ai]);
    return(1);
  }
  /* Did we get all the information that we need? */
  if(!k3file[0]) {
    fprintf(stderr, "Error: missing command-line argument; use option --help\n");
    return(1);
  }
  if(strcasecmp(btacfile, "NONE")==0 || strcasecmp(btacfile, "'NONE'")==0 || Vbmax<=0.0) {
    fitVb=0; Vbmax=0.0; btacfile[0]=(char)0;
    if(vbfile[0]) {
      fprintf(stderr, "Error: Vb cannot be calculated without BTAC file.\n");
      return(1);
    }
  }

  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("ptacfile := %s\n", ptacfile);
    if(btacfile[0]) printf("btacfile := %s\n", btacfile);
    printf("petfile := %s\n", petfile);
    printf("k3file := %s\n", k3file);
    if(vbfile[0]) printf("vbfile := %s\n", vbfile);
    if(k1file[0]) printf("k1file := %s\n", k1file);
    if(k2file[0]) printf("k2file := %s\n", k2file);
    if(k2k3file[0]) printf("k2k3file := %s\n", k2k3file);
    if(kifile[0]) printf("kifile := %s\n", kifile);
    if(dvfile[0]) printf("dvfile := %s\n", dvfile);
    if(errfile[0]) printf("errfile := %s\n", errfile);
    if(t1file[0]) printf("t1file := %s\n", t1file);
    if(t2file[0]) printf("t2file := %s\n", t2file);
    if(bfsfile[0]) printf("bfsfile := %s\n", bfsfile);
    printf("fitVb := %d\n", fitVb);
    printf("method := %s\n", method_str[method]);
    printf("calcThreshold :=%g\n", calcThreshold);
    printf("weights := %d\n", weights);
    if(fittime>0.0) printf("required_fittime := %g min\n", fittime);
    printf("bfNr := %d\n", bfNr);
    if(alphamin>0.0) printf("alpha_min := %g\n", alphamin);
    if(alphamax>0.0) printf("alpha_max := %g\n", alphamax);
    printf("theta1_max := %g\n", theta1max);
    printf("theta2_max := %g\n", theta2max);
    printf("Vb_max := %g\n", Vbmax);
  }
  if(verbose>8) IMG_TEST=SIF_TEST=verbose-8; else IMG_TEST=SIF_TEST=0;
  if(verbose>20) ECAT63_TEST=ECAT7_TEST=verbose-20; else ECAT63_TEST=ECAT7_TEST=0;

  /* Check user-defined alpha range */
  if(alphamin>=alphamax) {
    fprintf(stderr, "Error: invalid range for k2+k3 (%g - %g).\n", alphamin, alphamax);
    return(1);
  }


  /*
   *  Read PET image and input TACs
   */
  if(verbose>0) {printf("reading data files\n"); fflush(stdout);}
  DFT tac; dftInit(&tac);
  DFT inp; dftInit(&inp);
  IMG img; imgInit(&img);
  int dataNr=0;
  char errmsg[512];
  ret=imgReadModelingData(
    petfile, NULL, ptacfile, btacfile, NULL, &fittime, &dataNr, &img,
    &inp, &tac, 1, stdout, verbose-2, errmsg);
  if(ret!=0) {
    fprintf(stderr, "Error: %s.\n", errmsg);
    if(verbose>1) printf("  ret := %d\n", ret);
    fflush(stderr); fflush(stdout);
    return(2);
  }
  int origDataNr=tac.frameNr;
  /* Set time unit to min, also for integrals in y2[] */
  if(tac.timeunit==TUNIT_SEC) {
    for(int fi=0; fi<tac.frameNr; fi++) tac.voi[0].y2[fi]/=60.0;
    for(int fi=0; fi<tac.frameNr; fi++) tac.voi[0].y3[fi]/=3600.0;
  }
  ret=dftTimeunitConversion(&tac, TUNIT_MIN);
  ret=dftTimeunitConversion(&inp, TUNIT_MIN);
  if(verbose>1) {
    printf("fittimeFinal := %g min\n", fittime);
    printf("dataNr := %d\n", dataNr);
  }
  /* Check that image is dynamic */
  if(dataNr<4) {
    fprintf(stderr, "Error: too few time frames for fitting.\n");
    if(verbose>1) imgInfo(&img);
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&inp); return(2);
  }

  /* Add place for tissue TACs too */
  if(verbose>1) fprintf(stdout, "allocating working memory for pixel TACs\n");
  ret=dftAddmem(&tac, 2);
  if(ret) {
    fprintf(stderr, "Error: cannot allocate memory.\n");
    if(verbose>0) printf("ret := %d\n", ret);
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&inp); return(2);
  }
  strcpy(tac.voi[0].name, "plasma");
  strcpy(tac.voi[1].name, "blood"); // can be empty
  strcpy(tac.voi[2].name, "tissue");
  tac.voiNr=3;


  /* Determine the threshold */
  double threshold=calcThreshold*tac.voi[0].y2[dataNr-1];
  if(verbose>1) printf("threshold_AUC := %g\n", threshold);

  /* Set weights as requested */
  if(imgSetWeights(&img, weights, verbose-5)!=0) {
    fprintf(stderr, "Error: cannot calculate weights.\n");
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&inp); return(3);
  }
  for(int i=0; i<dataNr; i++) tac.w[i]=img.weight[i];


  /*
   *  Calculate the basis functions
   */
  if(verbose>1) fprintf(stdout, "calculating basis functions\n");
  DFT bf; dftInit(&bf);
  tac.frameNr=dataNr; // origDataNr contains the full frame number
  ret=bfIrr2TCM(&inp, &tac, &bf, bfNr, alphamin, alphamax, errmsg, verbose-2);
  tac.frameNr=origDataNr;
  if(ret) {
    fprintf(stderr, "Error: cannot calculate basis functions (%d).\n", ret);
    imgEmpty(&img); dftEmpty(&inp); dftEmpty(&tac); return(4);
  }
  /* Original sampling blood data not needed any more */
  dftEmpty(&inp);
  /* Note that basis functions are to be saved later (in bfsfile), after it is known
     in how many image pixels each basis function was found to give the best fit.
  */
  /* Allocate zero-initialized counters on how often each BF is found to provide
     the optimal fit; sized by bf.voiNr because that is the range of all indices
     used with this array below */
  int *bf_opt_nr=(int*)calloc((size_t)bf.voiNr, sizeof(int));
  if(bf_opt_nr==NULL) {
    fprintf(stderr, "Error: out of memory.\n");
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf);
    return(5);
  }


  /*
   *  Allocate result images (allocate all, even if user did not want to save those)
   */
  if(verbose>1) fprintf(stdout, "allocating memory for parametric image data\n");
  IMG k3img; imgInit(&k3img);
  IMG k1img; imgInit(&k1img);
  IMG k2img; imgInit(&k2img);
  IMG kiimg; imgInit(&kiimg);
  IMG dvimg; imgInit(&dvimg);
  IMG vbimg; imgInit(&vbimg);
  IMG erimg; imgInit(&erimg);
  IMG k2k3img; imgInit(&k2k3img);
  IMG t1img; imgInit(&t1img);
  IMG t2img; imgInit(&t2img);
  ret=imgAllocateWithHeader(         &k3img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&k1img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&k2img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&kiimg, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&dvimg, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&vbimg, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&erimg, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&k2k3img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&t1img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(!ret) ret=imgAllocateWithHeader(&t2img, img.dimz, img.dimy, img.dimx, 1, &img);
  if(ret) {
    fprintf(stderr, "Error: cannot allocate memory for result image.\n");
    imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf); free(bf_opt_nr);
    imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
    imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
    imgEmpty(&t1img); imgEmpty(&t2img);
    return(5);
  }
  /* set 'frame time' for parametric images */
  k3img.start[0]=k1img.start[0]=k2img.start[0]=kiimg.start[0]=vbimg.start[0]=
  dvimg.start[0]=erimg.start[0]=k2k3img.start[0]=t1img.start[0]=t2img.start[0]=0.0;
  k3img.end[0]=k1img.end[0]=k2img.end[0]=kiimg.end[0]=vbimg.end[0]=
  dvimg.end[0]=erimg.end[0]=k2k3img.end[0]=t1img.end[0]=t2img.end[0]=60.*fittime;
  /* set units in parametric images */
  k3img.unit=k2img.unit=k2k3img.unit=CUNIT_PER_MIN;
  dvimg.unit=vbimg.unit=CUNIT_ML_PER_ML;
  k1img.unit=kiimg.unit=t1img.unit=t2img.unit=CUNIT_ML_PER_ML_PER_MIN;
  erimg.unit=CUNIT_UNITLESS;
  /* and set the more or less necessary things */
  k3img.decayCorrection=k1img.decayCorrection=k2img.decayCorrection=kiimg.decayCorrection=
    vbimg.decayCorrection=dvimg.decayCorrection=erimg.decayCorrection=
    k2k3img.decayCorrection=t1img.decayCorrection=t2img.decayCorrection=IMG_DC_NONCORRECTED;
  k3img.isWeight=k1img.isWeight=k2img.isWeight=kiimg.isWeight=vbimg.isWeight=
    dvimg.isWeight=erimg.isWeight=k2k3img.isWeight=t1img.isWeight=t2img.isWeight=0;


  /* Fitting */

  int thresholded_nr=0;
  int nosolution_nr=0;

  if(method==METHOD_QR) {

    /*
     *  Allocate memory for QR
     */
    int M, N;
    M=dataNr; N=3; if(fitVb==0) N--;
    double **mem, **A, *B, X[N], *tau, *residual, RNORM, *chain;
    double *qrweight, **wws, *ws, *wwschain;
    if(verbose>1) fprintf(stdout, "allocating memory for QR\n");
    /* For each BF, chain holds the M x N coefficient matrix followed by tau */
    chain=(double*)malloc((size_t)(M+1)*N*bf.voiNr * sizeof(double));
    mem=(double**)malloc(bf.voiNr * sizeof(double*));
    A=(double**)malloc(M * sizeof(double*));
    B=(double*)malloc(M*sizeof(double));
    residual=(double*)malloc(M*sizeof(double));
    qrweight=(double*)malloc(M*sizeof(double));
    wwschain=(double*)malloc((M*N+2*M)*sizeof(double));
    wws=(double**)malloc(M * sizeof(double*));
    if(chain==NULL || mem==NULL || B==NULL || A==NULL || residual==NULL ||
       qrweight==NULL || wwschain==NULL || wws==NULL)
    {
      fprintf(stderr, "Error: out of memory.\n");
      /* free(NULL) is a no-op, so all buffers can be released unconditionally */
      free(chain); free(B); free(residual);
      free(A); free(wwschain); free(wws); free(qrweight); free(mem);
      imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf); free(bf_opt_nr);
      imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
      imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
      imgEmpty(&t1img); imgEmpty(&t2img);
      return(5);
    }
    for(int bi=0; bi<bf.voiNr; bi++) mem[bi]=chain+bi*(M+1)*N;
    for(int m=0; m<M; m++) wws[m]=wwschain+m*N;
    ws=wwschain+M*N;

    /* Pre-compute QR weights for faster execution */
    for(int m=0; m<M; m++) {
      if(img.weight[m]<=1.0e-20) qrweight[m]=0.0;
      else qrweight[m]=sqrt(img.weight[m]);
    }


    /* Make A matrix, and QR decomposition for it, for all pixels
       beforehand for faster execution */
    if(verbose>1) fprintf(stdout, "calculating QR decomposition\n");
    for(int bi=0; bi<bf.voiNr; bi++) {

      /* Define memory site for coefficient matrix and vector tau */
      for(int m=0; m<M; m++) A[m]=mem[bi]+m*N;
      tau=mem[bi]+M*N;

      /* Initiate matrix  (A = mem[bi]) */
      for(int m=0; m<M; m++) {
        A[m][0]=tac.voi[0].y2[m]; // plasma integral
        A[m][1]=bf.voi[bi].y[m]; // basis function
        if(N>2) A[m][2]=tac.voi[1].y[m]; // blood TAC for Vb estimation
      }

      /* Apply data weights */
      for(int m=0; m<M; m++)
        for(int n=0; n<N; n++)
          A[m][n]*=qrweight[m];

      /* Compute QR decomposition of the coefficient matrix */
      ret=qr_decomp(A, M, N, tau, wws, ws);

      if(ret>0) { /* Decomposition failed */
        fprintf(stderr, "Error: QR decomposition failed.\n");
        free(chain); free(B); free(residual);
        free(A); free(wwschain); free(wws); free(qrweight); free(mem);
        imgEmpty(&img); dftEmpty(&tac); dftEmpty(&bf); free(bf_opt_nr);
        imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
        imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
        imgEmpty(&t1img); imgEmpty(&t2img);
        return (6);
      }
    } /* next BF */


    /*
     *  Compute pixel-by-pixel
     */
    if(verbose>0) {fprintf(stdout, "computing QR pixel-by-pixel\n"); fflush(stdout);}
    double maxk3=0.0, maxk1=0.0;
    double maxt1=0.0, maxt2=0.0;
    double *ct, *cti;
    ct=tac.voi[2].y; cti=tac.voi[2].y2;
    for(int pi=0; pi<img.dimz; pi++) {
      if(img.dimz>1 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
      for(int yi=0; yi<img.dimy; yi++) {
        for(int xi=0; xi<img.dimx; xi++) {
          /* Set pixel results to zero */
          k3img.m[pi][yi][xi][0]=0.0;
          k1img.m[pi][yi][xi][0]=0.0;
          k2img.m[pi][yi][xi][0]=0.0;
          kiimg.m[pi][yi][xi][0]=0.0;
          dvimg.m[pi][yi][xi][0]=0.0;
          vbimg.m[pi][yi][xi][0]=0.0;
          erimg.m[pi][yi][xi][0]=0.0;
          k2k3img.m[pi][yi][xi][0]=0.0;
          t1img.m[pi][yi][xi][0]=0.0;
          t2img.m[pi][yi][xi][0]=0.0;
          /* Copy pixel TAC and calculate pixel integral */
          for(int m=0; m<M; m++) {ct[m]=img.m[pi][yi][xi][m];}
          ret=petintegral(tac.x1, tac.x2, ct, tac.frameNr, cti, NULL);
          if(ret) continue;
          /* if AUC at the end is less than threshold value, then do nothing more */
          if(cti[dataNr-1]<threshold) {thresholded_nr++; continue;}

          /* Go through all basis functions */
          int bi_min=-1;
          double rnorm_min=1.0E80;
          double p1, p2, p3, p4; p1=p2=p3=p4=0.0;
          for(int bi=0; bi<bf.voiNr; bi++) {

            /* Define memory site for present coefficient matrix and vector tau */
            for(int m=0; m<M; m++) {A[m]=mem[bi]+ m*N;}
            tau=mem[bi]+M*N;

            /* Get data vector */
            for(int m=0; m<M; m++) {
              B[m]=img.m[pi][yi][xi][m];
              /* Apply data weights */
              B[m]*=qrweight[m];
            }

            /* Compute solution */
            ret=qr_solve(A, M, N, tau, B, X, residual, &RNORM, wws, ws);
            if(ret!=0) { /* no solution is possible */
              for(int n=0; n<N; n++) X[n]=0.0;
              RNORM=1.0E80;
            }

            /* Check if this was best fit for now; if yes, then save the parameters */
            if(RNORM<rnorm_min) {
              rnorm_min=RNORM; bi_min=bi;
              p1=X[0];
              p2=X[1];
              if(N>2) p3=X[2]; else p3=0.0; // Vb
              p4=bf.voi[bi_min].size;
            }
          } /* next basis function */

          if(verbose>6 && yi==4*img.dimy/10 && xi==4*img.dimx/10) {
            printf("      Pixel (%d,%d,%d), P1=%g P2=%g P3=%g theta=%g\n",
                   pi, yi, xi, p1, p2, p3, p4);
            if(verbose>10) dftPrint(&tac);
          }

          /* count the selected BFs */
          if(!(rnorm_min<1.0E60)) {nosolution_nr++; continue;}
          else bf_opt_nr[bi_min]+=1;

          /* Put results to output images */
          if(bi_min==0) ret=1; else if(bi_min==bf.voiNr-1) ret=2; else ret=0;
          erimg.m[pi][yi][xi][0]=(float)ret;
          t1img.m[pi][yi][xi][0]=p1; // theta1
          t2img.m[pi][yi][xi][0]=p2; // theta2
          k2k3img.m[pi][yi][xi][0]=p4; // alpha=k2+k3
          if(p1>maxt1) maxt1=p1;
          if(p2>maxt2) maxt2=p2;
          if(fitVb!=0) {
            vbimg.m[pi][yi][xi][0]=p3;
            if(p3>0.99) continue; // if very large Vb, then all others should be zero
          }
          //if((p1)<1.0E-10) continue; // if very small Ki, then all others are zero, too
          if((p1+p2)<1.0E-10) continue; // if very small K1, then all others are zero, too
          k1img.m[pi][yi][xi][0]=p1+p2; if(p3>0.0 && p3<0.9) k1img.m[pi][yi][xi][0]/=(1.0-p3);
          k2img.m[pi][yi][xi][0]=p2*p4/(p1+p2);
          k3img.m[pi][yi][xi][0]=p1*p4/(p1+p2);
          kiimg.m[pi][yi][xi][0]=p1; if(p3>0.0 && p3<0.9) kiimg.m[pi][yi][xi][0]/=(1.0-p3);
          if(p4>0.00001) dvimg.m[pi][yi][xi][0]=k1img.m[pi][yi][xi][0]/p4;
          if(k1img.m[pi][yi][xi][0]>maxk1) maxk1=k1img.m[pi][yi][xi][0];
          if(k3img.m[pi][yi][xi][0]>maxk3) maxk3=k3img.m[pi][yi][xi][0];
        } /* next column */
      } /* next row */
    } /* next plane */
    if(verbose>0) {fprintf(stdout, "\ndone.\n"); fflush(stdout);}
    if(verbose>1 || thresholded_nr>0) {
      double f;
      f=(double)thresholded_nr/((double)(k3img.dimx*k3img.dimy*k3img.dimz));
      f*=100.; if(f<3.0) printf("%g%%", f); else printf("%.0f%%", f);
      printf(" of pixels were not fitted due to threshold.\n");
      if(verbose>2) printf("thresholded %d pixels\n", thresholded_nr);
    }
    if(verbose>0 || nosolution_nr>0)
      fprintf(stdout, "no QR solution for %d pixels.\n", nosolution_nr);
    if(verbose>1) {
      printf("max_theta1 := %g\n", maxt1);
      printf("max_theta2 := %g\n", maxt2);
      printf("max_k1 := %g\n", maxk1);
      printf("max_k3 := %g\n", maxk3);
    }

    /* Free memory of QR */
    free(chain); free(B); free(residual); free(A); free(wwschain);
    free(wws); free(qrweight); free(mem);


  } else if(method==METHOD_BVLS) {

    /*
     *  Compute pixel-by-pixel
     */
    if(verbose>0) {fprintf(stdout, "computing BF BVLS pixel-by-pixel\n"); fflush(stdout);}
    int m=dataNr;
    int n=3; if(fitVb==0) n--;
    int nm=n*m;
    double maxk3=0.0, maxk1=0.0;
    double maxt1=0.0, maxt2=0.0;

    /* Image planes are processed in parallel when OpenMP is enabled. Counters
       and maxima are combined with reduction clauses, the per-BF hit counters
       are updated atomically, and status codes are kept in loop-local
       variables so that threads do not share any writable scalar. */
#pragma omp parallel for reduction(+:thresholded_nr,nosolution_nr) \
                         reduction(max:maxk1,maxk3,maxt1,maxt2)
    for(int pi=0; pi<img.dimz; pi++) {
      if(img.dimz>1 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
      for(int yi=0; yi<img.dimy; yi++) {
        for(int xi=0; xi<img.dimx; xi++) {
          /* Set pixel results to zero */
          k3img.m[pi][yi][xi][0]=0.0;
          k1img.m[pi][yi][xi][0]=0.0;
          k2img.m[pi][yi][xi][0]=0.0;
          kiimg.m[pi][yi][xi][0]=0.0;
          dvimg.m[pi][yi][xi][0]=0.0;
          vbimg.m[pi][yi][xi][0]=0.0;
          erimg.m[pi][yi][xi][0]=0.0;
          k2k3img.m[pi][yi][xi][0]=0.0;
          t1img.m[pi][yi][xi][0]=0.0;
          t2img.m[pi][yi][xi][0]=0.0;
          /* if AUC at the end is less than threshold value, then do nothing more */
          float pxlint[dataNr];
          if(fpetintegral(img.start, img.end, img.m[pi][yi][xi], dataNr, pxlint, NULL)!=0) continue;
          /* threshold was computed with times in min, hence the factor 60 here */
          if(pxlint[dataNr-1]<60.*threshold) {thresholded_nr++; continue;}
          /* Allocate memory required by BVLS */
          double *mat=(double*)malloc(nm*sizeof(double));
          if(mat==NULL) {nosolution_nr++; continue;} // pixel cannot be fitted
          double b[m], x[n], bl[n], bu[n], w[n], zz[m];
          double act[m*(n+2)], r2;
          int istate[n+1], iterNr;
          /* Fit pixel with each basis function */
          int bi_min=-1;
          double r2_min=1.0E80;
          double p1, p2, p3, p4; p1=p2=p3=p4=0.0;
          for(int bi=0; bi<bf.voiNr; bi++) {
            /* Setup data matrix A and vector B */
            for(int mi=0; mi<m; mi++) b[mi]=img.m[pi][yi][xi][mi];
            for(int mi=0; mi<m; mi++) {
              mat[mi]=tac.voi[0].y2[mi]; // plasma integral
              mat[mi+m]=bf.voi[bi].y[mi]; // basis function
              if(n>2) mat[mi+(2*m)]=tac.voi[1].y[mi]; // blood TAC for Vb estimation
            }
            /* Apply data weights */
            if(tac.isweight) llsqWght(n, m, NULL, mat, b, tac.w);
            /* Set istate vector to indicate that all parameters are non-bound */
            istate[n]=0; for(int ni=0; ni<n; ni++) istate[ni]=1+ni;
            /* Set parameter limits */
            bl[0]=0.0; bu[0]=theta1max;
            bl[1]=0.0; bu[1]=theta2max;
            if(fitVb!=0) {bl[2]=0.0; bu[2]=Vbmax;}
            /* Set max iterations */
            iterNr=3*n;
            /* Compute BVLS; the status is kept thread-local */
            int bvls_ret=bvls(1, m, n, mat, b, bl, bu, x, w, act, zz, istate, &iterNr, verbose-30);
            if(bvls_ret!=0) continue; /* no solution is possible */
            r2=w[0];
            /* Check if this was best fit for now; if yes, then save the parameters */
            if(r2<r2_min) {
              r2_min=r2; bi_min=bi;
              p1=x[0];
              p2=x[1];
              if(n>2) p3=x[2]; else p3=0.0; // Vb
              p4=bf.voi[bi_min].size;
            }
          } /* next basis function */
          free(mat);

          /* count the selected BFs */
          if(!(r2_min<1.0E60)) {nosolution_nr++; continue;}
#pragma omp atomic
          bf_opt_nr[bi_min]+=1;

          /* Put results to output images */
          int errcode;
          if(bi_min==0) errcode=1; else if(bi_min==bf.voiNr-1) errcode=2; else errcode=0;
          erimg.m[pi][yi][xi][0]=(float)errcode;
          t1img.m[pi][yi][xi][0]=p1; // theta1
          t2img.m[pi][yi][xi][0]=p2; // theta2
          k2k3img.m[pi][yi][xi][0]=p4; // alpha=k2+k3
          if(p1>maxt1) maxt1=p1;
          if(p2>maxt2) maxt2=p2;
          if(fitVb!=0) {
            vbimg.m[pi][yi][xi][0]=p3;
            if(p3>0.99) continue; // if very large Vb, then all others should be zero
          }
          if((p1+p2)<1.0E-10) continue; // if very small K1, then all others are zero, too
          k1img.m[pi][yi][xi][0]=p1+p2; if(p3>0.0 && p3<0.9) k1img.m[pi][yi][xi][0]/=(1.0-p3);
          k2img.m[pi][yi][xi][0]=p2*p4/(p1+p2);
          k3img.m[pi][yi][xi][0]=p1*p4/(p1+p2);
          kiimg.m[pi][yi][xi][0]=p1; if(p3>0.0 && p3<0.9) kiimg.m[pi][yi][xi][0]/=(1.0-p3);
          if(p4>0.00001) dvimg.m[pi][yi][xi][0]=k1img.m[pi][yi][xi][0]/p4;
          if(k1img.m[pi][yi][xi][0]>maxk1) maxk1=k1img.m[pi][yi][xi][0];
          if(k3img.m[pi][yi][xi][0]>maxk3) maxk3=k3img.m[pi][yi][xi][0];

        } // next image column
      } // next image row
    } // next image plane
    if(verbose>0) {fprintf(stdout, "\ndone.\n"); fflush(stdout);}
    if(verbose>1 || thresholded_nr>0) {
      double f;
      f=(double)thresholded_nr/((double)(k3img.dimx*k3img.dimy*k3img.dimz));
      f*=100.; if(f<3.0) printf("%g%%", f); else printf("%.0f%%", f);
      printf(" of pixels were not fitted due to threshold.\n");
      if(verbose>2) printf("thresholded %d pixels\n", thresholded_nr);
    }
    if(verbose>0 || nosolution_nr>0)
      fprintf(stdout, "no solution for %d pixels.\n", nosolution_nr);
    if(verbose>1) {
      printf("max_theta1 := %g\n", maxt1);
      printf("max_theta2 := %g\n", maxt2);
      printf("max_k1 := %g\n", maxk1);
      printf("max_k3 := %g\n", maxk3);
    }

  } else {

    fprintf(stderr, "Error: selected method not available.\n");
    imgEmpty(&img); dftEmpty(&tac);
    dftEmpty(&bf); free(bf_opt_nr);
    imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
    imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
    imgEmpty(&t1img); imgEmpty(&t2img);
    return(1);

  }



  /* No need for dynamic image or input tac any more */
  imgEmpty(&img); dftEmpty(&tac);

  /*
   *  Save basis functions if required;
   *  this is done not before, so that also the number of optimal fits
   *  achieved with each BF can be saved in the 'place' field of each BF.
   */
  if(bfsfile[0]) {
    for(int bi=0; bi<bf.voiNr; bi++)
      sprintf(bf.voi[bi].place, "%d", bf_opt_nr[bi]);
    if(dftWrite(&bf, bfsfile)) {
      fprintf(stderr, "Error in writing %s: %s\n", bfsfile, dfterrmsg);
      dftEmpty(&bf); free(bf_opt_nr);
      imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
      imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
      imgEmpty(&t1img); imgEmpty(&t2img);
      return(11);
    }
    if(verbose>0) fprintf(stdout, "basis functions were written in %s\n", bfsfile);
  }

  /* No need for basis functions any more */
  dftEmpty(&bf); free(bf_opt_nr);


  /*
   *  Save parametric images; the k3 image is always written, the others only
   *  when a file name was given, and writing stops at the first failure
   */
  ret=imgWrite(k3file, &k3img);
  if(!ret && k1file[0]) ret=imgWrite(k1file, &k1img);
  if(!ret && k2file[0]) ret=imgWrite(k2file, &k2img);
  if(!ret && kifile[0]) ret=imgWrite(kifile, &kiimg);
  if(!ret && vbfile[0]) ret=imgWrite(vbfile, &vbimg);
  if(!ret && dvfile[0]) ret=imgWrite(dvfile, &dvimg);
  if(!ret && errfile[0]) ret=imgWrite(errfile, &erimg);
  if(!ret && k2k3file[0]) ret=imgWrite(k2k3file, &k2k3img);
  if(!ret && t1file[0]) ret=imgWrite(t1file, &t1img);
  if(!ret && t2file[0]) ret=imgWrite(t2file, &t2img);
  imgEmpty(&k3img); imgEmpty(&kiimg); imgEmpty(&k1img); imgEmpty(&k2img);
  imgEmpty(&dvimg); imgEmpty(&vbimg); imgEmpty(&erimg); imgEmpty(&k2k3img);
  imgEmpty(&t1img); imgEmpty(&t2img);
  if(ret) {
    fprintf(stderr, "Error: cannot write parametric image.\n");
    return(11);
  }
  if(verbose>0) fprintf(stdout, "Parametric image(s) saved.\n");


  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
