/** @file imgsrtm.c
    @brief Estimation of BPnd from dynamic PET image applying Lawson-Hanson
     non-negative least squares (NNLS) method to solve general linear least 
     squares functions of simplified reference tissue model (SRTM).
    @copyright (c) Turku PET Centre
    @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <math.h>
#include <time.h>
/*****************************************************************************/
#include "libtpccurveio.h"
#include "libtpcmodext.h"
#include "libtpcmisc.h"
#include "libtpcmodel.h"
#include "libtpcimgio.h"
#include "libtpcimgp.h"
/*****************************************************************************/
#define NNLS_N 3
enum {MODEL_UNKNOWN, MODEL_SRTM, MODEL_SRTM2};
/*****************************************************************************/

/*****************************************************************************/
/* Usage/help text of the program, one output line per array element,
   terminated by a null pointer. The array is passed to tpcPrintUsage() and
   tpcHtmlUsage() in main(). */
static char *info[] = {
  "Computation of parametric image of binding potential (BPnd) from",
  "dynamic PET image in ECAT, NIfTI, or Analyze format applying simplified",
  "reference tissue model (SRTM) [1]:",
  "    ______   K1' ___________            ",
  "   |      | --> |           |           dCt(t)=               ",
  "   |      | <-- |   Cr      |             +R1*dCr(t)          ",
  "   |      |  k2'|___________|             +k2*Cr(t)           ",
  "   |      |      ____________________     -(k2/(1+BPnd))*Ct(t)",
  "   |  Cp  |  K1 |        k3 |        |  Ct(t)=Cf(t)+Cb(t)     ",
  "   |      | --> |   Cf  -------> Cb  |  ",
  "   |      | <-- |       <-------     |  ",
  "   |      |  k2 |           | k4     |  R1=K1/K1'=k2/k2'      ",
  "   |______|     |___________|________|  BPnd=k3/k4            ",
  " ",
  "The model is transformed to general linear least squares functions [2],",
  "which are solved using Lawson-Hanson non-negative least squares (NNLS)",
  "algorithm [3]. BPnd is estimated directly without division [4].",
  " ",
  "Dynamic PET image and reference region TAC must be corrected for decay.",
  " ",
  "Usage: @P [Options] imgfile rtacfile bpfile",
  " ",
  "Options:",
  " -SRTM2",
  "     SRTM2 method [5] is applied; in brief, traditional SRTM method is",
  "     used first to calculate median k2' from all pixels where BPnd>0; then",
  "     SRTM is run another time with fixed k2'",
  " -R1=<filename>",
  "     Program computes also an R1 image.",
  " -k2=<filename>",
  "     Program computes also a k2 image.",
  " -k2s=<filename>",
  "     Program computes also a k2' image",
  " -theta3=<filename> or -t3=<filename>",
  "     Program computes also a theta3 image; theta3 = k2/(1+BPnd)+lambda",
  " -rp=<filename>",
  "     Program writes regression parameters in the specified image file",
  " -dual=<filename> or -du=<filename>",
  "     Program writes number of i in set p in NNLS dual solution vector in",
  "     the specified image file",
  " -thr=<threshold%>",
  "     Pixels with AUC less than (threshold/100 x ref AUC) are set to zero",
  "     default is 0%",
  " -DVR",
  "     Instead of BP, program saves the DVR (=BP+1) values.",
  " -end=<Fit end time (min)>",
  "     Use data from 0 to end time; by default, model is fitted to all frames.",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example:",
  "  @P ua2918dy1.v ua2918cer.dft ua2918bp.v",
  " ",
  "References:",
  "1. Lammertsma AA, Hume SP. Simplified reference tissue model for PET",
  "   receptor studies. NeuroImage 1996;4:153-158.",
  "2. Blomqvist G. On the construction of functional maps in positron emission",
  "   tomography. J Cereb Blood Flow Metab 1984;4:629-632.",
  "3. Lawson CL & Hanson RJ. Solving least squares problems.",
  "   Prentice-Hall, 1974.",
  "4. Zhou Y, Brasic J, Endres CJ, Kuwabara H, Kimes A, Contoreggi C, Maini A,",
  "   Ernst M, Wong DF. Binding potential image based statistical mapping for",
  "   detection of dopamine release by [11C]raclopride dynamic PET.",
  "   NeuroImage 2002;16:S91.",
  "5. Wu Y, Carson RE. Noise reduction in the simplified reference tissue",
  "   model for neuroreceptor functional imaging. J Cereb Blood Flow Metab.",
  "   2002;22:1440-1452.",
  " ",
  "See also: imgdv, imgbfbp, imgratio, imgunit, eframe, imgdecay, img2dft",
  " ",
  "Keywords: image, modelling, binding potential, SRTM, SRTM2, reference input",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Enable wildcard expansion (globbing) of command-line arguments.
   In mingw-w64 globbing is disabled by default (_dowildcard=0), so it is
   switched on here by defining _dowildcard as -1. With MinGW32 the
   corresponding setting is _CRT_glob, which can be defined instead if
   necessary (see the disabled lines below). In Unix and Linux the wildcard
   processing of the command line is enabled by default. */
/*
#undef _CRT_glob
#define _CRT_glob -1
*/
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/**
 *  main()
 *
 *  Reads a dynamic PET image and a reference region TAC, fits the linearized
 *  simplified reference tissue model (SRTM, or optionally SRTM2) to each image
 *  pixel with NNLS, and writes the BPnd (or DVR) image and the optional
 *  R1, k2, k2', theta3, NNLS dual count, and regression parameter images.
 *
 *  @param argc Number of command-line arguments.
 *  @param argv Command-line arguments: options, followed by the image file,
 *   reference TAC file, and BP image file names.
 *  @return 0 when successful, otherwise a non-zero exit code:
 *   1 = invalid or missing command-line arguments,
 *   2 = reading of the data failed or too few frames for fitting,
 *   3 = isotope halflife missing, or memory for the tissue TAC not available,
 *   4 = memory for parametric images not available,
 *   5 = memory for NNLS not available,
 *   6 = memory for k2' values not available,
 *   7 = k2' could not be estimated in any pixel (SRTM2),
 *   13-19 = writing of a result image failed.
 */
int main(int argc, char **argv)
{
  int      ai, help=0, version=0, verbose=1;
  int      pi, yi, xi, fi, ret;
  int      model=MODEL_SRTM, bp_plus_one=0, weight=0;
  int      fitdimt;
  char     inpfile[FILENAME_MAX], petfile[FILENAME_MAX], bpfile[FILENAME_MAX];
  char     r1file[FILENAME_MAX], k2file[FILENAME_MAX], k2sfile[FILENAME_MAX];
  char     t3file[FILENAME_MAX], regfile[FILENAME_MAX], dualfile[FILENAME_MAX];
  char    *cptr, tmp[512];
  float    threshold, calcThreshold=0.0;
  double   fittime=-1.0, f;
  DFT      tac;
  IMG      pet, bpout, r1out, k2out, k2sout, t3out, tout, dualout;
  clock_t  fitStart, fitFinish;
  double   lambda=0;
  double  *ct, *cti, *cr, *cri; /* Pointers to tissue and reference TACs */
  /* nnls */
  int      nnls_n, nnls_m, n, m, nnls_index[NNLS_N];
  double  *nnls_a[NNLS_N], *nnls_b, *nnls_zz, nnls_x[NNLS_N], *nnls_mat,
           nnls_wp[NNLS_N], *dptr, nnls_rnorm;



  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  inpfile[0]=petfile[0]=bpfile[0]=r1file[0]=k2file[0]=k2sfile[0]=(char)0;
  t3file[0]=regfile[0]=dualfile[0]=(char)0;
  /* Get options; option processing stops at the first non-option argument */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    /* Skip the leading '-' or '--' of the option */
    cptr=argv[ai]+1; if(*cptr=='-') cptr++;
    if(strcasecmp(cptr, "DVR")==0 || strcasecmp(cptr, "BP+1")==0) {
      bp_plus_one=1; continue;
    } else if(*cptr=='w' || *cptr=='W') {
      /* Any option starting with 'w' turns the weighting on */
      weight=1; continue;
    } else if(strncasecmp(cptr, "R1=", 3)==0) {
      strlcpy(r1file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "k2=", 3)==0) {
      strlcpy(k2file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "k2s=", 4)==0) {
      strlcpy(k2sfile, cptr+4, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "theta3=", 7)==0) {
      strlcpy(t3file, cptr+7, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "t3=", 3)==0) {
      strlcpy(t3file, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "dual=", 5)==0) {
      strlcpy(dualfile, cptr+5, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "du=", 3)==0) {
      strlcpy(dualfile, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "RP=", 3)==0) {
      strlcpy(regfile, cptr+3, FILENAME_MAX); continue;
    } else if(strncasecmp(cptr, "THR=", 4)==0) {
      /* Threshold is given as percentage, but stored as fraction */
      double v; ret=atof_with_check(cptr+4, &v);
      if(!ret && v>=0.0 && v<=200.0) {calcThreshold=(float)(0.01*v); continue;}
    } else if(strncasecmp(cptr, "END=", 4)==0) {
      fittime=atof_dpi(cptr+4); if(fittime>0.0) continue;
    } else if(strcasecmp(cptr, "SRTM2")==0) {
      model=MODEL_SRTM2; continue;
    }
    /* Unknown option, or option with invalid value */
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    return(1);
  } else break;

  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); return(0);}

  /* Process other arguments, starting from the first non-option */
  if(ai<argc) strlcpy(petfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(inpfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) strlcpy(bpfile, argv[ai++], FILENAME_MAX);
  if(ai<argc) {
    fprintf(stderr, "Error: invalid argument '%s'.\n", argv[ai]);
    return(1);
  }
  /* Did we get all the information that we need? */
  if(!bpfile[0]) {
      fprintf(stderr, "Error: missing command-line argument; use option --help\n");
    return(1);
  }
  /* None of the output files may overwrite the input image; names of */
  /* outputs that were not requested are empty and can never match here */
  if(strcasecmp(petfile, bpfile)==0 || strcasecmp(petfile, r1file)==0 ||
     strcasecmp(petfile, k2file)==0 || strcasecmp(petfile, regfile)==0 ||
     strcasecmp(petfile, k2sfile)==0 || strcasecmp(petfile, t3file)==0 ||
     strcasecmp(petfile, dualfile)==0)
  {
    fprintf(stderr, "Error: check the output filenames.\n");
    return(1);
  }
  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("inpfile := %s\n", inpfile);
    printf("petfile := %s\n", petfile);
    printf("bpfile := %s\n", bpfile);
    if(r1file[0]) printf("r1file := %s\n", r1file);
    if(k2file[0]) printf("k2file := %s\n", k2file);
    if(k2sfile[0]) printf("k2sfile := %s\n", k2sfile);
    if(t3file[0]) printf("t3file := %s\n", t3file);
    if(dualfile[0]) printf("dualfile := %s\n", dualfile);
    if(regfile[0]) printf("regfile := %s\n", regfile);
    printf("calcThreshold :=%g\n", calcThreshold);
    printf("bp_plus_one := %d\n", bp_plus_one);
    printf("weight := %d\n", weight);
    if(fittime>0.0) printf("required_fittime := %g min\n", fittime);
    printf("model := %d\n", model);
  }
  if(verbose>8) IMG_TEST=verbose-8; else IMG_TEST=0;



  /*
   *  Read PET image and reference tissue TAC
   */
  imgInit(&pet); dftInit(&tac);
  ret=imgReadModelingData(
    petfile, NULL, inpfile, NULL, NULL, &fittime, &fitdimt, &pet,
    NULL, &tac, 1, stdout, verbose-2, tmp);
  if(ret!=0) {
    fprintf(stderr, "Error: %s.\n", tmp);
    if(verbose>1) printf("  ret := %d\n", ret);
    return(2);
  }
  if(imgNaNs(&pet, 1)>0)
    if(verbose>0) fprintf(stderr, "Warning: missing pixel values.\n");
  /* Set time unit to min, also for integrals in y2[] */
  if(tac.timeunit==TUNIT_SEC)
    for(fi=0; fi<tac.frameNr; fi++) tac.voi[0].y2[fi]/=60.0;
  dftTimeunitConversion(&tac, TUNIT_MIN);

  /* Theta3 can be calculated only if isotope halflife is known; check that */
  /* and calculate the lambda */
  if(t3file[0]) {
    if(pet.isotopeHalflife<=1.0E-005) {
      fprintf(stderr,"Error: %s does not contain isotope halflife;\n", petfile);
      fprintf(stderr,"       this is required with option -theta3=<filename>\n");
      imgEmpty(&pet); dftEmpty(&tac);
      return(3);
    }
    /* Halflife is divided by 60 to get lambda in units 1/min */
    lambda=M_LN2/(pet.isotopeHalflife/60.0);
    if(verbose>1) fprintf(stdout, "lambda := %g [1/min]\n", lambda);
  }
  if(verbose>1) {
    printf("fittimeFinal := %g min\n", fittime);
    printf("fitdimt := %d\n", fitdimt);
  }
  /* Check that image is dynamic and fit time long enough */
  if(fitdimt<4) {
    fprintf(stderr, "Error: too few time frames for fitting.\n");
    if(verbose>1) imgInfo(&pet);
    imgEmpty(&pet); dftEmpty(&tac);
    return(2);
  }


  /* Allocate memory for tissue TAC and integral */
  ret=dftAddmem(&tac, 1);
  if(ret!=0) {
    fprintf(stderr, "Error: cannot allocate memory.\n");
    if(verbose>0) printf("  ret := %d\n", ret);
    imgEmpty(&pet); dftEmpty(&tac);
    return(3);
  }
  strcpy(tac.voi[0].voiname, "input");
  strcpy(tac.voi[1].voiname, "tissue");


  /* Determine the threshold based on reference tissue integral */
  threshold=calcThreshold*tac.voi[0].y2[fitdimt-1];
  if(verbose>2) printf("threshold_AUC := %g\n", threshold);


  /*
   *  Allocate result images and fill the header info;
   *  only the images that are needed are allocated
   */
  if(verbose>1) printf("allocating memory for parametric images\n");
  imgInit(&tout); imgInit(&bpout); imgInit(&r1out); imgInit(&k2out);
  imgInit(&k2sout); imgInit(&t3out); imgInit(&dualout);
  ret=imgAllocateWithHeader(&tout, pet.dimz, pet.dimy, pet.dimx, NNLS_N, &pet);
  if(ret==0)
    ret=imgAllocateWithHeader(&bpout, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  if(ret==0 && r1file[0])
    ret=imgAllocateWithHeader(&r1out, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  if(ret==0 && k2file[0])
    ret=imgAllocateWithHeader(&k2out, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  /* k2' image is needed internally by SRTM2 even if it is not saved */
  if(ret==0 && (k2sfile[0] || model==MODEL_SRTM2))
    ret=imgAllocateWithHeader(&k2sout, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  if(ret==0 && t3file[0])
    ret=imgAllocateWithHeader(&t3out, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  if(ret==0 && dualfile[0])
    ret=imgAllocateWithHeader(&dualout, pet.dimz, pet.dimy, pet.dimx, 1, &pet);
  if(ret) {
    fprintf(stderr, "Error (%d): out of memory.\n", ret);
    imgEmpty(&pet); dftEmpty(&tac); imgEmpty(&tout); imgEmpty(&bpout);
    imgEmpty(&r1out); imgEmpty(&k2out); imgEmpty(&t3out); imgEmpty(&dualout);
    imgEmpty(&k2sout);
    return(4);
  }
  /* Set the rest of image header */
  if(verbose>1) fprintf(stdout, "setting parametric image headers\n");
  bpout.start[0]=pet.start[0]; bpout.end[0]=pet.end[fitdimt-1];
  tout.unit=(char)CUNIT_UNITLESS;
  bpout.unit=(char)CUNIT_UNITLESS;
  if(r1file[0]) {
    r1out.unit=(char)CUNIT_UNITLESS;
    r1out.start[0]=pet.start[0]; r1out.end[0]=pet.end[fitdimt-1];
  }
  if(k2file[0]) {
    k2out.unit=(char)CUNIT_PER_MIN;
    k2out.start[0]=pet.start[0]; k2out.end[0]=pet.end[fitdimt-1];
  }
  if(k2sfile[0] || model==MODEL_SRTM2) {
    k2sout.unit=(char)CUNIT_PER_MIN;
    k2sout.start[0]=pet.start[0]; k2sout.end[0]=pet.end[fitdimt-1];
  }
  if(t3file[0]) {
    t3out.unit=(char)CUNIT_PER_MIN;
    t3out.start[0]=pet.start[0]; t3out.end[0]=pet.end[fitdimt-1];
  }
  if(dualfile[0]) {
    dualout.unit=(char)CUNIT_UNITLESS;
    dualout.start[0]=pet.start[0]; dualout.end[0]=pet.end[fitdimt-1];
  }


  /*
   *  Allocate memory required by NNLS: one contiguous buffer that holds
   *  the nnls_n columns of matrix A, array B, and the working array zz,
   *  each of length nnls_m
   */
  if(verbose>1) printf("allocating memory for NNLS\n");
  nnls_n=NNLS_N; nnls_m=fitdimt;
  nnls_mat=(double*)malloc(((nnls_n+2)*nnls_m)*sizeof(double));
  if(nnls_mat==NULL) {
    fprintf(stderr, "Error: cannot allocate memory for NNLS.\n");
    imgEmpty(&pet); dftEmpty(&tac); imgEmpty(&tout); imgEmpty(&bpout);
    imgEmpty(&r1out); imgEmpty(&k2out); imgEmpty(&t3out); imgEmpty(&dualout);
    imgEmpty(&k2sout);
    return(5);
  }
  for(n=0, dptr=nnls_mat; n<nnls_n; n++) {nnls_a[n]=dptr; dptr+=nnls_m;}
  nnls_b=dptr; dptr+=nnls_m; nnls_zz=dptr;

  /* Copy weights if available */
  /* or set them to frame lengths */
  if(verbose>2) printf("working with NNLS weights\n");
  if(weight==1 && pet.isWeight==0) {
    for(m=0; m<nnls_m; m++) pet.weight[m]=pet.end[m]-pet.start[m];
    pet.isWeight=1;
  }
  /* Compute NNLS weights */
  if(pet.isWeight) {
    for(m=0; m<nnls_m; m++) {
      tac.w[m]=pet.weight[m];
      if(tac.w[m]<=1.0e-20) tac.w[m]=0.0;
    }
  }



  /*
   *  Compute SRTM pixel-by-pixel
   */
  if(verbose>0) fprintf(stdout, "computing SRTM pixel-by-pixel\n");
  cr=tac.voi[0].y; cri=tac.voi[0].y2;
  ct=tac.voi[1].y; cti=tac.voi[1].y2;
  int thresholded_nr=0, nosolution_nr=0;
  fitStart=clock();
  for(pi=0; pi<pet.dimz; pi++) {
    if(verbose>2) printf("computing plane %d\n", pet.planeNumber[pi]);
    else if(pet.dimz>1 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
    for(yi=0; yi<pet.dimy; yi++) {
      for(xi=0; xi<pet.dimx; xi++) {
        /* Set regression coefs of this pixel to zero */
        for(n=0; n<nnls_n; n++) tout.m[pi][yi][xi][n]=0.0;
        /* Calculate pixel integral */
        for(fi=0; fi<tac.frameNr; fi++) ct[fi]=pet.m[pi][yi][xi][fi];
        ret=petintegral(tac.x1, tac.x2, ct, tac.frameNr, cti, NULL);
        if(verbose>6 && pi==pet.dimz/2 && yi==pet.dimy/3 && xi==pet.dimx/3) {
          printf("\nExample pixel pi=%d yi=%d xi=%d\n", pi, yi, xi);
          printf("  Cr         Cri          Ct          Cti\n");
          for(m=0; m<nnls_m; m++)
            printf("%12.4f %12.4f %12.4f %12.4f\n", cr[m],cri[m],ct[m],cti[m]);
        }
        /* if AUC at the end is less than threshold value, then do nothing */
        if(cti[fitdimt-1]<threshold) {
          thresholded_nr++;
          continue;
        }

        /* Fit using traditional formulation, if R1, k2 and/or k2' are needed */
        if(model==MODEL_SRTM2 ||
           r1file[0] || k2file[0] || k2sfile[0] || t3file[0])
        {
          /* Fill  A matrix: */
          /* function #1:  */
          for(m=0; m<nnls_m; m++) nnls_a[0][m]=cr[m];
          /* function #2:  */
          for(m=0; m<nnls_m; m++) nnls_a[1][m]=cri[m];
          /* function #3:  */
          for(m=0; m<nnls_m; m++) nnls_a[2][m]=-cti[m];
          /* Fill  B array:  */
          for(m=0; m<nnls_m; m++) nnls_b[m]=ct[m];
          /* Apply data weights */
          if(pet.isWeight) nnlsWght(nnls_n, nnls_m, nnls_a, nnls_b, tac.w);
          if(verbose>6 && pi==pet.dimz/2 && yi==pet.dimy/3 && xi==pet.dimx/3) {
            printf("Matrix A                     Array B\n");
            for(m=0; m<nnls_m; m++) {
              printf("%12.3f %12.3f %12.3f     %12.3f\n",
                nnls_a[0][m], nnls_a[1][m], nnls_a[2][m], nnls_b[m]);
            }
          }
          /* NNLS */
          ret=nnls(nnls_a, nnls_m, nnls_n, nnls_b, nnls_x, &nnls_rnorm,
                   nnls_wp, nnls_zz, nnls_index);
          if(ret>1) { /* no solution is possible */
            nosolution_nr++; continue;
          }
          for(n=0; n<nnls_n; n++) tout.m[pi][yi][xi][n]=nnls_x[n];
          /* Get R1 */
          if(r1file[0]) r1out.m[pi][yi][xi][0]=nnls_x[0];
          /* Get k2 */
          if(k2file[0]) k2out.m[pi][yi][xi][0]=nnls_x[1];
          /* Get theta3=p3+lambda */
          if(t3file[0]) {
            if(nnls_wp[2]==0.0) t3out.m[pi][yi][xi][0]=nnls_x[2]+lambda;
            else t3out.m[pi][yi][xi][0]=0.0;
          }
          /* Get k2' (k2 of reference region), when BP>0 */
          if(k2sfile[0] || model==MODEL_SRTM2) {
            if(nnls_x[2]>0.0) f=nnls_x[1]/nnls_x[2]; else f=0.0; // f=BP+1
            if(f>1.0 && nnls_x[0]>0.0 && nnls_x[1]>0.0)
              k2sout.m[pi][yi][xi][0]=nnls_x[1]/nnls_x[0];
          }
        }

        /* Estimate BP without division, unless using SRTM2 model */
        if(model==MODEL_SRTM2) continue;
        /* Set regression coefs of this pixel to zero again, because the */
        /* parameters of the BP fit will replace those stored above */
        for(n=0; n<nnls_n; n++) tout.m[pi][yi][xi][n]=0.0;

        /* Fill  A matrix: */
        /* function #1:  */
        for(m=0; m<nnls_m; m++) nnls_a[0][m]=cr[m];
        /* function #2:  */
        for(m=0; m<nnls_m; m++) nnls_a[1][m]=cri[m];
        /* function #3:  */
        for(m=0; m<nnls_m; m++) nnls_a[2][m]=-ct[m];
        /* Fill  B array:  */
        for(m=0; m<nnls_m; m++) nnls_b[m]=cti[m];
        /* Apply data weights */
        if(pet.isWeight) nnlsWght(nnls_n, nnls_m, nnls_a, nnls_b, tac.w);
        if(verbose>6 && pi==pet.dimz/2 && yi==pet.dimy/3 && xi==pet.dimx/3) {
          printf("Matrix A                     Array B\n");
          for(m=0; m<nnls_m; m++) {
            printf("%12.3f %12.3f %12.3f     %12.3f\n",
              nnls_a[0][m], nnls_a[1][m], nnls_a[2][m], nnls_b[m]);
          }
        }

        /* NNLS */
        ret=nnls(nnls_a, nnls_m, nnls_n, nnls_b, nnls_x, &nnls_rnorm,
                 nnls_wp, nnls_zz, nnls_index);
        if(ret>1) { /* no solution is possible */
          /* Pixel can not have been counted above, because failure of */
          /* the traditional fit already skipped the rest of this pixel */
          nosolution_nr++; continue;
        }
        for(n=0; n<nnls_n; n++) tout.m[pi][yi][xi][n]=nnls_x[n];
        /* Get BP; the second regression parameter is BP+1 */
        bpout.m[pi][yi][xi][0]=nnls_x[1];
        if(!bp_plus_one) bpout.m[pi][yi][xi][0]-=1.0;
        if(verbose>6 && pi==pet.dimz/2 && yi==pet.dimy/3 && xi==pet.dimx/3) {
          for(n=0; n<nnls_n; n++) printf(" nnls_x[%d]=%g", n, nnls_x[n]);
          printf("  bpout.m[%d][%d][%d][0]=%g\n",
                                       pi, yi, xi, bpout.m[pi][yi][xi][0]);
        }
        /* Count the number of i in set p in NNLS solution */
        for(n=m=0; n<nnls_n; n++) if(nnls_wp[n]==0.0) m++;
        if(dualfile[0]) dualout.m[pi][yi][xi][0]=m;
      } /* next column */
    } /* next row */
  } /* next plane */
  fitFinish=clock();
  free(nnls_mat);
  if(verbose>0) {
    fprintf(stdout, "\n");
    if(model==MODEL_SRTM2) fprintf(stdout, "first step ");
    fprintf(stdout, "done.\n");
  }
  if(verbose>1 || thresholded_nr>0)
    fprintf(stdout, "%d pixels were not fitted due to threshold.\n",
            thresholded_nr);
  if(verbose>1 || nosolution_nr>0)
    fprintf(stdout, "no NNLS solution for %d pixels.\n", nosolution_nr);


  /*
   *  Compute next step of SRTM2 pixel-by-pixel
   */
  if(model==MODEL_SRTM2) {

    if(verbose>0) fprintf(stdout, "calculating median of k2'\n");
    /* In how many pixels k2'>0 (and BP>0 as well)? */
    for(n=pi=0; pi<pet.dimz; pi++)
      for(yi=0; yi<pet.dimy; yi++) for(xi=0; xi<pet.dimx; xi++)
        if(k2sout.m[pi][yi][xi][0]>0.0001) n++;
    if(verbose>1) fprintf(stdout, "BP>0 and k2'>0 in %d pixels.\n", n);
    /* Median can not be calculated without any k2' estimates; stop here */
    /* instead of requesting a zero-sized memory block and fitting the */
    /* second step with an undefined k2' */
    if(n<1) {
      fprintf(stderr, "Error: k2' could not be estimated in any pixel.\n");
      imgEmpty(&pet); dftEmpty(&tac);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(7);
    }
    /* Allocate memory for n k2' values */
    double *k2sarray, k2smedian, sd;
    k2sarray=(double*)malloc(n*sizeof(double));
    if(k2sarray==NULL) {
      fprintf(stderr, "Error: cannot allocate memory for k2' values.\n");
      imgEmpty(&pet); dftEmpty(&tac);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return 6;
    }
    /* Calculate the median of k2' */
    for(n=pi=0; pi<pet.dimz; pi++)
      for(yi=0; yi<pet.dimy; yi++) for(xi=0; xi<pet.dimx; xi++)
        if(k2sout.m[pi][yi][xi][0]>0.0001)
          k2sarray[n++]=k2sout.m[pi][yi][xi][0];
    k2smedian=dmedian(k2sarray, n);
    if(verbose>0) fprintf(stdout, "k2s_median := %g\n", k2smedian);
    if(verbose>1) {
      f=dmean(k2sarray, n, &sd);
      fprintf(stdout, "k2s_mean := %g +- %g\n", f, sd);
    }
    free(k2sarray);

    /* Allocate memory for NNLS; only two parameters are fitted now */
    if(verbose>1) fprintf(stdout, "allocating memory for the second NNLS\n");
    nnls_n=2; nnls_m=fitdimt;
    nnls_mat=(double*)malloc(((nnls_n+2)*nnls_m)*sizeof(double));
    if(nnls_mat==NULL) {
      fprintf(stderr, "Error: cannot allocate memory for NNLS.\n");
      imgEmpty(&pet); dftEmpty(&tac); imgEmpty(&tout); imgEmpty(&bpout);
      imgEmpty(&r1out); imgEmpty(&k2out); imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(5);
    }
    for(n=0, dptr=nnls_mat; n<nnls_n; n++) {nnls_a[n]=dptr; dptr+=nnls_m;}
    nnls_b=dptr; dptr+=nnls_m; nnls_zz=dptr;

    if(verbose>0) fprintf(stdout, "computing SRTM2 2nd step pixel-by-pixel\n");

    cr=tac.voi[0].y; cri=tac.voi[0].y2;
    ct=tac.voi[1].y; cti=tac.voi[1].y2;
    thresholded_nr=0; nosolution_nr=0;
    for(pi=0; pi<pet.dimz; pi++) {
      if(verbose>2) printf("computing plane %d\n", pet.planeNumber[pi]);
      else if(pet.dimz>1 && verbose>0) {fprintf(stdout, "."); fflush(stdout);}
      for(yi=0; yi<pet.dimy; yi++) {
        for(xi=0; xi<pet.dimx; xi++) {
          /* Set all regression coefs of this pixel to zero, including */
          /* the third one that was set in the first step */
          for(n=0; n<(nnls_n+1); n++) tout.m[pi][yi][xi][n]=0.0;
          bpout.m[pi][yi][xi][0]=0.0;
          /* Calculate pixel integral */
          for(fi=0; fi<tac.frameNr; fi++) ct[fi]=pet.m[pi][yi][xi][fi];
          ret=petintegral(tac.x1, tac.x2, ct, tac.frameNr, cti, NULL);
          /* if AUC at the end is less than threshold value, then do nothing */
          if(cti[fitdimt-1]<threshold) {
            thresholded_nr++; continue;
          }

          /* Estimate BP without division */

          /* Fill  A matrix: */
          for(m=0; m<nnls_m; m++) nnls_a[0][m]=cr[m]+k2smedian*cri[m];
          for(m=0; m<nnls_m; m++) nnls_a[1][m]=-ct[m];
          /* Fill  B array */
          for(m=0; m<nnls_m; m++) nnls_b[m]=k2smedian*cti[m];
          /* Apply data weights */
          if(pet.isWeight) nnlsWght(nnls_n, nnls_m, nnls_a, nnls_b, tac.w);

          /* NNLS */
          ret=nnls(nnls_a, nnls_m, nnls_n, nnls_b, nnls_x, &nnls_rnorm,
                   nnls_wp, nnls_zz, nnls_index);
          if(ret>1) { /* no solution is possible */
            nosolution_nr++; continue;
          }
          for(n=0; n<nnls_n; n++) tout.m[pi][yi][xi][n]=nnls_x[n];
          /* Get BP; the first regression parameter is BP+1 */
          bpout.m[pi][yi][xi][0]=nnls_x[0];
          if(!bp_plus_one) bpout.m[pi][yi][xi][0]-=1.0;
          if(verbose>6 && pi==pet.dimz/2 && yi==pet.dimy/3 && xi==pet.dimx/3) {
            for(n=0; n<nnls_n; n++) printf("\n nnls_x[%d]=%g", n, nnls_x[n]);
            printf("  bpout.m[%d][%d][%d][0]=%g\n",
                      pi, yi, xi, bpout.m[pi][yi][xi][0]);
          }

          /* Estimate R1 without division, if needed */
          if(!r1file[0]) continue;

          /* Fill  A matrix: */
          for(m=0; m<nnls_m; m++) nnls_a[0][m]=cr[m]+k2smedian*cri[m];
          for(m=0; m<nnls_m; m++) nnls_a[1][m]=-k2smedian*cti[m];
          /* Fill  B array */
          for(m=0; m<nnls_m; m++) nnls_b[m]=ct[m];
          /* Apply data weights */
          if(pet.isWeight) nnlsWght(nnls_n, nnls_m, nnls_a, nnls_b, tac.w);

          /* NNLS */
          ret=nnls(nnls_a, nnls_m, nnls_n, nnls_b, nnls_x, &nnls_rnorm,
                   nnls_wp, nnls_zz, nnls_index);
          if(ret>1) continue; /* no solution is possible */
          /* Get R1 */
          r1out.m[pi][yi][xi][0]=nnls_x[0];

        } /* next column */
      } /* next row */
    } /* next plane */
    fitFinish=clock();
    free(nnls_mat);
    if(verbose>0) {
      fprintf(stdout, "\ndone.\n");
    }
    if(verbose>1 || thresholded_nr>0)
      fprintf(stdout, "%d pixels were not fitted due to threshold.\n",
              thresholded_nr);
    if(verbose>1 || nosolution_nr>0)
      fprintf(stdout, "no NNLS solution for %d pixels.\n", nosolution_nr);
  }


  /* No need for dynamic image or input tac anymore */
  imgEmpty(&pet); dftEmpty(&tac);


  /*
   *  Save parametric image(s); processing is stopped at the first failure
   */
  if(verbose>1) fprintf(stdout, "writing parametric images\n");
  ret=imgWrite(bpfile, &bpout);
  if(ret) {
    fprintf(stderr, "Error: %s\n", bpout.statmsg);
    imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
    imgEmpty(&t3out); imgEmpty(&dualout);
    imgEmpty(&k2sout);
    return(13);
  }
  if(verbose>0) {
    if(!bp_plus_one) fprintf(stdout, "BP image written in %s\n", bpfile);
    else fprintf(stdout, "(BP+1) image written in %s\n", bpfile);
  }
  if(r1file[0]) {
    ret=imgWrite(r1file, &r1out);
    if(ret) {
      fprintf(stderr, "Error: %s\n", r1out.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(14);
    }
    if(verbose>0) fprintf(stdout, "R1 image written in %s\n", r1file);
  }
  if(k2file[0]) {
    ret=imgWrite(k2file, &k2out);
    if(ret) {
      fprintf(stderr, "Error: %s\n", k2out.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(15);
    }
    if(verbose>0) fprintf(stdout, "k2 image written in %s\n", k2file);
  }
  if(k2sfile[0]) {
    ret=imgWrite(k2sfile, &k2sout);
    if(ret) {
      fprintf(stderr, "Error: %s\n", k2sout.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(16);
    }
    if(verbose>0) fprintf(stdout, "k2' image written in %s\n", k2sfile);
  }
  if(t3file[0]) {
    ret=imgWrite(t3file, &t3out);
    if(ret) {
      fprintf(stderr, "Error: %s\n", t3out.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(17);
    }
    if(verbose>0) fprintf(stdout, "theta3 image written in %s\n", t3file);
  }
  if(dualfile[0]) {
    ret=imgWrite(dualfile, &dualout);
    if(ret) {
      fprintf(stderr, "Error: %s\n", dualout.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(18);
    }
    if(verbose>0)
      fprintf(stdout, "dual solution image written in %s\n", dualfile);
  }
  if(regfile[0]) {
    ret=imgWrite(regfile, &tout);
    if(ret) {
      fprintf(stderr, "Error: %s\n", tout.statmsg);
      imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
      imgEmpty(&t3out); imgEmpty(&dualout);
      imgEmpty(&k2sout);
      return(19);
    }
    if(verbose>0)
      fprintf(stdout, "Regression parameter image written in %s\n", regfile);
  }

  imgEmpty(&tout); imgEmpty(&bpout); imgEmpty(&r1out); imgEmpty(&k2out);
  imgEmpty(&t3out); imgEmpty(&dualout); imgEmpty(&k2sout);


  /* How long did the fitting take */
  if(verbose>1) fprintf(stdout, "parameter estimation time := %.1f [s]\n",
    (double)(fitFinish-fitStart) / CLOCKS_PER_SEC );

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
