/** @file parmean.c
 *  @brief Calculate mean, median, and SD of parameter values.
 *  @details Replaces old application resplsta.
 *  @copyright (c) Turku PET Centre
 *  @author Vesa Oikonen
 */
/// @cond
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/*****************************************************************************/
#include "tpcextensions.h"
#include "tpcstatist.h"
#include "tpcpar.h"
/*****************************************************************************/

/*****************************************************************************/
/* Usage text printed by tpcPrintUsage() and tpcHtmlUsage(); one array entry
   per output line, terminated by a null pointer. */
static char *info[] = {
  "Calculate mean, median, and SD of parameter values.",
  "By default, statistics are calculated over all regional results.",
  " ",
  "Usage: @P [options] filename [filename for statistics]",
  " ",
  "Options:",
  " -par=<list>",
  "     List of parameter numbers or names to use; all by default.",
  " -tac=<list>",
  "     List of TAC numbers or names to use; all by default.",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example 1:",
  "Calculate statistics from parameter numbers 1-3 into a file",
  "     @P -par=1-3 simresults.par simstat.par",
  " ",
  "Example 2:",
  "Calculate statistics from regions with name 'GM' and print in stdout",
  "     @P -tac=GM simresults.par",
  " ",
  "See also: paradd, parcoll, parformat, parrenp, parai, rescoll",
  " ",
  "Keywords: simulation, parameter, average, standard deviation",
  0};
/*****************************************************************************/

/*****************************************************************************/
/* Turn on wildcard expansion (globbing) of the command line. It is disabled
   by default in mingw-w64 (_dowildcard=0), so it is enabled here explicitly.
   In MinGW32 the equivalent switch is _CRT_glob; if that is ever needed, use
     #undef _CRT_glob
     #define _CRT_glob -1
   instead. In Unix & Linux wildcard command line processing is enabled by
   default. */
int _dowildcard = -1;
/*****************************************************************************/

/*****************************************************************************/
/**
 *  Main: compute mean, median, SD, min, max, and N for each selected parameter
 *  over the selected TACs (regions) of a parameter file.
 *
 *  Results are written with parWrite() into the second filename argument, or
 *  into stdout when no output filename was given or when dry mode is on.
 *
 *  @param argc Number of command-line arguments.
 *  @param argv Command-line arguments: options first, then the parameter
 *   filename, and optionally the filename for statistics.
 *  @return 0 on success; 1 for invalid or missing arguments; 2 when the
 *   parameter file cannot be read or sorted, or contains too little data;
 *   3 when no parameter and 4 when no TAC was selected; 5 when memory cannot
 *   be allocated; 11 when the output file cannot be opened; 12 when writing
 *   the results failed.
 */
int main(int argc, char **argv)
{
  int ai, help=0, version=0, verbose=1;
  char newfile[FILENAME_MAX], parfile[FILENAME_MAX];
  char *pars=NULL, *tacs=NULL; // heap copies of option values; freed on every exit path
  int drymode=0; // 1=no files are actually edited or created


#ifdef MINGW
  // Use Unix/Linux default of two-digit exponents in MinGW on Windows
  _set_output_format(_TWO_DIGIT_EXPONENT);
#endif


  /*
   *  Get arguments
   */
  if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
  newfile[0]=parfile[0]=(char)0;
  /* Options */
  for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
    if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
    char *cptr=argv[ai]+1; if(*cptr=='-') cptr++; if(!*cptr) continue;
    if(strncasecmp(cptr, "PAR=", 4)==0 && strnlen(cptr, 5)>4) {
      /* Option may be repeated; the last one wins, so release the previous copy */
      free(pars); pars=strdup(cptr+4);
      if(pars==NULL) {
        fprintf(stderr, "Error: cannot allocate memory.\n");
        free(tacs); return(5);
      }
      continue;
    } else if(strncasecmp(cptr, "TAC=", 4)==0 && strnlen(cptr, 5)>4) {
      free(tacs); tacs=strdup(cptr+4);
      if(tacs==NULL) {
        fprintf(stderr, "Error: cannot allocate memory.\n");
        free(pars); return(5);
      }
      continue;
    } else if(strncasecmp(cptr, "DRY", 2)==0) {
      /* NOTE(review): only the first two characters are compared, so any
         option starting with "dr" enables dry mode; kept as is to stay
         backward-compatible - confirm whether this abbreviation is intended. */
      drymode=1; continue;
    }
    fprintf(stderr, "Error: invalid option '%s'.\n", argv[ai]);
    free(pars); free(tacs);
    return(1);
  } else break; // tac name argument may start with '-'

  /* Print help or version? */
  if(help==2) {tpcHtmlUsage(argv[0], info, ""); free(pars); free(tacs); return(0);}
  if(help) {tpcPrintUsage(argv[0], info, stdout); free(pars); free(tacs); return(0);}
  if(version) {tpcPrintBuild(argv[0], stdout); free(pars); free(tacs); return(0);}

  TPCSTATUS status; statusInit(&status);
  statusSet(&status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  status.verbose=verbose-3;

  /* Process other arguments, starting from the first non-option */
  if(ai<argc) {strlcpy(parfile, argv[ai], FILENAME_MAX); ai++;}
  if(ai<argc) {strlcpy(newfile, argv[ai], FILENAME_MAX); ai++;}
  if(ai<argc) {
    fprintf(stderr, "Error: too many arguments: '%s'.\n", argv[ai]);
    free(pars); free(tacs);
    return(1);
  }

  /* Did we get all the information that we need? */
  if(!parfile[0]) {
    fprintf(stderr, "Error: missing parameter filename.\n");
    free(pars); free(tacs);
    return(1);
  }


  /* In verbose mode print arguments and options */
  if(verbose>1) {
    printf("parfile := %s\n", parfile);
    if(newfile[0]) printf("newfile := %s\n", newfile);
    if(pars!=NULL) printf("pars := '%s'\n", pars);
    if(tacs!=NULL) printf("tacs := '%s'\n", tacs);
    printf("drymode := %d\n", drymode);
  }



  /*
   *  Read the file
   */
  if(verbose>1) printf("reading %s\n", parfile);
  PAR par; parInit(&par);
  if(parRead(&par, parfile, &status)!=TPCERROR_OK) {
    fprintf(stderr, "Error: %s\n", errorMsg(status.error));
    parFree(&par); free(pars); free(tacs); return(2);
  }
  if(verbose>2) {
    printf("fileformat := %s\n", parFormattxt(par.format));
    printf("parNr := %d\n", par.parNr);
    printf("tacNr := %d\n", par.tacNr);
  }
  /* Statistics need at least two TACs and one parameter in the file */
  if(par.tacNr<2 || par.parNr<1) {
    fprintf(stderr, "Error: no data to calculate statistics.\n");
    parFree(&par); free(pars); free(tacs); return(2);
  }
  /* Sort regions by name */
  if(parSortByName(&par, &status)!=TPCERROR_OK) {
    fprintf(stderr, "Error: %s\n", errorMsg(status.error));
    parFree(&par); free(pars); free(tacs); return(2);
  }

  /* List the region and parameter names */
  if(verbose>10) {
    fflush(stdout);
    printf("\nRegion names in %s:\n", parfile);
    for(int i=0; i<par.tacNr; i++) printf("%s\n", par.r[i].name);
    fflush(stdout);
    printf("\nParameter names in %s:\n", parfile);
    for(int i=0; i<par.parNr; i++) printf("%s\n", par.n[i].name);
    fflush(stdout);
  }


  /*
   *  Select the data to be used
   */
  int parNr=0, tacNr=0;

  if(pars==NULL) {
    /* No parameters specified: select all */
    parSelectParameters(&par, NULL, 0, &status);
  } else {
    /* Each token of the comma/semicolon separated list is matched separately */
    char buf[512];
    int tokenNr=strTokenNr(pars, ",;");
    for(int i=0; i<tokenNr; i++) {
      if(strTokenNCpy(pars, ",;", i+1, buf, 512)<1) continue;
      int n=parSelectParameters(&par, buf, 0, &status);
      if(n==0) fprintf(stderr, "Warning: no parameter(s) match '%s'\n", buf);
      else if(verbose>2) printf("%d parameter(s) match '%s'\n", n, buf);
    }
  }
  parNr=parSelectedParameters(&par);
  if(parNr==0) {
    fprintf(stderr, "Error: no matching parameter(s) to use.\n");
    parFree(&par); free(pars); free(tacs); return(3);
  }
  if(verbose>2 || drymode) {
    for(int i=0; i<par.parNr; i++)
      if(par.n[i].sw) printf("selected_parameter: %s\n", par.n[i].name);
    fflush(stdout);
  }

  if(tacs==NULL) {
    /* No TACs specified: select all */
    parSelectTACs(&par, NULL, 0, &status);
  } else {
    char buf[512];
    int tokenNr=strTokenNr(tacs, ",;");
    for(int i=0; i<tokenNr; i++) {
      if(strTokenNCpy(tacs, ",;", i+1, buf, 512)<1) continue;
      int n=parSelectTACs(&par, buf, 0, &status);
      if(n==0) fprintf(stderr, "Warning: no TAC(s) match '%s'\n", buf);
      else if(verbose>2) printf("%d TAC(s) match '%s'\n", n, buf);
    }
  }
  tacNr=parSelectedTACs(&par);
  if(tacNr==0) {
    fprintf(stderr, "Error: no matching TAC(s) to use.\n");
    parFree(&par); free(pars); free(tacs); return(4);
  }
  if(verbose>2 || drymode) {
    for(int i=0; i<par.tacNr; i++)
      if(par.r[i].sw) printf("selected_tac: %s\n", par.r[i].name);
    fflush(stdout);
  }
  /* par and tac strings are not needed later */
  free(pars); free(tacs); pars=tacs=NULL;

  /*
   *  Delete parameters and TACs that were not selected.
   *  Walk backwards so that a deletion does not shift indices still to visit.
   */
  if(parNr!=par.parNr) {
    int i=par.parNr-1;
    while(i>=0) {
      if(par.n[i].sw==0) parDeletePar(&par, i);
      i--;
    }
  }
  if(tacNr!=par.tacNr) {
    int i=par.tacNr-1;
    while(i>=0) {
      if(par.r[i].sw==0) parDeleteTAC(&par, i);
      i--;
    }
  }


  /*
   *  Allocate space for the statistics:
   *  one output 'TAC' row per statistic, one column per parameter.
   */
  if(verbose>1) {printf("preparing the results\n"); fflush(stdout);}
  PAR out; parInit(&out);
  if(parAllocate(&out, par.parNr, 6)!=TPCERROR_OK) {
    fprintf(stderr, "Error: cannot allocate memory.\n");
    parFree(&par); parFree(&out); return(5);
  }
  out.parNr=par.parNr; out.tacNr=6;
  /* Set the file format */
  if(newfile[0]) {
    out.format=parFormatFromExtension(newfile);
    if(out.format==PAR_FORMAT_UNKNOWN) out.format=par.format;
  } else out.format=par.format;
  if(out.format==PAR_FORMAT_UNKNOWN) out.format=PAR_FORMAT_TSV_UK;
  /* Set parameter names */
  for(int i=0; i<out.parNr; i++) strcpy(out.n[i].name, par.n[i].name);
  /* Set 'tac' names */
  strcpy(out.r[0].name, "Mean");
  strcpy(out.r[1].name, "Median");
  strcpy(out.r[2].name, "STDEV");
  strcpy(out.r[3].name, "Min");
  strcpy(out.r[4].name, "Max");
  strcpy(out.r[5].name, "N");


  /*
   *  Calculate the means etc
   */
  /* One heap buffer, reused for every parameter; its size depends on the file
     contents, so it is not placed on the stack. */
  double *p=malloc((size_t)(par.tacNr>0 ? par.tacNr : 1)*sizeof *p);
  if(p==NULL) {
    fprintf(stderr, "Error: cannot allocate memory.\n");
    parFree(&par); parFree(&out); return(5);
  }
  for(int pi=0; pi<par.parNr; pi++) {
    if(verbose>2) {printf("computing statistics for %s\n", par.n[pi].name); fflush(stdout);}
    /* Collect only the finite values of this parameter */
    int n=0;
    for(int ri=0; ri<par.tacNr; ri++) if(isfinite(par.r[ri].p[pi])) p[n++]=par.r[ri].p[pi];
    statMeanSD(p, n, &out.r[0].p[pi], &out.r[2].p[pi], NULL);
    out.r[1].p[pi]=statMedian(p, n);
    doubleRange(p, n, &out.r[3].p[pi], &out.r[4].p[pi]);
    out.r[5].p[pi]=n;
  }
  free(p);
  parFree(&par);


  /*
   *  Write the results
   */
  {
    /* Results go into a file only when a name was given and dry mode is off */
    const int toFile=(newfile[0] && drymode==0);
    FILE *fp=NULL;
    if(toFile) {
      if(verbose>1) {printf("  saving\n"); fflush(stdout);}
      fp=fopen(newfile, "w");
      if(fp==NULL) {
        fprintf(stderr, "Error: cannot open file for writing.\n");
        parFree(&out); return(11);
      }
    } else {
      fp=stdout;
    }
    if(newfile[0] && drymode) {printf("  results would be saved in %s\n", newfile); fflush(stdout);}
    int ret=parWrite(&out, fp, PAR_FORMAT_UNKNOWN, 1, &status);
    parFree(&out);
    /* fclose() flushes buffered data, so a failure here means the file is incomplete */
    int closeFailed=0;
    if(toFile && fclose(fp)!=0) closeFailed=1;
    if(ret!=TPCERROR_OK) {
      fprintf(stderr, "Error: %s\n", errorMsg(status.error));
      return(12);
    }
    if(closeFailed) {
      fprintf(stderr, "Error: cannot write file.\n");
      return(12);
    }
    if(toFile && verbose>0) printf("written %s\n", newfile);
  }

  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/// @endcond
