/******************************************************************************

  Copyright (c) 2013 Turku PET Centre

  File:        csv
  Description: General I/O functions for CSV files (comma-separated values).

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 3 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  See the GNU Lesser General Public License for more details:
  http://www.gnu.org/copyleft/lesser.html

  You should have received a copy of the GNU Lesser General Public License
  along with this library/program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  Turku PET Centre hereby disclaims all copyright interest in the program.
  Juhani Knuuti
  Director, Professor
  Turku PET Centre, Turku, Finland, http://www.turkupetcentre.fi/

  Modification history:
  2013-10-07 Vesa Oikonen
    First created, based on libtpccurveio.


******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#include <ctype.h>
#include <string.h>
#include <strings.h>
/*****************************************************************************/
#include "libtpcmisc.h"
/*****************************************************************************/
#include "include/tacio.h"
/*****************************************************************************/

/*****************************************************************************/
/** Set all members of a CSV struct to their empty state.
 *  Nothing is freed here, so this is meant for a struct that does not own
 *  any allocated memory yet.
 */
void csvInit(
  /** Pointer to CSV struct; if NULL, nothing is done */
  CSV *csv
) {
  if(csv!=NULL) {
    /* No field table and no fields */
    csv->c=NULL;
    csv->nr=0;
    csv->row_nr=0;
    csv->col_nr=0;
    /* Zero means that the field separator has not been set */
    csv->separator='\0';
  }
}
/*****************************************************************************/

/*****************************************************************************/
/** Free the memory owned by a CSV struct and reset it to the empty state:
 *  the text of every field and the field table itself are freed, the
 *  counters are zeroed and the separator is cleared.
 */
void csvEmpty(
  /** Pointer to initiated CSV struct; if NULL, nothing is done */
  CSV *csv
) {
  int field=0;

  if(csv==NULL) return;
  /* Release the text of each field; free() accepts NULL for empty fields */
  while(field<csv->nr) {
    free(csv->c[field].content);
    field++;
  }
  /* Release the field table itself */
  free(csv->c);
  csv->c=NULL;
  /* Back to the same state that csvInit() sets */
  csv->nr=0;
  csv->row_nr=0;
  csv->col_nr=0;
  csv->separator='\0';
}
/*****************************************************************************/

/*****************************************************************************/
/** Guess the field separator (semi-colon, tab, or comma) of a CSV or TSV
 *  file and, as far as possible, the decimal separator used inside the
 *  fields (comma or dot).
 *
 *  The whole file is read once, counting semi-colons, tabs, commas and dots
 *  that are outside double quotes, and the file is then rewound.
 *  The function returns nothing; the results are passed via the pointers.
 */
void csvSeparator(
  /** Pointer to ASCII file opened for reading; rewound before return */
  FILE *fp,
  /** Pointer to char where field separator character will be written;
   *  default ',' will be written if not determined; enter NULL if not needed.*/
  char *field_separator,
  /** Pointer to char where decimal separator character will be written;
   *  default '.' will be written if not determined; enter NULL if not needed.*/
  char *decimal_separator,
  /** Verbose level; if zero, nothing is printed into stdout */
  int verbose
) {
  int ch, quoted=0;
  int n_semicolon=0, n_tab=0, n_comma=0, n_dot=0;
  char fsep, dsep;

  if(verbose>0) printf("csvSeparator()\n");

  /* Nothing to do if the caller wants neither of the results */
  if(field_separator==NULL && decimal_separator==NULL) return;
  /* Start from the defaults, which stay in effect on any early return */
  if(field_separator!=NULL) *field_separator=',';
  if(decimal_separator!=NULL) *decimal_separator='.';
  if(fp==NULL) return;

  /* Count the candidate characters that are not inside double quotes */
  while((ch=fgetc(fp))!=EOF) {
    if(ch=='"') {quoted=!quoted; continue;}
    if(quoted) continue;
    switch(ch) {
      case ';':  n_semicolon++; break;
      case '\t': n_tab++; break;
      case ',':  n_comma++; break;
      case '.':  n_dot++; break;
      default:   break;
    }
  }
  rewind(fp);
  if(verbose>1) {
    printf("semicolon_nr := %d\n", n_semicolon);
    printf("tabulator_nr := %d\n", n_tab);
    printf("dot_nr := %d\n", n_dot);
    printf("comma_nr := %d\n", n_comma);
  }
  /* None of the candidates was found: keep the defaults */
  if(!(n_semicolon!=0 || n_tab!=0 || n_dot!=0 || n_comma!=0)) return;

  if(n_semicolon>0) {
    /* A semi-colon outside quotes is taken as the field separator, and
       then the decimal separator usually is comma */
    fsep=';'; dsep=',';
  } else if(n_tab>0) {
    /* With tab as field separator the decimal separator can be either one;
       the more frequent one wins, and dot is the guess when the counts are
       equal (including the case that neither exists) */
    fsep='\t';
    dsep=(n_comma>n_dot) ? ',' : '.';
  } else {
    /* Comma as field separator leaves only dot for the decimal separator */
    fsep=','; dsep='.';
  }
  if(verbose>1) {
    if(fsep=='\t') printf("field_separator := tab\n");
    else printf("field_separator := %c\n", fsep);
    printf("decimal_separator := %c\n", dsep);
  }
  if(field_separator!=NULL) *field_separator=fsep;
  if(decimal_separator!=NULL) *decimal_separator=dsep;
}
/*****************************************************************************/

/*****************************************************************************/
/** Scan a CSV file from the current file position to its end and split it
 *  into fields; local helper for csvRead().
 *
 *  Double quote characters toggle the quoted state and are not stored;
 *  separators and line breaks inside quotes are part of the field text.
 *  Outside quotes a CR or LF ends the row, and any CR/LF directly following
 *  another one (the LF of a CRLF pair, or blank lines) is skipped.
 *  If the file does not end with a line break, the last field is still
 *  counted/stored. Fields longer than MAX_CSV_FIELD_LENGTH characters are
 *  truncated to that length.
 *
 *  The same function is used for counting and for storing, so that both
 *  passes always agree on the number of fields.
\return Returns TACIO_OK when successful, TACIO_OUTOFMEMORY if memory for
  field text could not be allocated, or TACIO_FAULT if more fields were found
  than csv->nr has room for.
 */
static int csvReadScan(
  /** Pointer to file opened for reading */
  FILE *fp,
  /** Field separator character */
  char separator,
  /** Pointer to CSV struct whose field table has room for csv->nr zeroed
   *  items, and whose row_nr and col_nr are zero; fields are stored there.
   *  Enter NULL to only count the fields. */
  CSV *csv,
  /** Pointer to int where the number of fields found is written */
  int *field_nr
) {
  enum {MAX_CSV_FIELD_LENGTH=1024};
  char buf[MAX_CSV_FIELD_LENGTH+1];
  int ch, previous=EOF, inside_quotes=0;
  int len=0, nr=0, row=0, col=0;
  int end_of_field, end_of_line;

  *field_nr=0;
  for(;;) {
    ch=fgetc(fp); end_of_field=end_of_line=0;

    if(ch==EOF) {
      /* Nothing is pending if nothing was read at all, or if the last
         row was terminated by a line break */
      if(previous==EOF) break;
      if(!inside_quotes && (previous==13 || previous==10)) break;
      /* Otherwise the last row lacks its line break; finish it here */
      end_of_field=end_of_line=1;
    } else if(ch=='"') {
      inside_quotes=!inside_quotes; previous=ch;
      continue;
    } else if(!inside_quotes) {
      if(ch==separator) {
        end_of_field=1;
      } else if(ch==13 || ch==10) {
        /* Second character of CRLF, or a blank line: belongs to no field */
        if(previous==13 || previous==10) {previous=ch; continue;}
        end_of_field=end_of_line=1;
      }
    }

    if(!end_of_field) {
      /* Ordinary character: keep it unless the field is already too long */
      if(len<MAX_CSV_FIELD_LENGTH) buf[len++]=(char)ch;
      previous=ch;
      continue;
    }

    /* A field is complete */
    if(csv!=NULL) {
      if(nr>=csv->nr) return TACIO_FAULT;
      csv->c[nr].row=1+row; csv->c[nr].col=1+col;
      /* Empty fields keep NULL as their content */
      if(len>0) {
        csv->c[nr].content=(char*)malloc((size_t)len+1);
        if(csv->c[nr].content==NULL) return TACIO_OUTOFMEMORY;
        memcpy(csv->c[nr].content, buf, (size_t)len);
        csv->c[nr].content[len]=(char)0;
      }
    }
    nr++; col++; len=0;
    if(end_of_line) {
      if(csv!=NULL) {
        if(col>csv->col_nr) csv->col_nr=col;
        csv->row_nr=1+row;
      }
      row++; col=0;
    }
    if(ch==EOF) break;
    previous=ch;
  }
  *field_nr=nr;
  return TACIO_OK;
}
/*****************************************************************************/

/*****************************************************************************/
/** Read CSV file contents into initiated CSV struct,
 *  allocating memory as needed.
 *
 *  Any previous contents of the struct are freed first, but a field
 *  separator that was set in the struct before the call is kept and used;
 *  if it is zero, the separator is determined with csvSeparator().
 *  Each field gets its 1-based row and column number; empty fields have
 *  NULL as content. If reading fails after allocation, the struct is
 *  emptied again.
\return Returns TACIO status code: TACIO_OK when successful, TACIO_FAULT for
  NULL arguments, TACIO_CANNOTOPEN, TACIO_INVALIDFORMAT, TACIO_TOOBIG, or
  TACIO_OUTOFMEMORY.
 */
int csvRead(
  /** Pointer to initiated CSV struct; any previous contents are deleted */
  CSV *csv,
  /** Name of CSV file to read */
  char *fname,
  /** Verbose level; if zero, nothing is printed into stdout */
  int verbose
) {
  FILE *fp;
  int nr, ret, status;
  char preset_separator;

  if(verbose>0)
    printf("csvRead(csv, '%s', ...)\n", fname!=NULL ? fname : "(null)");
  /* Check input */
  if(csv==NULL || fname==NULL) return TACIO_FAULT;

  /* Delete previous contents, but keep the separator if set by caller */
  preset_separator=csv->separator;
  csvEmpty(csv);
  csv->separator=preset_separator;

  /* Open file; note that 'b' is required for fgetpos() and fsetpos() to work
     correctly, but those are not needed here */
  fp=fopen(fname, "r"); if(fp==NULL) return TACIO_CANNOTOPEN;

  /* Check the file size; counting stops at the first control character
     other than CR, LF, or tab.
     NOTE(review): such a file is not rejected here, only the counted size
     is cut short; confirm whether it should return TACIO_INVALIDFORMAT. */
  nr=0;
  while((ret=fgetc(fp))!=EOF) {
    if(iscntrl(ret) && ret!=13 && ret!=10 && ret!=9) break;
    nr++;
  }
  if(verbose>1) printf("filesize := %d\n", nr);
  if(nr<2) {fclose(fp); return TACIO_INVALIDFORMAT;}
  if(nr>5000000) {fclose(fp); return TACIO_TOOBIG;}
  rewind(fp);

  /* Determine the field separator (unless set outside) */
  if(csv->separator==(char)0)
    csvSeparator(fp, &csv->separator, NULL, verbose-6);
  if(verbose>1) printf("separator := '%c'\n", csv->separator);

  /* Determine the number of fields in CSV file */
  status=csvReadScan(fp, csv->separator, NULL, &nr);
  rewind(fp); if(verbose>1) printf("field_nr := %d\n", nr);
  if(status!=TACIO_OK) {fclose(fp); return status;}
  if(nr<1) {fclose(fp); return TACIO_INVALIDFORMAT;}

  /* Allocate memory for fields; calloc leaves all contents as NULL */
  csv->c=(CSV_item*)calloc((size_t)nr, sizeof(CSV_item));
  if(csv->c==NULL) {fclose(fp); return TACIO_OUTOFMEMORY;}
  csv->nr=nr;

  /* Copy field contents from CSV file */
  status=csvReadScan(fp, csv->separator, csv, &nr);
  fclose(fp);
  if(status!=TACIO_OK) {
    /* Do not leave a partially filled struct behind */
    csvEmpty(csv);
    csv->separator=preset_separator;
    return status;
  }
  /* Only the fields that were actually stored are valid */
  if(nr<csv->nr) csv->nr=nr;

  return TACIO_OK;
}
/*****************************************************************************/

/*****************************************************************************/
/** Print the contents of a CSV struct into stdout for inspection:
 *  first the counters and the separator, then every field in single quotes,
 *  one output line per CSV row, with tabs between the fields.
 */
void csvPrint(
  /** Pointer to struct containing CSV data */
  CSV *csv
) {
  int idx, current_row=0;
  const char *text;

  if(csv==NULL) {printf("csv := NULL\n"); return;}
  if(csv->nr<1) {printf("csv := empty\n"); return;}

  printf("csv_nr := %d\n", csv->nr);
  printf("csv_row_nr := %d\n", csv->row_nr);
  printf("csv_col_nr := %d\n", csv->col_nr);
  printf("csv_separator := %c\n", csv->separator);

  for(idx=0; idx<csv->nr; idx++) {
    if(csv->c[idx].row>current_row) {
      /* A field with a higher row number starts a new output line */
      current_row=csv->c[idx].row;
      printf("\n");
    } else if(csv->c[idx].col>1) {
      printf("\t");
    }
    /* Fields without content are shown as a pair of quotes */
    text=csv->c[idx].content;
    if(text==NULL) text="";
    printf("'%s'", text);
  }
  printf("\n");
}
/*****************************************************************************/

/*****************************************************************************/
/** Write specified string into (CSV) file, opened for writing.
 *
 *  The string is written one character at a time, so no temporary copy is
 *  allocated, and every write is checked for failure.
\return Returns 0 when successful (also when there was nothing to write),
  1 if file pointer is NULL, and 2 if writing failed.
 */
int csvWriteText(
  /** File pointer, opened for write */
  FILE *fp,
  /** Pointer to string which is written into fp; '\n' is not added.
   *  NULL or empty string is accepted, and then nothing is written. */
  const char *txt,
  /** Convert (1) or do not convert (0) commas to semicolons and
   *  dots to commas */
  int tointl
) {
  const char *cptr;
  int c;

  if(fp==NULL) return(1);
  if(txt==NULL || txt[0]=='\0') return(0);
  for(cptr=txt; *cptr!='\0'; cptr++) {
    c=(unsigned char)*cptr;
    if(tointl!=0) {
      /* Each character is converted at most once, so an original comma
         becomes a semicolon and only an original dot becomes a comma */
      if(c==',') c=';';
      else if(c=='.') c=',';
    }
    if(fputc(c, fp)==EOF) return(2);
  }
  return(0);
}
/*****************************************************************************/

/*****************************************************************************/
/** Write CSV data into specified file using the column separator specified
 *  inside CSV struct. Field contents are written as they are, that is, no
 *  conversions for decimal separator is done here.
 *
 *  A line break is written before each field whose row number is higher
 *  than that of the fields before it (except before the very first field),
 *  the separator is written before other fields whose column number is
 *  above 1, and one line break ends the output. Failures of any write,
 *  of flushing, and of closing the file are all reported.
\return Returns TACIO status: TACIO_OK when successful, TACIO_FAULT for NULL
  arguments, TACIO_NOTABLE if there are no fields to write, or
  TACIO_CANNOTWRITE if the file cannot be opened or written.
 */
int csvWrite(
  /** Pointer to CSV struct, contents of which are to be written */
  CSV *csv,
  /** Name of file to write CSV contents in. If file exists, original file
      is renamed to a backup file (done by backupExistingFile()).
      If string 'stdout' (case-insensitive) is given, then
      contents are written in stdout. */
  char *filename,
  /** Verbose level; if zero, nothing extra is printed into stdout */
  int verbose
) {
  int i, row, is_stdout=0, failed=0;
  FILE *fp;

  if(verbose>0)
    printf("csvWrite(csv, '%s', ...)\n", filename!=NULL ? filename : "(null)");

  /* Check that there is some data to write */
  if(csv==NULL) return TACIO_FAULT;
  if(verbose>1) {
    printf("csv_nr := %d\n", csv->nr);
    printf("csv_row_nr := %d\n", csv->row_nr);
    printf("csv_col_nr := %d\n", csv->col_nr);
    printf("csv_separator := %c\n", csv->separator);
  }
  if(csv->nr<1) return TACIO_NOTABLE;
  /* File name is needed below even when writing to stdout */
  if(filename==NULL) return TACIO_FAULT;

  /* Check if writing to stdout */
  if(!strcasecmp(filename, "stdout")) is_stdout=1;

  /* Check if file exists; backup, if necessary */
  if(!is_stdout) (void)backupExistingFile(filename, NULL, NULL);

  /* Open output file */
  if(is_stdout) fp=(FILE*)stdout;
  else {
    if(verbose>1) printf("opening file for write\n");
    if((fp=fopen(filename, "w")) == NULL) return TACIO_CANNOTWRITE;
  }

  /* Write file */
  if(verbose>1) printf("writing data\n");
  row=0;
  for(i=0; i<csv->nr; i++) {
    if(csv->c[i].row>row) {
      /* First field of a new row; no line break before the first row */
      row=csv->c[i].row;
      if(i>0 && fprintf(fp, "\n")<0) failed=1;
    } else if(csv->c[i].col>1) {
      if(fprintf(fp, "%c", csv->separator)<0) failed=1;
    }
    /* Fields without content are written as empty */
    if(csv->c[i].content!=NULL)
      if(fprintf(fp, "%s", csv->c[i].content)<0) failed=1;
  }
  if(fprintf(fp, "\n")<1) failed=1;

  /* Close file; with buffered output a write error may show up only here */
  if(fflush(fp)!=0) failed=1;
  if(!is_stdout && fclose(fp)!=0) failed=1;

  /* Quit */
  if(failed) return TACIO_CANNOTWRITE;
  return(TACIO_OK);
}
/*****************************************************************************/

/*****************************************************************************/
