/** @file csvio.c
 *  @brief CSV file i/o functions.
 *  @todo csvRead() should have an option to read file only if it has no binary part.
 */
/*****************************************************************************/
#include "tpcclibConfig.h"
/*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/*****************************************************************************/
#include "tpccsv.h"
/*****************************************************************************/

/*****************************************************************************/
/** List the contents of a CSV structure into an open output stream, one cell per line.

    Each output line holds three tab-separated items: row number, column number, and cell content.
    Row and column numbers are written with one added to the values stored in the structure.
    Cells are listed in their storage order without sorting, and the content is written verbatim;
    in particular, decimal separators are not converted.
    @return enum tpcerror (TPCERROR_OK when successful, TPCERROR_CANNOT_WRITE if the stream is
     missing or a line could not be written, TPCERROR_NO_DATA if there is nothing to list).
    @author Vesa Oikonen
    @sa csvWrite, csvSetDimensions, csvTrimRight
 */
int csvList(
  /** Pointer to the CSV structure whose cells are to be listed. */
  CSV *csv,
  /** File pointer opened for writing; typically stdout. */
  FILE *fp
) {
  if(!fp) return TPCERROR_CANNOT_WRITE;
  if(!csv || csv->nr<1) return TPCERROR_NO_DATA;

  int idx=0;
  while(idx<csv->nr) {
    int written=fprintf(fp, "%d\t%d\t%s\n", 1+csv->c[idx].row, 1+csv->c[idx].col, csv->c[idx].content);
    /* A complete line has at least five characters: two numbers, two tabs, and the newline */
    if(written<5) return TPCERROR_CANNOT_WRITE;
    idx++;
  }

  return TPCERROR_OK;
}
/*****************************************************************************/

/*****************************************************************************/
/** Write CSV data into file opened for writing, using the column separator specified inside
    the CSV structure.

    Field contents are written as they are, that is, no conversion of the decimal separator
    is done here.
    Writing stops at the first output error; in that case part of the data may already have been
    written into the file.
    @return enum tpcerror (TPCERROR_OK when successful, TPCERROR_CANNOT_WRITE if the file pointer
     is NULL, if any write fails, or if nothing was written, TPCERROR_NO_DATA if CSV is missing
     or empty, TPCERROR_INVALID_SEPARATOR if the separator is not comma, semicolon, tab or space).
    @author Vesa Oikonen
    @sa csvRead, csvList, csvSetDimensions, csvTrimRight
 */
int csvWrite(
  /** Pointer to CSV structure, contents of which are to be written. */
  CSV *csv,
  /** Forced regularization (1), or not (0);
      if regularized, then row_nr rows are written, each with col_nr columns;
      otherwise, empty rows are not written, and missing cells are not written to the end of rows. */
  int regular,
  /** Output file pointer. */
  FILE *fp,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  int verbose=0; if(status!=NULL) verbose=status->verbose;
  if(fp==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
    return TPCERROR_CANNOT_WRITE;
  }
  if(csv==NULL || csv->nr<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  /* Only these four characters are accepted as field separators */
  if(csv->separator!=',' && csv->separator!=';' && csv->separator!='\t' && csv->separator!=' ') {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
    return TPCERROR_INVALID_SEPARATOR;
  }
  if(verbose>10) {
    printf("%s():\n", __func__);
    printf("regular := %d\n", regular);
    printf("csv_nr := %d\n", csv->nr);
    printf("csv_row_nr := %d\n", csv->row_nr);
    printf("csv_col_nr := %d\n", csv->col_nr);
    if(csv->separator=='\t') printf("csv_separator := tab\n");
    else if(csv->separator==' ') printf("csv_separator := space\n");
    else printf("csv_separator := '%c'\n", csv->separator);
  }

  /* Write in file; wn counts the written characters, failed is set on the first output error.
     fprintf() returns a negative value on error, so each result is checked separately instead
     of being summed blindly, which could hide a failure behind earlier successful writes. */
  int wn=0, failed=0;
  for(int ri=0; ri<csv->row_nr; ri++) {
    int n=csv->col_nr;
    /* In non-regular mode write only the cells that the row has, and skip empty rows */
    if(regular==0) {n=csvRowLength(csv, ri); if(n==0) continue;}
    for(int ci=0; ci<n; ci++) {
      if(ci>0) {
        int w=fprintf(fp, "%c", csv->separator);
        if(w<0) {failed=1; break;}
        wn+=w;
      }
      /* A missing cell is written as an empty field */
      char *cptr=csvCell(csv, ri, ci);
      if(cptr!=NULL) {
        int w=fprintf(fp, "%s", cptr);
        if(w<0) {failed=1; break;}
        wn+=w;
      }
    }
    if(failed) break;
    int w=fprintf(fp, "\n");
    if(w<0) {failed=1; break;}
    wn+=w;
  }
  if(failed || wn<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
    return TPCERROR_CANNOT_WRITE;
  }

  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return(TPCERROR_OK);
}
/*****************************************************************************/

/*****************************************************************************/
/** Read CSV file contents into CSV structure, allocating memory as needed.

    The ASCII part of the file is read into a memory buffer, which is then processed in two
    passes: the first pass counts candidate separator characters to select the field separator
    (stored in the CSV structure), and the second pass adds the fields of each line into the CSV
    as a new row. The file is rewound after reading.

    Lines starting with '#', and lines consisting only of space characters, including tabs,
    are not read.
    Partial support for spaces as delimiters.
    Lines that cannot be processed are skipped; a warning is printed to stderr if verbose
    level is above 1.

    @return enum tpcerror (TPCERROR_OK when successful).
    @pre Before first use initialize the CSV structure with csvInit().
    @post After last use free memory in the CSV structure with csvFree().
    @bug File is assumed to be relatively well-formatted. Specifically,
     both tabs and spaces must not be used as field delimiters inside one file.
    @author Vesa Oikonen
    @sa csvInit, csvFree, csvWrite, csvSearchField, csvCell, csvCellReplace, csvIsRegular
 */
int csvRead(
  /** Pointer to CSV to read into; any previous contents of CSV are preserved, except that
      the field separator is set based on the file contents. */
  CSV *csv,
  /** Input file pointer. */
  FILE *fp,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
  if(csv==NULL || fp==NULL) return TPCERROR_FAIL;
  int verbose=0; if(status!=NULL) verbose=status->verbose;
  if(verbose>10) printf("%s()\n", __func__);

  /* Get the size of the ASCII part of the file */
  size_t fsize=asciiFileSize(fp, NULL);
  if(verbose>11) printf("  ASCII size := %d\n", (int)fsize);
  /* If ASCII part is too small, then lets consider that an error */
  if(fsize<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  /* If ASCII part is too large, then lets consider that an error */
  if(fsize>50000000) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_TOO_BIG);
    return TPCERROR_TOO_BIG;
  }
  /* Read that to a string; the buffer is owned by this function and must be freed on
     every return path below */
  rewind(fp);
  char *data;
  data=asciiFileRead(fp, NULL, fsize+1); rewind(fp);
  if(data==NULL) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  if(verbose>20) printf("  ASCII file read\n");
  
  /* Read one line at a time from the string and determine the field and decimal separators.
     Each call to strTokenDup() gives a line copy that is freed here, and sets j to the
     number of characters by which the read position is advanced. */
  int i=0, j;
  int tab_nr=0, sem_nr=0, com_nr=0, dot_nr=0, spa_nr=0;
  char *cptr, *line, *lptr;
  cptr=data;
  while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
    if(verbose>80) printf("line='%s'\n", line);
    /* If line starts with '#' then jump over it */
    if(line[0]=='#') {free(line); cptr+=j; continue;}
    /* If line contains only space characters then jump over it */
    if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
    /* Compute the nr of dots, commas etc outside quotes */
    lptr=line; while((lptr=strstrNoQuotation(lptr, "\t"))!=NULL) {tab_nr++;lptr++;}
    lptr=line; while((lptr=strstrNoQuotation(lptr, ";"))!=NULL) {sem_nr++; lptr++;}
    lptr=line; while((lptr=strstrNoQuotation(lptr, ","))!=NULL) {com_nr++; lptr++;}
    lptr=line; while((lptr=strstrNoQuotation(lptr, "."))!=NULL) {dot_nr++; lptr++;}
    lptr=line; while((lptr=strstrNoQuotation(lptr, " "))!=NULL) {spa_nr++; lptr++;}
    free(line); cptr+=j; i++;
  }
  if(verbose>10) {
    printf("dataline_nr := %d\n", i);
    printf("semicolon_nr := %d\n", sem_nr);
    printf("tabulator_nr := %d\n", tab_nr);
    printf("dot_nr := %d\n", dot_nr);
    printf("comma_nr := %d\n", com_nr);
    printf("space_nr := %d\n", spa_nr);
  }
  /* Select the field separator; precedence is semicolon, tab, then comma or space */
  if(sem_nr==0 && tab_nr==0 && dot_nr==0 && com_nr==0 && spa_nr==0) {
    csv->separator='\t'; // the default
  } else if(sem_nr>0) {
    // If at least one semi-colon, then assume that it is the field separator
    csv->separator=';';
  } else if(tab_nr>0) {
    // If at least one tab, then assume that it is the field separator
    csv->separator='\t';
  } else if(spa_nr==0) {
    // If no spaces, then comma must be the field separator
    csv->separator=',';
  } else {
    // Spaces exist, so is space or comma the field separator ?
    if(com_nr==0) {
      // No commas, thus space is probably field separator
      csv->separator=' ';
    } else if(dot_nr>0) {
      // Dots and commas exist, probably decimal point, and comma as field separator
      csv->separator=',';
    } else {
      // No dots, but commas and spaces; lets assume that the more frequent one is the field separator
      if(com_nr>spa_nr) csv->separator=','; else csv->separator=' ';
    }
  }
  if(verbose>10) {
    if(csv->separator=='\t') printf("field_separator := tab\n");
    else if(csv->separator==' ') printf("field_separator := space\n");
    else printf("field_separator := %c\n", csv->separator);
  }

  /* Copy field values into CSV; i counts the processed data lines */
  cptr=data; i=0; int ret;
  while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
    /* If line starts with '#' then jump over it */
    if(line[0]=='#') {free(line); cptr+=j; continue;}
    /* If line contains only space characters then jump over it */
    if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
    /* Write contents into CSV as a new data row */
    if(csv->separator!=' ') ret=csvPutLine(csv, line, status);
    else ret=csvPutLineWithSpaces(csv, line, status);
    /* A failed line is not fatal: reading continues with the next line */
    if(verbose>1 && ret!=0) fprintf(stderr, "Warning: cannot read line %d: '%s'.\n", i, line);
    /* Prepare for the next line */
    free(line); cptr+=j; i++;
  }
  if(i==0) {
    /* No data lines were found; release the file buffer before returning */
    free(data);
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }

  free(data);
  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return(TPCERROR_OK);
}
/*****************************************************************************/

/*****************************************************************************/
/** Process a given text line (string) to add a new row of fields to CSV,
    using as field delimiter the character specified in CSV structure.

    Delimiters inside single or double quotation marks do not split fields; a quotation mark
    starts a quoted sequence only if a matching mark is found later on the line.
    A delimiter at the start or at the end of the line produces an empty field.
    Space as the delimiter is not supported by this function; use csvPutLineWithSpaces() instead.
    @return tpcerror (TPCERROR_OK when successful, TPCERROR_FAIL if CSV is missing or memory
     for a field cannot be allocated, TPCERROR_NO_DATA if line is missing or empty,
     TPCERROR_INVALID_SEPARATOR if the CSV separator is space, or the error code from
     csvPutString()).
    @pre Before first use initialize the CSV structure with csvInit().
    @post Remember to free the memory in CSV after last use with csvFree().
    @author Vesa Oikonen
    @sa csvInit, csvFree, csvWrite, csvPutString, csvPutInt, csvPutLineWithSpaces
 */
int csvPutLine(
  /** Pointer to initiated CSV; previous contents are not changed. */
  CSV *csv,
  /** Pointer to the CSV file line to be processed. */
  const char *line,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  if(csv==NULL) return TPCERROR_FAIL;
  if(line==NULL || strlen(line)<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  int verbose=0; if(status!=NULL) verbose=status->verbose;
  if(verbose>10) printf("%s():\n", __func__);
  if(verbose>12) printf("'%s'\n", line);

  char delimiter=csv->separator;

  /* Space is not supported here */
  if(delimiter==' ') {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
    return TPCERROR_INVALID_SEPARATOR;
  }

  int ret, i, last_was_delim=0;
  /* Number of fields stored from this line; csvPutString() gets a nonzero third argument
     only for the first field of the line */
  int field_nr=0;
  const char *cptr=line, *lptr;
  char *s;
  /* If the first character is delimiter, we have had an empty field */
  if(*cptr==delimiter) {
    if(verbose>20) printf("first char is delimiter.\n");
    ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, ret);
      return ret;
    }
    last_was_delim=1; cptr++; field_nr++;
  }
  /* Read all fields */
  int single_quotation=0;
  int double_quotation=0;
  lptr=cptr;
  while(*cptr && *lptr) {
    if(verbose>20) printf("cptr='%s'\n", cptr);
    /* Read next field: lptr scans forward from cptr, and i counts the scanned characters */
    lptr=cptr; i=0;
    while(*lptr) {
      // jump over quoted sequences; a quote is opened only if its closing pair exists
      if(*lptr=='\'') {
        if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
        else single_quotation=0;
        lptr++; i++; continue;
      }
      if(*lptr=='\"') {
        if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
        else double_quotation=0;
        lptr++; i++; continue;
      }
      if(single_quotation==1 || double_quotation==1) {lptr++; i++; continue;}
      // if this character is the delimiter, then stop
      if(*lptr==delimiter) break;
      // otherwise continue search
      lptr++; i++;
    }
    s=strndup(cptr, i);
    if(s==NULL) {
      /* Out of memory; do not pass a NULL string forward */
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
      return TPCERROR_FAIL;
    }
    if(verbose>20) printf("  s='%s'\n", s);
    ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, ret);
      free(s); return ret;
    }
    free(s); field_nr++;
    /* Continue from where the scan stopped: either at a delimiter or at the end of line */
    last_was_delim=(*lptr==delimiter);
    cptr=lptr;
    /* Step over the delimiter, but never past the terminating NUL, so that the loop
       condition does not read beyond the end of the string */
    if(*cptr!='\0') cptr++;
  }
  if(verbose>20) printf("line finished.\n");
  /* If the last character is delimiter, we have an empty field in the end */
  if(last_was_delim) {
    if(verbose>20) printf("last char is delimiter.\n");
    ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, ret);
      return ret;
    }
    field_nr++;
  }
  if(verbose>20) printf("ending %s()\n", __func__);
  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return(TPCERROR_OK);
}
/*****************************************************************************/

/*****************************************************************************/
/** Process a given text line (string) to add a new row of fields to CSV,
    using spaces as field delimiters, independent on what is told in CSV struct.

    Spaces, tabs, and line-end characters before each field are skipped, thus consecutive
    spaces do not produce empty fields. A field ends at the next space character that is not
    inside single or double quotation marks; a quotation mark starts a quoted sequence only if
    a matching mark is found later on the line.
    @return tpcerror (TPCERROR_OK when successful, also when the line contained no fields;
     TPCERROR_FAIL if CSV is missing or memory for a field cannot be allocated,
     TPCERROR_NO_DATA if line is missing or empty, or the error code from csvPutString()).
    @pre Before first use initialize the CSV struct with csvInit().
    @post Remember to free the memory in CSV after last use with csvFree().
    @author Vesa Oikonen
    @sa csvPutLine, csvRemoveEmptyLines, csvRead
 */
int csvPutLineWithSpaces(
  /** Pointer to initiated CSV; previous contents are not changed. */
  CSV *csv,
  /** Pointer to the CSV file line to be processed. */
  const char *line,
  /** Pointer to status data; enter NULL if not needed. */
  TPCSTATUS *status
) {
  if(csv==NULL) return TPCERROR_FAIL;
  if(line==NULL || strlen(line)<1) {
    statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
    return TPCERROR_NO_DATA;
  }
  int verbose=0; if(status!=NULL) verbose=status->verbose;
  if(verbose>10) printf("%s():\n", __func__);
  if(verbose>12) printf("'%s'\n", line);

  const char *cptr=line;
  const char *lptr=cptr;
  char *s;
  int single_quotation=0;
  int double_quotation=0;
  int ret;
  size_t j;
  /* Number of fields stored from this line; csvPutString() gets a nonzero third argument
     only for the first field of the line */
  int field_nr=0;
  while(*cptr && *lptr) {
    if(verbose>20) printf("cptr='%s'\n", cptr);
    // Pass the spaces; stop if only white space was left on the line
    j=strspn(cptr, " \t\n\r"); cptr+=j; if(!*cptr) break;
    // Find the end of token; j counts the characters in it
    lptr=cptr; j=0;
    while(*lptr) {
      // jump over quoted sequences; a quote is opened only if its closing pair exists
      if(*lptr=='\'') {
        if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
        else single_quotation=0;
        lptr++; j++; continue;
      }
      if(*lptr=='\"') {
        if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
        else double_quotation=0;
        lptr++; j++; continue;
      }
      if(single_quotation==1 || double_quotation==1) {lptr++; j++; continue;}
      // if this character is the delimiter, then stop
      if(*lptr==' ') break;
      // otherwise continue search
      lptr++; j++;
    }
    if(j==0) break;
    s=strndup(cptr, j);
    if(s==NULL) {
      /* Out of memory; do not pass a NULL string forward */
      statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
      return TPCERROR_FAIL;
    }
    if(verbose>20) printf("  s='%s'\n", s);
    ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
      statusSet(status, __func__, __FILE__, __LINE__, ret);
      free(s); return ret;
    }
    /* Continue from the character that ended the token (space or end of line) */
    free(s); cptr+=j; field_nr++;
    if(verbose>20) printf("  csv.nr=%d\n", csv->nr);
  }

  statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
  return(TPCERROR_OK);
}
/*****************************************************************************/

/*****************************************************************************/
/** Clean the content of every cell in CSV by calling strCleanSpaces() on it.

    The intent is to strip leading and trailing space characters from each cell, while leaving
    space characters in the middle of the string in place.
    @return tpcerror (TPCERROR_OK when successful, TPCERROR_FAIL if CSV pointer is NULL).
    @author Vesa Oikonen
    @sa csvRemoveEmptyLines, csvRead, strCleanSpaces
 */
int csvCleanSpaces(
  /** Pointer to CSV, the cell contents of which are cleaned in place. */
  CSV *csv
) {
  if(!csv) return TPCERROR_FAIL;

  /* Go through all stored cells */
  int cell=0;
  while(cell<csv->nr) {
    strCleanSpaces(csv->c[cell].content);
    cell++;
  }
  return TPCERROR_OK;
}
/*****************************************************************************/

/*****************************************************************************/
