TPCCLIB
Loading...
Searching...
No Matches
csvio.c File Reference

CSV file i/o functions. More...

#include "tpcclibConfig.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include "tpccsv.h"

Go to the source code of this file.

Functions

int csvList (CSV *csv, FILE *fp)
 
int csvWrite (CSV *csv, int regular, FILE *fp, TPCSTATUS *status)
 
int csvRead (CSV *csv, FILE *fp, TPCSTATUS *status)
 
int csvPutLine (CSV *csv, const char *line, TPCSTATUS *status)
 
int csvPutLineWithSpaces (CSV *csv, const char *line, TPCSTATUS *status)
 
int csvCleanSpaces (CSV *csv)
 

Detailed Description

CSV file i/o functions.

Todo
csvRead() should have an option to read file only if it has no binary part.

Definition in file csvio.c.

Function Documentation

◆ csvCleanSpaces()

int csvCleanSpaces ( CSV * csv)

Removes any initial and trailing space characters from CSV. Space characters in the middle of the string are not removed.

Returns
tpcerror (TPCERROR_OK when successful).
Author
Vesa Oikonen
See also
csvRemoveEmptyLines, csvRead, strCleanSpaces
Parameters
csv: Pointer to CSV, the content of which is to be cleaned.

Definition at line 422 of file csvio.c.

425 {
426 if(csv==NULL) return TPCERROR_FAIL;
427 for(int i=0; i<csv->nr; i++) strCleanSpaces(csv->c[i].content);
428 return(TPCERROR_OK);
429}
int strCleanSpaces(char *s)
Definition stringext.c:300
char * content
Definition tpccsv.h:30
CSV_item * c
Definition tpccsv.h:38
int nr
Definition tpccsv.h:42
@ TPCERROR_FAIL
General error.
@ TPCERROR_OK
No error.

Referenced by parReadCSV().

◆ csvList()

int csvList ( CSV * csv,
FILE * fp )

Write CSV data as a tab separated list into file opened for writing. List contains the cell rows, columns, and values.

Data is not sorted, and cell contents are written as they are; that is, no conversion of the decimal separator is done here.

Returns
enum tpcerror (TPCERROR_OK when successful).
Author
Vesa Oikonen
See also
csvWrite, csvSetDimensions, csvTrimRight
Parameters
csv: Pointer to CSV structure, contents of which are to be written.
fp: Output file pointer; usually stdout.

Definition at line 27 of file csvio.c.

32 {
33 if(fp==NULL) return TPCERROR_CANNOT_WRITE;
34 if(csv==NULL || csv->nr<1) return TPCERROR_NO_DATA;
35
36 for(int i=0; i<csv->nr; i++)
37 if(fprintf(fp, "%d\t%d\t%s\n", 1+csv->c[i].row, 1+csv->c[i].col, csv->c[i].content)<5)
39
40 return(TPCERROR_OK);
41}
int col
Definition tpccsv.h:28
int row
Definition tpccsv.h:26
@ TPCERROR_NO_DATA
File contains no data.
@ TPCERROR_CANNOT_WRITE
Cannot write file.

Referenced by tacReadSIF().

◆ csvPutLine()

int csvPutLine ( CSV * csv,
const char * line,
TPCSTATUS * status )

Process a given text line (string) to add a new row of fields to CSV, using as field delimiter the character specified in CSV structure.

Returns
tpcerror (TPCERROR_OK when successful).
Precondition
Before first use initialize the CSV structure with csvInit().
Postcondition
Remember to free the memory in CSV after last use with csvFree().
Author
Vesa Oikonen
See also
csvInit, csvFree, csvWrite, csvPutString, csvPutInt, csvPutLineWithSpaces
Parameters
csv: Pointer to initiated CSV; previous contents are not changed.
line: Pointer to the CSV file line to be processed.
status: Pointer to status data; enter NULL if not needed.

Definition at line 251 of file csvio.c.

258 {
259 if(csv==NULL) return TPCERROR_FAIL;
260 if(line==NULL || strlen(line)<1) {
261 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
262 return TPCERROR_NO_DATA;
263 }
264 int verbose=0; if(status!=NULL) verbose=status->verbose;
265 if(verbose>10) printf("%s():\n", __func__);
266 if(verbose>12) printf("'%s'\n", line);
267
268 //size_t len=strlen(line);
269 char delimiter=csv->separator;
270
271 /* Space is not supported here */
272 if(delimiter==' ') {
273 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
275 }
276
277 int ret, i, last_was_delim=0;
278 int field_nr=0;
279 char *cptr=(char*)line, *lptr, *s;
280 /* If the first character is delimiter, we have had an empty field */
281 if(*cptr==delimiter) {
282 if(verbose>20) printf("first char is delimiter.\n");
283 ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
284 statusSet(status, __func__, __FILE__, __LINE__, ret);
285 return ret;
286 }
287 last_was_delim=1; cptr++; field_nr++;
288 }
289 /* Read all fields */
290 int single_quotation=0;
291 int double_quotation=0;
292 lptr=cptr;
293 while(*cptr && *lptr) {
294 if(verbose>20) printf("cptr='%s'\n", cptr);
295 /* Read next field */
296 lptr=cptr; i=0;
297 while(*lptr) {
298 // jump over quoted sequences
299 if(*lptr=='\'') {
300 if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
301 else single_quotation=0;
302 lptr++; i++; continue;
303 }
304 if(*lptr=='\"') {
305 if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
306 else double_quotation=0;
307 lptr++; i++; continue;
308 }
309 if(single_quotation==1 || double_quotation==1) {lptr++; i++; continue;}
310 // if this character is the delimiter, then stop
311 if(*lptr==delimiter) break;
312 // otherwise continue search
313 lptr++; i++;
314 }
315 s=strndup(cptr, i);
316 if(verbose>20) printf(" s='%s'\n", s);
317 ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
318 statusSet(status, __func__, __FILE__, __LINE__, ret);
319 free(s); return ret;
320 }
321 free(s); field_nr++;
322 if(*lptr==delimiter) {
323 last_was_delim=1; cptr+=(i+1);
324 } else {last_was_delim=0; cptr+=(i+1);}
325 }
326 if(verbose>20) printf("line finished.\n");
327 /* If the last character is delimiter, we have an empty field in the end */
328 if(last_was_delim) {
329 if(verbose>20) printf("last char is delimiter.\n");
330 ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
331 statusSet(status, __func__, __FILE__, __LINE__, ret);
332 return ret;
333 }
334 field_nr++;
335 }
336 if(verbose>20) printf("ending %s()\n", __func__);
337 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
338 return(TPCERROR_OK);
339}
int csvPutString(CSV *csv, const char *s, int newline)
Definition csv.c:144
void statusSet(TPCSTATUS *s, const char *func, const char *srcfile, int srcline, tpcerror error)
Definition statusmsg.c:142
char * strndup(const char *s, size_t n)
Definition stringext.c:205
char separator
Definition tpccsv.h:49
int verbose
Verbose level, used by statusPrint() etc.
@ TPCERROR_INVALID_SEPARATOR
Invalid field delimiter.

Referenced by csvRead().

◆ csvPutLineWithSpaces()

int csvPutLineWithSpaces ( CSV * csv,
const char * line,
TPCSTATUS * status )

Process a given text line (string) to add a new row of fields to CSV, using spaces as field delimiters, regardless of the separator specified in the CSV struct.

Returns
tpcerror (TPCERROR_OK when successful).
Precondition
Before first use initialize the CSV struct with csvInit().
Postcondition
Remember to free the memory in CSV after last use with csvFree().
Author
Vesa Oikonen
See also
csvPutLine, csvRemoveEmptyLines, csvRead
Parameters
csv: Pointer to initiated CSV; previous contents are not changed.
line: Pointer to the CSV file line to be processed.
status: Pointer to status data; enter NULL if not needed.

Definition at line 351 of file csvio.c.

358 {
359 if(csv==NULL) return TPCERROR_FAIL;
360 if(line==NULL || strlen(line)<1) {
361 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
362 return TPCERROR_NO_DATA;
363 }
364 int verbose=0; if(status!=NULL) verbose=status->verbose;
365 if(verbose>10) printf("%s():\n", __func__);
366 if(verbose>12) printf("'%s'\n", line);
367
368 char *cptr=(char*)line;
369 char *lptr=cptr, *s;
370 int single_quotation=0;
371 int double_quotation=0;
372 int ret;
373 size_t j;
374 int field_nr=0;
375 while(*cptr && *lptr) {
376 if(verbose>20) printf("cptr='%s'\n", cptr);
377 // Pass the spaces
378 j=strspn(cptr, " \t\n\r"); cptr+=j; if(!cptr) break;
379 // Find the end of token
380 lptr=cptr; j=0;
381 while(*lptr) {
382 // jump over quoted sequences */
383 if(*lptr=='\'') {
384 if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
385 else single_quotation=0;
386 lptr++; j++; continue;
387 }
388 if(*lptr=='\"') {
389 if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
390 else double_quotation=0;
391 lptr++; j++; continue;
392 }
393 if(single_quotation==1 || double_quotation==1) {lptr++; j++; continue;}
394 // if this character is the delimiter, then stop
395 if(*lptr==' ') break;
396 // otherwise continue search
397 lptr++; j++;
398 }
399 if(j==0) break;
400 s=strndup(cptr, j);
401 if(verbose>20) printf(" s='%s'\n", s);
402 ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
403 statusSet(status, __func__, __FILE__, __LINE__, ret);
404 free(s); return ret;
405 }
406 free(s); cptr+=j; field_nr++;
407 if(verbose>20) printf(" csv.nr=%d\n", csv->nr);
408 }
409
410 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
411 return(TPCERROR_OK);
412}

Referenced by csvRead().

◆ csvRead()

int csvRead ( CSV * csv,
FILE * fp,
TPCSTATUS * status )

Read CSV file contents into CSV structure, allocating memory as needed.

Lines consisting only of space characters, including tabs, are not read. Partial support for spaces as delimiters.

Returns
enum tpcerror (TPCERROR_OK when successful).
Precondition
Before first use initialize the CSV structure with csvInit().
Postcondition
After last use free memory in the CSV structure with csvFree().
Bug
File is assumed to be relatively well-formatted. Specifically, tabs and spaces must not both be used as field delimiters within one file.
Author
Vesa Oikonen
See also
csvInit, csvFree, csvWrite, csvSearchField, csvCell, csvCellReplace, csvIsRegular
Parameters
csv: Pointer to CSV to read into; any previous contents of CSV are preserved.
fp: Input file pointer.
status: Pointer to status data; enter NULL if not needed.

Definition at line 124 of file csvio.c.

131 {
132 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
133 if(csv==NULL || fp==NULL) return TPCERROR_FAIL;
134 int verbose=0; if(status!=NULL) verbose=status->verbose;
135 if(verbose>10) printf("%s()\n", __func__);
136
137 /* Get the size of the ASCII part of the file */
138 size_t fsize=asciiFileSize(fp, NULL);
139 if(verbose>11) printf(" ASCII size := %d\n", (int)fsize);
140 /* If ASCII part is too small, then lets consider that an error */
141 if(fsize<1) {
142 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
143 return TPCERROR_NO_DATA;
144 }
145 /* If ASCII part is too large, then lets consider that an error */
146 if(fsize>50000000) {
147 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_TOO_BIG);
148 return TPCERROR_TOO_BIG;
149 }
150 /* Read that to a string */
151 rewind(fp);
152 char *data;
153 data=asciiFileRead(fp, NULL, fsize+1); rewind(fp);
154 if(data==NULL) {
155 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
156 return TPCERROR_NO_DATA;
157 }
158 if(verbose>20) printf(" ASCII file read\n");
159
160 /* Read one line at a time from the string and determine the field and decimal separators */
161 int i=0, j;
162 int tab_nr=0, sem_nr=0, com_nr=0, dot_nr=0, spa_nr=0;
163 char *cptr, *line, *lptr;
164 cptr=data;
165 while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
166 if(verbose>80) printf("line='%s'\n", line);
167 /* If line starts with '#' then jump over it */
168 if(line[0]=='#') {free(line); cptr+=j; continue;}
169 /* If line contains only space characters then jump over it */
170 if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
171 /* Compute the nr of dots, commas etc outside quotes */
172 lptr=line; while((lptr=strstrNoQuotation(lptr, "\t"))!=NULL) {tab_nr++;lptr++;}
173 lptr=line; while((lptr=strstrNoQuotation(lptr, ";"))!=NULL) {sem_nr++; lptr++;}
174 lptr=line; while((lptr=strstrNoQuotation(lptr, ","))!=NULL) {com_nr++; lptr++;}
175 lptr=line; while((lptr=strstrNoQuotation(lptr, "."))!=NULL) {dot_nr++; lptr++;}
176 lptr=line; while((lptr=strstrNoQuotation(lptr, " "))!=NULL) {spa_nr++; lptr++;}
177 free(line); cptr+=j; i++;
178 }
179 if(verbose>10) {
180 printf("dataline_nr := %d\n", i);
181 printf("semicolon_nr := %d\n", sem_nr);
182 printf("tabulator_nr := %d\n", tab_nr);
183 printf("dot_nr := %d\n", dot_nr);
184 printf("comma_nr := %d\n", com_nr);
185 printf("space_nr := %d\n", spa_nr);
186 }
187 if(sem_nr==0 && tab_nr==0 && dot_nr==0 && com_nr==0 && spa_nr==0) {
188 csv->separator='\t'; // the default
189 } else if(sem_nr>0) {
190 // If at least one semi-colon, then assume that it is the field separator
191 csv->separator=';';
192 } else if(tab_nr>0) {
193 // If at least one tab, then assume that it is the field separator
194 csv->separator='\t';
195 } else if(spa_nr==0) {
196 // If no spaces, then comma must be the field separator
197 csv->separator=',';
198 } else {
199 // Spaces exist, so is space or comma the field separator ?
200 if(com_nr==0) {
201 // No commas, thus space is probably field separator
202 csv->separator=' ';
203 } else if(dot_nr>0) {
204 // Dots and commas exist, probably decimal point, and comma as field separator
205 csv->separator=',';
206 } else {
207 // No dots, but commas and spaces; lets assume that the more frequent one is the field separator
208 if(com_nr>spa_nr) csv->separator=','; else csv->separator=' ';
209 }
210 }
211 if(verbose>10) {
212 if(csv->separator=='\t') printf("field_separator := tab\n");
213 else if(csv->separator==' ') printf("field_separator := space\n");
214 else printf("field_separator := %c\n", csv->separator);
215 }
216
217 /* Copy field values into CSV */
218 cptr=data; i=0; int ret;
219 while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
220 /* If line starts with '#' then jump over it */
221 if(line[0]=='#') {free(line); cptr+=j; continue;}
222 /* If line contains only space characters then jump over it */
223 if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
224 /* Write contents into CSV as a new data row */
225 if(csv->separator!=' ') ret=csvPutLine(csv, line, status);
226 else ret=csvPutLineWithSpaces(csv, line, status);
227 if(verbose>1 && ret!=0) fprintf(stderr, "Warning: cannot read line %d: '%s'.\n", i, line);
228 /* Prepare for the next line */
229 free(line); cptr+=j; i++;
230 }
231 if(i==0) {
232 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
233 return TPCERROR_NO_DATA;
234 }
235
236 free(data);
237 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
238 return(TPCERROR_OK);
239}
int csvPutLineWithSpaces(CSV *csv, const char *line, TPCSTATUS *status)
Definition csvio.c:351
int csvPutLine(CSV *csv, const char *line, TPCSTATUS *status)
Definition csvio.c:251
char * asciiFileRead(FILE *fp, char *data, size_t maxlen)
size_t asciiFileSize(FILE *fp, int *nonprintable)
char * strstrNoQuotation(const char *haystack, const char *needle)
Definition stringext.c:225
int strIsSpaceOnly(char *s)
Definition stringext.c:671
char * strTokenDup(const char *s1, const char *s2, int *next)
Definition stringext.c:413
@ TPCERROR_TOO_BIG
File is too big.

Referenced by parRead(), tacFormatDetermine(), and tacRead().

◆ csvWrite()

int csvWrite ( CSV * csv,
int regular,
FILE * fp,
TPCSTATUS * status )

Write CSV data into file opened for writing, using the column separator specified inside CSV structure.

Field contents are written as they are; that is, no conversion of the decimal separator is done here.

Returns
enum tpcerror (TPCERROR_OK when successful).
Author
Vesa Oikonen
See also
csvRead, csvList, csvSetDimensions, csvTrimRight
Parameters
csv: Pointer to CSV structure, contents of which are to be written.
regular: Forced regularization (1), or not (0); if regularized, then row_nr rows are written, each with col_nr columns; otherwise, empty rows are not written, and missing cells are not written to the end of rows.
fp: Output file pointer.
status: Pointer to status data; enter NULL if not needed.

Definition at line 52 of file csvio.c.

63 {
64 int verbose=0; if(status!=NULL) verbose=status->verbose;
65 if(fp==NULL) {
66 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
68 }
69 if(csv==NULL || csv->nr<1) {
70 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
71 return TPCERROR_NO_DATA;
72 }
73 if(csv->separator!=',' && csv->separator!=';' && csv->separator!='\t' && csv->separator!=' ') {
74 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
76 }
77 if(verbose>10) {
78 printf("%s():\n", __func__);
79 printf("regular := %d\n", regular);
80 printf("csv_nr := %d\n", csv->nr);
81 printf("csv_row_nr := %d\n", csv->row_nr);
82 printf("csv_col_nr := %d\n", csv->col_nr);
83 if(csv->separator=='\t') printf("csv_separator := tab\n");
84 else if(csv->separator==' ') printf("csv_separator := space\n");
85 else printf("csv_separator := '%c'\n", csv->separator);
86 }
87
88 /* Write in file */
89 int wn=0;
90 for(int ri=0; ri<csv->row_nr; ri++) {
91 int n=csv->col_nr;
92 if(regular==0) {n=csvRowLength(csv, ri); if(n==0) continue;}
93 for(int ci=0; ci<n; ci++) {
94 if(ci>0) wn+=fprintf(fp, "%c", csv->separator);
95 char *cptr=csvCell(csv, ri, ci);
96 if(cptr!=NULL) wn+=fprintf(fp, "%s", cptr);
97 }
98 wn+=fprintf(fp, "\n");
99 }
100 if(wn<1) {
101 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
103 }
104
105 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
106 return(TPCERROR_OK);
107}
char * csvCell(CSV *csv, int row, int col)
Definition csv.c:358
int csvRowLength(CSV *csv, int row)
Definition csv.c:244
int row_nr
Definition tpccsv.h:44
int col_nr
Definition tpccsv.h:46

Referenced by parRead(), parReadFIT(), parReadRES(), parWriteCSV(), tacRead(), tacReadSimple(), and tacWriteCSV().