TPCCLIB
Loading...
Searching...
No Matches
htmlfind.c
Go to the documentation of this file.
1
7/*****************************************************************************/
8#include "tpcclibConfig.h"
9/*****************************************************************************/
10#include <stdio.h>
11#include <stdlib.h>
12#include <unistd.h>
13#include <string.h>
14#include <ctype.h>
15#include <dirent.h>
16#include <sys/stat.h>
17/*****************************************************************************/
18#include "libtpcmisc.h"
19/*****************************************************************************/
/* Global variables */
/** Replace mode: html_find() rewrites the file only when this is 1;
 *  main() sets it to 1 for options -replace and -replace-from-file. */
int html_replace_mode=0;
/** Case sensitivity of the search: 0 (default) = upper- and lowercase letters
 *  are considered equal, 1 = search is case-sensitive. */
int html_case_sensitive=0;
/** Number of files in which html_recursive_find() found at least one match. */
int html_file_nr=0;
/** Total number of matches summed over all files by html_recursive_find(). */
int html_find_nr=0;
29/*****************************************************************************/
30
31/*****************************************************************************/
/** Usage text of the program, one output line per array element; the list is
 *  terminated by a null pointer. It is handed to tpcPrintUsage() and
 *  tpcHtmlUsage() in main(). */
static char *info[] = {
  "Program for searching HTML and XHTML code in a specified file or in every",
  "file with extensions *.htm, *.html and *.xhtml that are found under the",
  "specified path. Optionally, the (X)HTML code fragment can be changed to",
  "the given text in every place where it is found.",
  " ",
  "Usage: @P [options] path_or_file search_text",
  " ",
  "Options:",
  " -replace <Substitute text>",
  " Search text is replaced by specified text in every instance;",
  " substitute must not contain certain special characters like '&'",
  " -replace-from-file <Filename>",
  " Search text is replaced in every instance by the contents of",
  " the specified ASCII text file, including new line characters;",
  " text length must not exceed 2047 characters",
  " -case[-sensitive]",
  " Search is case-sensitive. By default, upper-and lowercase letters",
  " are considered equal",
  " -stdoptions", // List standard options like --help, -v, etc
  " ",
  "Example 1:",
  "Command for finding out if and where the specified HTML file",
  "contains HTML code fragment '<a href=':",
  " @P webpage.html \"<a href=\"",
  " ",
  "Example 2:",
  "Command for replacing a web address with another in all HTML",
  "files which can be found in the current directory and below it:",
  " @P -replace www.new.fi . www.old.fi",
  " ",
  "Alternatively, in bash you can search a string in files using grep",
  "from files with certain extension, for example:",
  " grep \"string_to_search\" . -R --include \"*.c\" ",
  " ",
  "Keywords: HTML, tools",
  0};
70/*****************************************************************************/
71
72/*****************************************************************************/
/* Turn on the globbing of the command line, since it is disabled by default in
   mingw-w64 (_dowildcard=0); in MinGW32 define _CRT_glob instead, if necessary;
   In Unix&Linux wildcard command line processing is enabled by default. */
/*
#undef _CRT_glob
#define _CRT_glob -1
*/
int _dowildcard = -1;
81/*****************************************************************************/
82
83/*****************************************************************************/
88int html_find(
90 char *htmlfile,
92 char *searchstring,
95 char *replacestring,
97 int verbose
98) {
99 FILE *fpi, *fpo=NULL;
100 int c, i, n=0, len, replen=0, line=1, column=0, last_c=0, found_nr=0;
101 char tempfile[FILENAME_MAX], *buf;
102
103
104 /* Check the input */
105 if(htmlfile==NULL || searchstring==NULL) return(-1);
106 if(strlen(htmlfile)<1) return(-1);
107 if(verbose>1) fprintf(stdout, "html_find(%s, %s, %s)\n",
108 htmlfile, searchstring, replacestring);
109
110 /* If searchstring was not specified, then filename is shown in any case */
111 len=strlen(searchstring);
112 if(len<1) {
113 if(verbose>=0) fprintf(stdout, "%s (1,1)\n", htmlfile);
114 return(0);
115 }
116
117 /* Allocate memory for the character buffer */
118 if(html_replace_mode==1) {
119 replen=strlen(replacestring);
120 buf=malloc(replen+128); if(buf==NULL) return(-2);
121 } else buf=NULL;
122
123
124 /*
125 * Try to find the search string
126 */
127 /* Open the HTML file */
128 if((fpi=fopen(htmlfile, "r"))==NULL) {
129 if(html_replace_mode==1) free(buf);
130 return(-3);
131 }
132 /* Open the output HTML file */
133 if(html_replace_mode==1) {
134 strcpy(tempfile, htmlfile); strcat(tempfile, ".bak");
135 if((fpo=fopen(tempfile, "w"))==NULL) {
136 fclose(fpi); free(buf); return(-4);}
137 }
138 /* Read the file */
139 while((c=fgetc(fpi))!=EOF) {
140 if(html_replace_mode==1) buf[n]=c;
141 /* ignore end of line characters */
142 if(c=='\n' || c=='\r') {
143 if(html_replace_mode==1 && n==0) {fputc(c, fpo);}
144 column=0;
145 if((c=='\n'&&last_c!='\r') || (c=='\r'&&last_c!='\n')) line++;
146 last_c=c; continue;
147 }
148 column++;
149 /* convert it to uppercase, if search is case-insensitive */
150 if(html_case_sensitive==0) c=toupper(c);
151 /* matching character ? */
152 if(c==searchstring[n]) {
153 n++;
154 if(n==len) { /* Match was found */
155 if(verbose>=0)
156 fprintf(stdout, "%s (%d,%d)\n", htmlfile, line, column);
157 n=0; found_nr++;
158 if(html_replace_mode==1) { /* replace */
159 i=0;
160 while(i<replen)
161 if(fputc(replacestring[i++], fpo)==EOF) { /* cannot write */
162 fclose(fpi); fclose(fpo); remove(tempfile); free(buf); return(-11);
163 }
164 }
165 }
166 } else { /* No match */
167 if(html_replace_mode==1) { /* write buf to temp file */
168 i=0;
169 while(i<=n) if(fputc(buf[i++], fpo)==EOF) { /* cannot write */
170 fclose(fpi); fclose(fpo); remove(tempfile); free(buf); return(-12);}
171 }
172 n=0;
173 }
174 last_c=c;
175 }
176 fclose(fpi);
177
178 if(html_replace_mode==1) {
179 fclose(fpo); free(buf);
180 if(found_nr==0)
181 remove(tempfile);
182 else {
183 remove(htmlfile); rename(tempfile, htmlfile);
184 if(verbose>0) printf("%d '%s'(s) substituted with '%s' in %s\n",
185 found_nr, searchstring, replacestring, htmlfile);
186 }
187 }
188 return(found_nr);
189}
190/*****************************************************************************/
191
192/*****************************************************************************/
198int html_recursive_find(
200 char *searchpath,
202 char *searchstring,
205 char *replacestring,
207 int verbose
208) {
209 DIR *dp;
210 struct dirent *de;
211 struct stat fst;
212 char tempname[FILENAME_MAX];
213 int ret, n;
214
215 if(verbose>1) printf("html_recursive_find(%s, %s, %s)\n",
216 searchpath, searchstring, replacestring);
217 /* Check the input */
218 if(searchpath==NULL || searchstring==NULL) return(1);
219 if(strlen(searchpath)<1) return(1);
220
221 /* Check whether searchpath is a directory */
222 stat(searchpath, &fst);
223 if(S_ISDIR(fst.st_mode)) { /* it is */
224 if(verbose>3) printf(" %s is directory\n", searchpath);
225 /* Open the directory */
226 dp=opendir(searchpath); if(dp==NULL) return(2);
227 /* Go throught the directory */
228 while((de=readdir(dp))!=NULL) {
229 if(verbose>5) printf("d_name='%s'\n", de->d_name);
230 if(de->d_name[0]=='.') continue; /* Ignore hidden and 'system' dirs */
231 /* Combine path and name */
232 sprintf(tempname, "%s/%s", searchpath, de->d_name);
233 if(verbose>3) printf("name='%s'\n", tempname);
234 /* Go for it (recursively) */
235 ret=html_recursive_find(tempname, searchstring, replacestring, verbose);
236 if(ret) {closedir(dp); return(ret);}
237 }
238 closedir(dp);
239 } else { /* it is a file */
240 /* Check if filename extension is .htm, .html, or .xhtml */
241 n=strlen(searchpath);
242 if((n>=5 && strcasecmp(searchpath+n-4, ".htm")==0) ||
243 (n>=6 && strcasecmp(searchpath+n-5, ".html")==0) ||
244 (n>=7 && strcasecmp(searchpath+n-6, ".xhtml")==0)) {
245 /* it is, so search/replace in it */
246 ret=html_find(searchpath, searchstring, replacestring, verbose);
247 if(verbose>8) printf(" html_find(%s, %s)=%d\n",
248 tempname, searchstring, ret);
249 if(ret>0) {
250 html_file_nr++; html_find_nr+=ret;
251 }
252 }
253 }
254 return(0);
255}
256/*****************************************************************************/
257
258/*****************************************************************************/
262int main(int argc, char *argv[])
263{
264 int ai, help=0, version=0, verbose=1;
265 unsigned int i;
266 int ret;
267 char *cptr;
268 char searchpath[FILENAME_MAX], searchstring[1024], replacestring[2048];
269
270
271 /*
272 * Get arguments
273 */
274 if(argc==1) {tpcPrintUsage(argv[0], info, stderr); return(1);}
275 searchpath[0]=searchstring[0]=replacestring[0]=(char)0;
276 /* Options */
277 for(ai=1; ai<argc; ai++) if(*argv[ai]=='-') {
278 if(tpcProcessStdOptions(argv[ai], &help, &version, &verbose)==0) continue;
279 cptr=argv[ai]+1;
280 if(strcasecmp(cptr, "CASE-INSENSITIVE")==0) {
281 html_case_sensitive=0; continue;
282 } else if(strncasecmp(cptr, "CASE-SENSITIVE", 4)==0) {
283 html_case_sensitive=1; continue;
284 } else if(strcasecmp(cptr, "REPLACE")==0) {
285 html_replace_mode=1; ai++;
286 if(ai<argc) {strcpy(replacestring, argv[ai]); continue;}
287 } else if(strcasecmp(cptr, "REPLACE-FROM-FILE")==0) {
288 html_replace_mode=1; ai++;
289 if(ai<argc) {
290 FILE *fp; int c; fp=fopen(argv[ai], "r");
291 if(fp!=NULL) {
292 if(verbose>1) printf("reading %s\n", argv[ai]);
293 i=0; c=fgetc(fp);
294 while(c!=EOF && i<2047) {replacestring[i++]=c; c=fgetc(fp);}
295 replacestring[i]=(char)0;
296 if(feof(fp)==0) {
297 fprintf(stderr, "Error: too much contents in %s\n.\n", argv[ai]);
298 fclose(fp); return(1);
299 }
300 fclose(fp); if(i>0) continue;
301 }
302 }
303 }
304 fprintf(stderr, "Error: invalid option '%s'\n", argv[ai]);
305 return(1);
306 } else break;
307
308 /* Process other arguments, starting from the first non-option */
309 for(; ai<argc; ai++) {
310 if(!searchpath[0]) {
311 strcpy(searchpath, argv[ai]); continue;
312 } else if(!searchstring[0]) {
313 strcpy(searchstring, argv[ai]); continue;
314 }
315 fprintf(stderr, "Error: invalid argument '%s'.\n", argv[ai]);
316 return(1);
317 }
318 /* Print help or version? */
319 if(help==2) {tpcHtmlUsage(argv[0], info, ""); return(0);}
320 if(help) {tpcPrintUsage(argv[0], info, stdout); return(0);}
321 if(version) {tpcPrintBuild(argv[0], stdout); return(0);}
322 /* Is something missing? */
323 if(!searchstring[0]) {
324 tpcPrintUsage(argv[0], info, stdout); return(1);}
325 if(html_replace_mode && !replacestring[0]) {
326 tpcPrintUsage(argv[0], info, stdout); return(1);}
327
328 /* In verbose mode print arguments and options */
329 if(verbose>1) {
330 printf("searchpath := %s\n", searchpath);
331 printf("searchstring := %s\n", searchstring);
332 printf("replacestring := %s\n", replacestring);
333 printf("html_replace_mode := %d\n", html_replace_mode);
334 printf("html_case_sensitive := %d\n", html_case_sensitive);
335 }
336
337
338 /*
339 * Convert the search string to uppercase,
340 * if search is case-insensitive
341 */
342 if(html_case_sensitive==0)
343 for(i=0; i<strlen(searchstring); i++)
344 searchstring[i]=(char)toupper((int)searchstring[i]);
345
346 /*
347 * Search the HTML files recursively for the search string
348 */
349 ret=html_recursive_find(searchpath, searchstring, replacestring, verbose);
350 if(ret) {
351 fprintf(stderr, "Error in searching HTML files (%d).\n", ret);
352 return(2);
353 }
354 if(html_find_nr>0)
355 fprintf(stdout, "Search text was found %d times in %d file(s).\n",
356 html_find_nr, html_file_nr);
357 else
358 fprintf(stdout, "Search text was not found.\n");
359
360 return(0);
361}
362/*****************************************************************************/
363
364/*****************************************************************************/
Header file for libtpcmisc.
int tpcProcessStdOptions(const char *s, int *print_usage, int *print_version, int *verbose_level)
Definition proginfo.c:40
int tpcHtmlUsage(const char *program, char *text[], const char *path)
Definition proginfo.c:213
void tpcPrintBuild(const char *program, FILE *fp)
Definition proginfo.c:383
void tpcPrintUsage(const char *program, char *text[], FILE *fp)
Definition proginfo.c:158