/[pcre]/code/tags/pcre-4.5/pcregrep.c
ViewVC logotype

Contents of /code/tags/pcre-4.5/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 63 - (show annotations) (download)
Sat Feb 24 21:40:03 2007 UTC (7 years, 7 months ago) by nigel
Original Path: code/trunk/pcregrep.c
File MIME type: text/plain
File size: 15295 byte(s)
Load pcre-4.0 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories. */
8
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <stdlib.h>
13 #include <errno.h>
14 #include "config.h"
15 #include "pcre.h"
16
17 #define FALSE 0
18 #define TRUE 1
19
20 typedef int BOOL;
21
22 #define VERSION "3.0 14-Jan-2003"
23 #define MAX_PATTERN_COUNT 100
24
25
26 /*************************************************
27 * Global variables *
28 *************************************************/
29
30 static char *pattern_filename = NULL;
31 static int pattern_count = 0;
32 static pcre **pattern_list;
33 static pcre_extra **hints_list;
34
35 static BOOL count_only = FALSE;
36 static BOOL filenames = TRUE;
37 static BOOL filenames_only = FALSE;
38 static BOOL invert = FALSE;
39 static BOOL number = FALSE;
40 static BOOL recurse = FALSE;
41 static BOOL silent = FALSE;
42 static BOOL whole_lines = FALSE;
43
44 /* Structure for options and list of them */
45
/* One entry in the option table. one_char is the single-letter form of the
option, or a negative value for an option that has only a long name; a value of
zero marks the end of the table. The two strings always point at string
literals, so they are declared const. */

typedef struct option_item {
  int one_char;
  const char *long_name;
  const char *help_text;
} option_item;

/* The table of simple (argument-less) options. It is searched by long name
when parsing the command line and is printed in full by the help function. */

static option_item optionlist[] = {
  { -1,  "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'u', "utf-8",              "use UTF-8 mode" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  { 0,   NULL,                 NULL }
};
68
69
70 /*************************************************
71 * Functions for directory scanning *
72 *************************************************/
73
74 /* These functions are defined so that they can be made system specific,
75 although at present the only ones are for Unix, Win32, and for "no directory
76 recursion support". */
77
78
79 /************* Directory scanning in Unix ***********/
80
81 #if IS_UNIX
82 #include <sys/types.h>
83 #include <sys/stat.h>
84 #include <dirent.h>
85
86 typedef DIR directory_type;
87
/* Test whether a name refers to a directory. The result is '/' (used by the
caller as the separator when building names of directory entries) if stat()
succeeds and reports a directory, and 0 in every other case. */

int
isdirectory(char *filename)
{
  struct stat info;

  /* A stat() failure is treated as "not a directory", in the expectation that
  the subsequent attempt to open the name as a file will fail. */

  if (stat(filename, &info) >= 0 && (info.st_mode & S_IFMT) == S_IFDIR)
    return '/';

  return 0;
}
96
97 directory_type *
98 opendirectory(char *filename)
99 {
100 return opendir(filename);
101 }
102
103 char *
104 readdirectory(directory_type *dir)
105 {
106 for (;;)
107 {
108 struct dirent *dent = readdir(dir);
109 if (dent == NULL) return NULL;
110 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
111 return dent->d_name;
112 }
113 return NULL; /* Keep compiler happy; never executed */
114 }
115
116 void
117 closedirectory(directory_type *dir)
118 {
119 closedir(dir);
120 }
121
122
123 /************* Directory scanning in Win32 ***********/
124
125 /* I (Philip Hazel) have no means of testing this code. It was contributed by
126 Lionel Fourquaux. */
127
128
129 #elif HAVE_WIN32API
130
131 #ifndef STRICT
132 # define STRICT
133 #endif
134 #ifndef WIN32_LEAN_AND_MEAN
135 # define WIN32_LEAN_AND_MEAN
136 #endif
137 #include <windows.h>
138
139 typedef struct directory_type
140 {
141 HANDLE handle;
142 BOOL first;
143 WIN32_FIND_DATA data;
144 } directory_type;
145
146 int
147 isdirectory(char *filename)
148 {
149 DWORD attr = GetFileAttributes(filename);
150 if (attr == INVALID_FILE_ATTRIBUTES)
151 return 0;
152 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
153 }
154
155 directory_type *
156 opendirectory(char *filename)
157 {
158 size_t len;
159 char *pattern;
160 directory_type *dir;
161 DWORD err;
162 len = strlen(filename);
163 pattern = (char *) malloc(len + 3);
164 dir = (directory_type *) malloc(sizeof(*dir));
165 if ((pattern == NULL) || (dir == NULL))
166 {
167 fprintf(stderr, "pcregrep: malloc failed\n");
168 exit(2);
169 }
170 memcpy(pattern, filename, len);
171 memcpy(&(pattern[len]), "\\*", 3);
172 dir->handle = FindFirstFile(pattern, &(dir->data));
173 if (dir->handle != INVALID_HANDLE_VALUE)
174 {
175 free(pattern);
176 dir->first = TRUE;
177 return dir;
178 }
179 err = GetLastError();
180 free(pattern);
181 free(dir);
182 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
183 return NULL;
184 }
185
186 char *
187 readdirectory(directory_type *dir)
188 {
189 for (;;)
190 {
191 if (!dir->first)
192 {
193 if (!FindNextFile(dir->handle, &(dir->data)))
194 return NULL;
195 }
196 else
197 {
198 dir->first = FALSE;
199 }
200 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
201 return dir->data.cFileName;
202 }
203 #ifndef _MSC_VER
204 return NULL; /* Keep compiler happy; never executed */
205 #endif
206 }
207
208 void
209 closedirectory(directory_type *dir)
210 {
211 FindClose(dir->handle);
212 free(dir);
213 }
214
215
216 /************* Directory scanning when we can't do it ***********/
217
218 /* The type is void, and apart from isdirectory(), the functions do nothing. */
219
220 #else
221
typedef void directory_type;

/* Stubs for systems without directory scanning. isdirectory() always answers
"not a directory" (0), so the other functions are not expected to be reached.
They nevertheless return well-defined values: falling off the end of a
function that returns a value is undefined behaviour if the caller uses it. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
228
229 #endif
230
231
232
233 #if ! HAVE_STRERROR
234 /*************************************************
235 * Provide strerror() for non-ANSI libraries *
236 *************************************************/
237
238 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
239 in their libraries, but can provide the same facility by this simple
240 alternative function. */
241
extern int sys_nerr;
extern char *sys_errlist[];

/* Substitute for the standard strerror(): look the error number up in
sys_errlist, giving a fixed text for numbers outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
251 #endif /* HAVE_STRERROR */
252
253
254
255 /*************************************************
256 * Grep an individual file *
257 *************************************************/
258
259 static int
260 pcregrep(FILE *in, char *name)
261 {
262 int rc = 1;
263 int linenumber = 0;
264 int count = 0;
265 int offsets[99];
266 char buffer[BUFSIZ];
267
268 while (fgets(buffer, sizeof(buffer), in) != NULL)
269 {
270 BOOL match = FALSE;
271 int i;
272 int length = (int)strlen(buffer);
273 if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
274 linenumber++;
275
276 for (i = 0; !match && i < pattern_count; i++)
277 {
278 match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
279 offsets, 99) >= 0;
280 if (match && whole_lines && offsets[1] != length) match = FALSE;
281 }
282
283 if (match != invert)
284 {
285 if (count_only) count++;
286
287 else if (filenames_only)
288 {
289 fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);
290 return 0;
291 }
292
293 else if (silent) return 0;
294
295 else
296 {
297 if (name != NULL) fprintf(stdout, "%s:", name);
298 if (number) fprintf(stdout, "%d:", linenumber);
299 fprintf(stdout, "%s\n", buffer);
300 }
301
302 rc = 0;
303 }
304 }
305
306 if (count_only)
307 {
308 if (name != NULL) fprintf(stdout, "%s:", name);
309 fprintf(stdout, "%d\n", count);
310 }
311
312 return rc;
313 }
314
315
316
317
318 /*************************************************
319 * Grep a file or recurse into a directory *
320 *************************************************/
321
322 static int
323 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,
324 BOOL only_one_at_top)
325 {
326 int rc = 1;
327 int sep;
328 FILE *in;
329
330 /* If the file is a directory and we are recursing, scan each file within it.
331 The scanning code is localized so it can be made system-specific. */
332
333 if ((sep = isdirectory(filename)) != 0 && recurse)
334 {
335 char buffer[1024];
336 char *nextfile;
337 directory_type *dir = opendirectory(filename);
338
339 if (dir == NULL)
340 {
341 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
342 strerror(errno));
343 return 2;
344 }
345
346 while ((nextfile = readdirectory(dir)) != NULL)
347 {
348 int frc;
349 sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
350 frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);
351 if (frc == 0 && rc == 1) rc = 0;
352 }
353
354 closedirectory(dir);
355 return rc;
356 }
357
358 /* If the file is not a directory, or we are not recursing, scan it. If this is
359 the first and only argument at top level, we don't show the file name (unless
360 we are only showing the file name). Otherwise, control is via the
361 show_filenames variable. */
362
363 in = fopen(filename, "r");
364 if (in == NULL)
365 {
366 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
367 return 2;
368 }
369
370 rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
371 filename : NULL);
372 fclose(in);
373 return rc;
374 }
375
376
377
378
379 /*************************************************
380 * Usage function *
381 *************************************************/
382
/* Write a two-line usage summary to stderr.

Argument:  rc   a value to hand straight back
Returns:   rc, so that the caller can write exit(usage(2)) or return usage(2)
*/

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
390
391
392
393
394 /*************************************************
395 * Help function *
396 *************************************************/
397
398 static void
399 help(void)
400 {
401 option_item *op;
402
403 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
404 printf("Search for PATTERN in each FILE or standard input.\n");
405 printf("PATTERN must be present if -f is not used.\n");
406 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
407
408 printf("Options:\n");
409
410 for (op = optionlist; op->one_char != 0; op++)
411 {
412 int n;
413 char s[4];
414 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
415 printf(" %s --%s%n", s, op->long_name, &n);
416 n = 30 - n;
417 if (n < 1) n = 1;
418 printf("%.*s%s\n", n, " ", op->help_text);
419 }
420
421 printf("\n -f<filename> or --file=<filename>\n");
422 printf(" Read patterns from <filename> instead of using a command line option.\n");
423 printf(" Trailing white space is removed; blanks lines are ignored.\n");
424 printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
425
426 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
427 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
428 }
429
430
431
432
433 /*************************************************
434 * Handle an option *
435 *************************************************/
436
437 static int
438 handle_option(int letter, int options)
439 {
440 switch(letter)
441 {
442 case -1: help(); exit(0);
443 case 'c': count_only = TRUE; break;
444 case 'h': filenames = FALSE; break;
445 case 'i': options |= PCRE_CASELESS; break;
446 case 'l': filenames_only = TRUE;
447 case 'n': number = TRUE; break;
448 case 'r': recurse = TRUE; break;
449 case 's': silent = TRUE; break;
450 case 'u': options |= PCRE_UTF8; break;
451 case 'v': invert = TRUE; break;
452 case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
453
454 case 'V':
455 fprintf(stderr, "pcregrep version %s using ", VERSION);
456 fprintf(stderr, "PCRE version %s\n", pcre_version());
457 exit(0);
458 break;
459
460 default:
461 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
462 exit(usage(2));
463 }
464
465 return options;
466 }
467
468
469
470
471 /*************************************************
472 * Main program *
473 *************************************************/
474
475 int
476 main(int argc, char **argv)
477 {
478 int i, j;
479 int rc = 1;
480 int options = 0;
481 int errptr;
482 const char *error;
483 BOOL only_one_at_top;
484
485 /* Process the options */
486
487 for (i = 1; i < argc; i++)
488 {
489 if (argv[i][0] != '-') break;
490
491 /* Missing options */
492
493 if (argv[i][1] == 0) exit(usage(2));
494
495 /* Long name options */
496
497 if (argv[i][1] == '-')
498 {
499 option_item *op;
500
501 if (strncmp(argv[i]+2, "file=", 5) == 0)
502 {
503 pattern_filename = argv[i] + 7;
504 continue;
505 }
506
507 for (op = optionlist; op->one_char != 0; op++)
508 {
509 if (strcmp(argv[i]+2, op->long_name) == 0)
510 {
511 options = handle_option(op->one_char, options);
512 break;
513 }
514 }
515 if (op->one_char == 0)
516 {
517 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
518 exit(usage(2));
519 }
520 }
521
522 /* One-char options */
523
524 else
525 {
526 char *s = argv[i] + 1;
527 while (*s != 0)
528 {
529 if (*s == 'f')
530 {
531 pattern_filename = s + 1;
532 if (pattern_filename[0] == 0)
533 {
534 if (i >= argc - 1)
535 {
536 fprintf(stderr, "pcregrep: File name missing after -f\n");
537 exit(usage(2));
538 }
539 pattern_filename = argv[++i];
540 }
541 break;
542 }
543 else options = handle_option(*s++, options);
544 }
545 }
546 }
547
548 pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
549 hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
550
551 if (pattern_list == NULL || hints_list == NULL)
552 {
553 fprintf(stderr, "pcregrep: malloc failed\n");
554 return 2;
555 }
556
557 /* Compile the regular expression(s). */
558
559 if (pattern_filename != NULL)
560 {
561 FILE *f = fopen(pattern_filename, "r");
562 char buffer[BUFSIZ];
563 if (f == NULL)
564 {
565 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
566 strerror(errno));
567 return 2;
568 }
569 while (fgets(buffer, sizeof(buffer), f) != NULL)
570 {
571 char *s = buffer + (int)strlen(buffer);
572 if (pattern_count >= MAX_PATTERN_COUNT)
573 {
574 fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
575 MAX_PATTERN_COUNT);
576 return 2;
577 }
578 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
579 if (s == buffer) continue;
580 *s = 0;
581 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
582 &errptr, NULL);
583 if (pattern_list[pattern_count++] == NULL)
584 {
585 fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
586 pattern_count, errptr, error);
587 return 2;
588 }
589 }
590 fclose(f);
591 }
592
593 /* If no file name, a single regex must be given inline */
594
595 else
596 {
597 if (i >= argc) return usage(2);
598 pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
599 if (pattern_list[0] == NULL)
600 {
601 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
602 error);
603 return 2;
604 }
605 pattern_count++;
606 }
607
608 /* Study the regular expressions, as we will be running them may times */
609
610 for (j = 0; j < pattern_count; j++)
611 {
612 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
613 if (error != NULL)
614 {
615 char s[16];
616 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
617 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
618 return 2;
619 }
620 }
621
622 /* If there are no further arguments, do the business on stdin and exit */
623
624 if (i >= argc) return pcregrep(stdin, NULL);
625
626 /* Otherwise, work through the remaining arguments as files or directories.
627 Pass in the fact that there is only one argument at top level - this suppresses
628 the file name if the argument is not a directory. */
629
630 only_one_at_top = (i == argc - 1);
631 if (filenames_only) filenames = TRUE;
632
633 for (; i < argc; i++)
634 {
635 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
636 if (frc == 0 && rc == 1) rc = 0;
637 }
638
639 return rc;
640 }
641
642 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12