/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 53 - (hide annotations) (download)
Sat Feb 24 21:39:42 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 13249 byte(s)
Load pcre-3.5 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 53 its pattern matching. On a Unix system it can recurse into directories. */
7 nigel 49
8 nigel 53 #include <ctype.h>
9 nigel 49 #include <stdio.h>
10     #include <string.h>
11     #include <stdlib.h>
12     #include <errno.h>
13     #include "config.h"
14     #include "pcre.h"
15    
16     #define FALSE 0
17     #define TRUE 1
18    
19     typedef int BOOL;
20    
21 nigel 53 #define VERSION "2.0 01-Aug-2001"
22     #define MAX_PATTERN_COUNT 100
23 nigel 49
24    
25     /*************************************************
26     * Global variables *
27     *************************************************/
28    
29 nigel 53 static char *pattern_filename = NULL;
30     static int pattern_count = 0;
31     static pcre **pattern_list;
32     static pcre_extra **hints_list;
33 nigel 49
34     static BOOL count_only = FALSE;
35 nigel 53 static BOOL filenames = TRUE;
36 nigel 49 static BOOL filenames_only = FALSE;
37     static BOOL invert = FALSE;
38     static BOOL number = FALSE;
39 nigel 53 static BOOL recurse = FALSE;
40 nigel 49 static BOOL silent = FALSE;
41     static BOOL whole_lines = FALSE;
42    
/* Description of one command-line option: its single-letter form (or -1 when
there is only a long form), its long name, and the text shown by --help. */

struct option_item {
  int one_char;
  char *long_name;
  char *help_text;
};

typedef struct option_item option_item;

/* Table of simple (argument-less) options. The list is terminated by an
entry whose one_char is 0. "line-regex" and "line-regexp" are two spellings
of the same option. */

static option_item optionlist[] = {
  { -1,  "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  { 0,   NULL,                 NULL }
};
66    
67    
68     /*************************************************
69     * Functions for directory scanning *
70     *************************************************/
71    
72     /* These functions are defined so that they can be made system specific,
73     although at present the only ones are for Unix, and for "no directory recursion
74     support". */
75    
76    
77     /************* Directory scanning in Unix ***********/
78    
79     #if IS_UNIX
80     #include <sys/types.h>
81     #include <sys/stat.h>
82     #include <dirent.h>
83    
typedef DIR directory_type;

/* Test whether a path names a directory.

Arguments:
  filename    the path to test

Returns:      '/' (the separator to use when building names of entries
              inside it) if the path is a directory; 0 if it is not, or if
              stat() fails. In the failure case the caller goes on to open
              the path as a plain file, and that open is expected to fail
              and report the error.

S_ISDIR() is used rather than masking with S_IFMT/S_IFDIR because the S_IF*
names are an XSI extension that is not visible under strict feature-test
settings, whereas S_ISDIR() is part of base POSIX. */

int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;
  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
94    
95     directory_type *
96     opendirectory(char *filename)
97     {
98     return opendir(filename);
99     }
100    
101     char *
102     readdirectory(directory_type *dir)
103     {
104     for (;;)
105     {
106     struct dirent *dent = readdir(dir);
107     if (dent == NULL) return NULL;
108     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
109     return dent->d_name;
110     }
111     return NULL; /* Keep compiler happy; never executed */
112     }
113    
114     void
115     closedirectory(directory_type *dir)
116     {
117     closedir(dir);
118     }
119    
120    
121     #else
122    
123    
124     /************* Directory scanning when we can't do it ***********/
125    
/* Stand-ins used when directory recursion is not supported. The directory
type is void. isdirectory() always answers "no", so the other functions are
not expected to be reached; nevertheless each non-void function returns a
well-defined value (NULL) instead of falling off its end, which would be
undefined behaviour if a caller ever used the result. */

typedef void directory_type;

/* Always reports "not a directory" (0). */
int isdirectory(char *filename) { (void)filename; return 0; }

/* Always fails: there is no directory support, so NULL is returned. */
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }

/* Always reports "no more entries" by returning NULL. */
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }

/* Nothing to release. */
void closedirectory(directory_type *dir) { (void)dir; }
134    
135     #endif
136    
137    
138    
139 nigel 49 #if ! HAVE_STRERROR
140     /*************************************************
141     * Provide strerror() for non-ANSI libraries *
142     *************************************************/
143    
144     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
145     in their libraries, but can provide the same facility by this simple
146     alternative function. */
147    
/* Error-message table and its size, supplied by the C library on systems
that lack strerror(). */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist.

Arguments:
  n           an errno value

Returns:      the corresponding message, or a fixed "unknown" text when n
              lies outside the range 0 .. sys_nerr-1 */

char *
strerror(int n)
{
  int in_range = (n >= 0 && n < sys_nerr);
  return in_range ? sys_errlist[n] : "unknown error number";
}
157     #endif /* HAVE_STRERROR */
158    
159    
160    
161     /*************************************************
162     * Grep an individual file *
163     *************************************************/
164    
165     static int
166     pcregrep(FILE *in, char *name)
167     {
168     int rc = 1;
169     int linenumber = 0;
170     int count = 0;
171     int offsets[99];
172     char buffer[BUFSIZ];
173    
174     while (fgets(buffer, sizeof(buffer), in) != NULL)
175     {
176 nigel 53 BOOL match = FALSE;
177     int i;
178 nigel 49 int length = (int)strlen(buffer);
179     if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
180     linenumber++;
181    
182 nigel 53 for (i = 0; !match && i < pattern_count; i++)
183     {
184     match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
185     offsets, 99) >= 0;
186     if (match && whole_lines && offsets[1] != length) match = FALSE;
187     }
188 nigel 49
189     if (match != invert)
190     {
191     if (count_only) count++;
192    
193     else if (filenames_only)
194     {
195     fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);
196     return 0;
197     }
198    
199     else if (silent) return 0;
200    
201     else
202     {
203     if (name != NULL) fprintf(stdout, "%s:", name);
204     if (number) fprintf(stdout, "%d:", linenumber);
205     fprintf(stdout, "%s\n", buffer);
206     }
207    
208     rc = 0;
209     }
210     }
211    
212     if (count_only)
213     {
214     if (name != NULL) fprintf(stdout, "%s:", name);
215     fprintf(stdout, "%d\n", count);
216     }
217    
218     return rc;
219     }
220    
221    
222    
223    
224     /*************************************************
225 nigel 53 * Grep a file or recurse into a directory *
226     *************************************************/
227    
228     static int
229     grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,
230     BOOL only_one_at_top)
231     {
232     int rc = 1;
233     int sep;
234     FILE *in;
235    
236     /* If the file is a directory and we are recursing, scan each file within it.
237     The scanning code is localized so it can be made system-specific. */
238    
239     if ((sep = isdirectory(filename)) != 0 && recurse)
240     {
241     char buffer[1024];
242     char *nextfile;
243     directory_type *dir = opendirectory(filename);
244    
245     if (dir == NULL)
246     {
247     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
248     strerror(errno));
249     return 2;
250     }
251    
252     while ((nextfile = readdirectory(dir)) != NULL)
253     {
254     int frc;
255     sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
256     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);
257     if (frc == 0 && rc == 1) rc = 0;
258     }
259    
260     closedirectory(dir);
261     return rc;
262     }
263    
264     /* If the file is not a directory, or we are not recursing, scan it. If this is
265     the first and only argument at top level, we don't show the file name.
266     Otherwise, control is via the show_filenames variable. */
267    
268     in = fopen(filename, "r");
269     if (in == NULL)
270     {
271     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
272     return 2;
273     }
274    
275     rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);
276     fclose(in);
277     return rc;
278     }
279    
280    
281    
282    
283     /*************************************************
284 nigel 49 * Usage function *
285     *************************************************/
286    
/* Write the two-line usage summary to stderr.

Arguments:
  rc          value to hand back, so that callers can write
              "return usage(n)" or "exit(usage(n))"

Returns:      rc, unchanged */

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
294    
295    
296    
297    
298     /*************************************************
299 nigel 53 * Help function *
300     *************************************************/
301    
302     static void
303     help(void)
304     {
305     option_item *op;
306    
307     printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");
308     printf("Search for PATTERN in each FILE or standard input.\n");
309     printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
310    
311     printf("Options:\n");
312    
313     for (op = optionlist; op->one_char != 0; op++)
314     {
315     int n;
316     char s[4];
317     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
318     printf(" %s --%s%n", s, op->long_name, &n);
319     n = 30 - n;
320     if (n < 1) n = 1;
321     printf("%.*s%s\n", n, " ", op->help_text);
322     }
323    
324     printf("\n -f<filename> or --file=<filename>\n");
325     printf(" Read patterns from <filename> instead of using a command line option.\n");
326     printf(" Trailing white space is removed; blanks lines are ignored.\n");
327     printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
328    
329     printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
330     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
331     }
332    
333    
334    
335    
336     /*************************************************
337     * Handle an option *
338     *************************************************/
339    
340     static int
341     handle_option(int letter, int options)
342     {
343     switch(letter)
344     {
345     case -1: help(); exit(0);
346     case 'c': count_only = TRUE; break;
347     case 'h': filenames = FALSE; break;
348     case 'i': options |= PCRE_CASELESS; break;
349     case 'l': filenames_only = TRUE;
350     case 'n': number = TRUE; break;
351     case 'r': recurse = TRUE; break;
352     case 's': silent = TRUE; break;
353     case 'v': invert = TRUE; break;
354     case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
355    
356     case 'V':
357     fprintf(stderr, "pcregrep version %s using ", VERSION);
358     fprintf(stderr, "PCRE version %s\n", pcre_version());
359     exit(0);
360     break;
361    
362     default:
363     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
364     exit(usage(2));
365     }
366    
367     return options;
368     }
369    
370    
371    
372    
373     /*************************************************
374 nigel 49 * Main program *
375     *************************************************/
376    
377     int
378     main(int argc, char **argv)
379     {
380 nigel 53 int i, j;
381 nigel 49 int rc = 1;
382     int options = 0;
383     int errptr;
384     const char *error;
385 nigel 53 BOOL only_one_at_top;
386 nigel 49
387     /* Process the options */
388    
389     for (i = 1; i < argc; i++)
390     {
391     if (argv[i][0] != '-') break;
392 nigel 53
393     /* Long name options */
394    
395     if (argv[i][1] == '-')
396 nigel 49 {
397 nigel 53 option_item *op;
398    
399     if (strncmp(argv[i]+2, "file=", 5) == 0)
400 nigel 49 {
401 nigel 53 pattern_filename = argv[i] + 7;
402     continue;
403     }
404 nigel 49
405 nigel 53 for (op = optionlist; op->one_char != 0; op++)
406     {
407     if (strcmp(argv[i]+2, op->long_name) == 0)
408     {
409     options = handle_option(op->one_char, options);
410     break;
411     }
412     }
413     if (op->one_char == 0)
414     {
415     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
416     exit(usage(2));
417     }
418     }
419 nigel 49
420 nigel 53 /* One-char options */
421    
422     else
423     {
424     char *s = argv[i] + 1;
425     while (*s != 0)
426     {
427     if (*s == 'f')
428     {
429     pattern_filename = s + 1;
430     if (pattern_filename[0] == 0)
431     {
432     if (i >= argc - 1)
433     {
434     fprintf(stderr, "pcregrep: File name missing after -f\n");
435     exit(usage(2));
436     }
437     pattern_filename = argv[++i];
438     }
439     break;
440     }
441     else options = handle_option(*s++, options);
442 nigel 49 }
443     }
444     }
445    
446 nigel 53 pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
447     hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
448 nigel 49
449 nigel 53 if (pattern_list == NULL || hints_list == NULL)
450     {
451     fprintf(stderr, "pcregrep: malloc failed\n");
452     return 2;
453     }
454 nigel 49
455 nigel 53 /* Compile the regular expression(s). */
456 nigel 49
457 nigel 53 if (pattern_filename != NULL)
458 nigel 49 {
459 nigel 53 FILE *f = fopen(pattern_filename, "r");
460     char buffer[BUFSIZ];
461     if (f == NULL)
462     {
463     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
464     strerror(errno));
465     return 2;
466     }
467     while (fgets(buffer, sizeof(buffer), f) != NULL)
468     {
469     char *s = buffer + (int)strlen(buffer);
470     if (pattern_count >= MAX_PATTERN_COUNT)
471     {
472     fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
473     MAX_PATTERN_COUNT);
474     return 2;
475     }
476     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
477     if (s == buffer) continue;
478     *s = 0;
479     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
480     &errptr, NULL);
481     if (pattern_list[pattern_count++] == NULL)
482     {
483     fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
484     pattern_count, errptr, error);
485     return 2;
486     }
487     }
488     fclose(f);
489 nigel 49 }
490    
491 nigel 53 /* If no file name, a single regex must be given inline */
492 nigel 49
493 nigel 53 else
494 nigel 49 {
495 nigel 53 if (i >= argc) return usage(0);
496     pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
497     if (pattern_list[0] == NULL)
498     {
499     fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
500     error);
501     return 2;
502     }
503     pattern_count++;
504 nigel 49 }
505    
506 nigel 53 /* Study the regular expressions, as we will be running them may times */
507    
508     for (j = 0; j < pattern_count; j++)
509     {
510     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
511     if (error != NULL)
512     {
513     char s[16];
514     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
515     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
516     return 2;
517     }
518     }
519    
520 nigel 49 /* If there are no further arguments, do the business on stdin and exit */
521    
522     if (i >= argc) return pcregrep(stdin, NULL);
523    
524 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
525     Pass in the fact that there is only one argument at top level - this suppresses
526     the file name if the argument is not a directory. */
527 nigel 49
528 nigel 53 only_one_at_top = (i == argc - 1);
529 nigel 49 if (filenames_only) filenames = TRUE;
530    
531     for (; i < argc; i++)
532     {
533 nigel 53 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
534     if (frc == 0 && rc == 1) rc = 0;
535 nigel 49 }
536    
537     return rc;
538     }
539    
540     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12