/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 75 - (hide annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 17061 byte(s)
Load pcre-5.0 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 75 Copyright (c) 1997-2004 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 49 #include <stdio.h>
42     #include <string.h>
43     #include <stdlib.h>
44     #include <errno.h>
45     #include "config.h"
46     #include "pcre.h"
47    
/* Home-grown boolean type: BOOL is a plain int holding TRUE or FALSE. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Program identification (reported by -V) and the size of the compiled
pattern tables allocated in main(). */

#define MAX_PATTERN_COUNT 100
#define VERSION "3.0 14-Jan-2003"
55 nigel 49
56    
57     /*************************************************
58     * Global variables *
59     *************************************************/
60    
61 nigel 53 static char *pattern_filename = NULL;
62     static int pattern_count = 0;
63     static pcre **pattern_list;
64     static pcre_extra **hints_list;
65 nigel 49
66     static BOOL count_only = FALSE;
67 nigel 53 static BOOL filenames = TRUE;
68 nigel 49 static BOOL filenames_only = FALSE;
69     static BOOL invert = FALSE;
70     static BOOL number = FALSE;
71 nigel 53 static BOOL recurse = FALSE;
72 nigel 49 static BOOL silent = FALSE;
73     static BOOL whole_lines = FALSE;
74    
75 nigel 53 /* Structure for options and list of them */
76 nigel 49
/* One entry of the option table. one_char is the single-letter form of the
option; a negative value marks an option that has only a long name, and zero
marks the end of the table. */

struct option_item {
  int one_char;
  const char *long_name;
  const char *help_text;
};

typedef struct option_item option_item;

/* The table of options that take no argument. It drives both the long-option
lookup in main() and the listing printed by help(). The -f / --file= option
takes an argument and is handled separately. */

static option_item optionlist[] = {
  {  -1, "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'u', "utf-8",              "use UTF-8 mode" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  {   0, NULL,                 NULL }   /* end marker */
};
99    
100    
101     /*************************************************
102     * Functions for directory scanning *
103     *************************************************/
104    
105     /* These functions are defined so that they can be made system specific,
106 nigel 63 although at present the only ones are for Unix, Win32, and for "no directory
107     recursion support". */
108 nigel 53
109    
110     /************* Directory scanning in Unix ***********/
111    
112     #if IS_UNIX
113     #include <sys/types.h>
114     #include <sys/stat.h>
115     #include <dirent.h>
116    
117     typedef DIR directory_type;
118    
/* Test whether a path names a directory.

Arguments:
  filename    the path to examine

Returns:      '/' (the separator to use when building names of entries
              inside it) if the path is a directory; 0 if it is not, or if
              stat() fails - in the latter case the caller is expected to
              try opening it as a file, which will report the error
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) == 0 && S_ISDIR(info.st_mode))
    return '/';

  return 0;
}
127    
128 nigel 67 static directory_type *
129 nigel 53 opendirectory(char *filename)
130     {
131     return opendir(filename);
132     }
133    
134 nigel 67 static char *
135 nigel 53 readdirectory(directory_type *dir)
136     {
137     for (;;)
138     {
139     struct dirent *dent = readdir(dir);
140     if (dent == NULL) return NULL;
141     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
142     return dent->d_name;
143     }
144     return NULL; /* Keep compiler happy; never executed */
145     }
146    
147 nigel 67 static void
148 nigel 53 closedirectory(directory_type *dir)
149     {
150     closedir(dir);
151     }
152    
153    
154 nigel 63 /************* Directory scanning in Win32 ***********/
155 nigel 53
156 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
157     Lionel Fourquaux. */
158 nigel 53
159 nigel 63
160     #elif HAVE_WIN32API
161    
162     #ifndef STRICT
163     # define STRICT
164     #endif
165     #ifndef WIN32_LEAN_AND_MEAN
166     # define WIN32_LEAN_AND_MEAN
167     #endif
168     #include <windows.h>
169    
170     typedef struct directory_type
171     {
172     HANDLE handle;
173     BOOL first;
174     WIN32_FIND_DATA data;
175     } directory_type;
176    
177     int
178     isdirectory(char *filename)
179     {
180     DWORD attr = GetFileAttributes(filename);
181     if (attr == INVALID_FILE_ATTRIBUTES)
182     return 0;
183     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
184     }
185    
186     directory_type *
187     opendirectory(char *filename)
188     {
189     size_t len;
190     char *pattern;
191     directory_type *dir;
192     DWORD err;
193     len = strlen(filename);
194     pattern = (char *) malloc(len + 3);
195     dir = (directory_type *) malloc(sizeof(*dir));
196     if ((pattern == NULL) || (dir == NULL))
197     {
198     fprintf(stderr, "pcregrep: malloc failed\n");
199     exit(2);
200     }
201     memcpy(pattern, filename, len);
202     memcpy(&(pattern[len]), "\\*", 3);
203     dir->handle = FindFirstFile(pattern, &(dir->data));
204     if (dir->handle != INVALID_HANDLE_VALUE)
205     {
206     free(pattern);
207     dir->first = TRUE;
208     return dir;
209     }
210     err = GetLastError();
211     free(pattern);
212     free(dir);
213     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
214     return NULL;
215     }
216    
217     char *
218     readdirectory(directory_type *dir)
219     {
220     for (;;)
221     {
222     if (!dir->first)
223     {
224     if (!FindNextFile(dir->handle, &(dir->data)))
225     return NULL;
226     }
227     else
228     {
229     dir->first = FALSE;
230     }
231     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
232     return dir->data.cFileName;
233     }
234     #ifndef _MSC_VER
235     return NULL; /* Keep compiler happy; never executed */
236     #endif
237     }
238    
239     void
240     closedirectory(directory_type *dir)
241     {
242     FindClose(dir->handle);
243     free(dir);
244     }
245    
246    
247 nigel 53 /************* Directory scanning when we can't do it ***********/
248    
249     /* The type is void, and apart from isdirectory(), the functions do nothing. */
250    
251 nigel 63 #else
252    
typedef void directory_type;

/* Stub versions for systems with no directory scanning support. Because
isdirectory() always answers "not a directory", grep_or_recurse() never calls
the other three; they nevertheless return well-defined values rather than
falling off the end of a non-void function. */

/* Always reports "not a directory" (0). */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Always fails: there is no way to open a directory here. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* Always reports that there are no (more) entries. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
259    
260     #endif
261    
262    
263    
264 nigel 49 #if ! HAVE_STRERROR
265     /*************************************************
266     * Provide strerror() for non-ANSI libraries *
267     *************************************************/
268    
269     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
270     in their libraries, but can provide the same facility by this simple
271     alternative function. */
272    
/* The error message table and its size, supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror().

Arguments:
  n           an error number

Returns:      the sys_errlist entry for n, or a fixed "unknown" text when n
              lies outside the range 0 .. sys_nerr-1
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
282     #endif /* HAVE_STRERROR */
283    
284    
285    
286     /*************************************************
287     * Grep an individual file *
288     *************************************************/
289    
290     static int
291     pcregrep(FILE *in, char *name)
292     {
293     int rc = 1;
294     int linenumber = 0;
295     int count = 0;
296     int offsets[99];
297     char buffer[BUFSIZ];
298    
299     while (fgets(buffer, sizeof(buffer), in) != NULL)
300     {
301 nigel 53 BOOL match = FALSE;
302     int i;
303 nigel 49 int length = (int)strlen(buffer);
304     if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
305     linenumber++;
306    
307 nigel 53 for (i = 0; !match && i < pattern_count; i++)
308     {
309     match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
310     offsets, 99) >= 0;
311     if (match && whole_lines && offsets[1] != length) match = FALSE;
312     }
313 nigel 49
314     if (match != invert)
315     {
316     if (count_only) count++;
317    
318     else if (filenames_only)
319     {
320     fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);
321     return 0;
322     }
323    
324     else if (silent) return 0;
325    
326     else
327     {
328     if (name != NULL) fprintf(stdout, "%s:", name);
329     if (number) fprintf(stdout, "%d:", linenumber);
330     fprintf(stdout, "%s\n", buffer);
331     }
332    
333     rc = 0;
334     }
335     }
336    
337     if (count_only)
338     {
339     if (name != NULL) fprintf(stdout, "%s:", name);
340     fprintf(stdout, "%d\n", count);
341     }
342    
343     return rc;
344     }
345    
346    
347    
348    
349     /*************************************************
350 nigel 53 * Grep a file or recurse into a directory *
351     *************************************************/
352    
353     static int
354 nigel 67 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,
355 nigel 53 BOOL only_one_at_top)
356     {
357     int rc = 1;
358     int sep;
359     FILE *in;
360    
361     /* If the file is a directory and we are recursing, scan each file within it.
362     The scanning code is localized so it can be made system-specific. */
363    
364 nigel 67 if ((sep = isdirectory(filename)) != 0 && dir_recurse)
365 nigel 53 {
366     char buffer[1024];
367     char *nextfile;
368     directory_type *dir = opendirectory(filename);
369    
370     if (dir == NULL)
371     {
372     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
373     strerror(errno));
374     return 2;
375     }
376    
377     while ((nextfile = readdirectory(dir)) != NULL)
378     {
379     int frc;
380     sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
381 nigel 67 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
382 nigel 53 if (frc == 0 && rc == 1) rc = 0;
383     }
384    
385     closedirectory(dir);
386     return rc;
387     }
388    
389     /* If the file is not a directory, or we are not recursing, scan it. If this is
390 nigel 63 the first and only argument at top level, we don't show the file name (unless
391     we are only showing the file name). Otherwise, control is via the
392     show_filenames variable. */
393 nigel 53
394     in = fopen(filename, "r");
395     if (in == NULL)
396     {
397     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
398     return 2;
399     }
400    
401 nigel 63 rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?
402     filename : NULL);
403 nigel 53 fclose(in);
404     return rc;
405     }
406    
407    
408    
409    
410     /*************************************************
411 nigel 49 * Usage function *
412     *************************************************/
413    
/* Write the two-line usage summary to stderr.

Argument:   rc    a return code
Returns:    rc, unchanged, so that callers can write exit(usage(2)) or
            return usage(2)
*/

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
421    
422    
423    
424    
425     /*************************************************
426 nigel 53 * Help function *
427     *************************************************/
428    
429     static void
430     help(void)
431     {
432     option_item *op;
433    
434 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
435 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
436 nigel 63 printf("PATTERN must be present if -f is not used.\n");
437 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
438    
439     printf("Options:\n");
440    
441     for (op = optionlist; op->one_char != 0; op++)
442     {
443     int n;
444     char s[4];
445     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
446     printf(" %s --%s%n", s, op->long_name, &n);
447     n = 30 - n;
448     if (n < 1) n = 1;
449     printf("%.*s%s\n", n, " ", op->help_text);
450     }
451    
452     printf("\n -f<filename> or --file=<filename>\n");
453     printf(" Read patterns from <filename> instead of using a command line option.\n");
454     printf(" Trailing white space is removed; blanks lines are ignored.\n");
455     printf(" There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
456    
457     printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
458     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
459     }
460    
461    
462    
463    
464     /*************************************************
465     * Handle an option *
466     *************************************************/
467    
468     static int
469     handle_option(int letter, int options)
470     {
471     switch(letter)
472     {
473     case -1: help(); exit(0);
474     case 'c': count_only = TRUE; break;
475     case 'h': filenames = FALSE; break;
476     case 'i': options |= PCRE_CASELESS; break;
477     case 'l': filenames_only = TRUE;
478     case 'n': number = TRUE; break;
479     case 'r': recurse = TRUE; break;
480     case 's': silent = TRUE; break;
481 nigel 63 case 'u': options |= PCRE_UTF8; break;
482 nigel 53 case 'v': invert = TRUE; break;
483     case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
484    
485     case 'V':
486     fprintf(stderr, "pcregrep version %s using ", VERSION);
487     fprintf(stderr, "PCRE version %s\n", pcre_version());
488     exit(0);
489     break;
490    
491     default:
492     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
493     exit(usage(2));
494     }
495    
496     return options;
497     }
498    
499    
500    
501    
502     /*************************************************
503 nigel 49 * Main program *
504     *************************************************/
505    
506     int
507     main(int argc, char **argv)
508     {
509 nigel 53 int i, j;
510 nigel 49 int rc = 1;
511     int options = 0;
512     int errptr;
513     const char *error;
514 nigel 53 BOOL only_one_at_top;
515 nigel 49
516     /* Process the options */
517    
518     for (i = 1; i < argc; i++)
519     {
520     if (argv[i][0] != '-') break;
521 nigel 53
522 nigel 63 /* Missing options */
523    
524     if (argv[i][1] == 0) exit(usage(2));
525    
526 nigel 53 /* Long name options */
527    
528     if (argv[i][1] == '-')
529 nigel 49 {
530 nigel 53 option_item *op;
531    
532     if (strncmp(argv[i]+2, "file=", 5) == 0)
533 nigel 49 {
534 nigel 53 pattern_filename = argv[i] + 7;
535     continue;
536     }
537 nigel 49
538 nigel 53 for (op = optionlist; op->one_char != 0; op++)
539     {
540     if (strcmp(argv[i]+2, op->long_name) == 0)
541     {
542     options = handle_option(op->one_char, options);
543     break;
544     }
545     }
546     if (op->one_char == 0)
547     {
548     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
549     exit(usage(2));
550     }
551     }
552 nigel 49
553 nigel 53 /* One-char options */
554    
555     else
556     {
557     char *s = argv[i] + 1;
558     while (*s != 0)
559     {
560     if (*s == 'f')
561     {
562     pattern_filename = s + 1;
563     if (pattern_filename[0] == 0)
564     {
565     if (i >= argc - 1)
566     {
567     fprintf(stderr, "pcregrep: File name missing after -f\n");
568     exit(usage(2));
569     }
570     pattern_filename = argv[++i];
571     }
572     break;
573     }
574     else options = handle_option(*s++, options);
575 nigel 49 }
576     }
577     }
578    
579 nigel 71 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
580     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
581 nigel 49
582 nigel 53 if (pattern_list == NULL || hints_list == NULL)
583     {
584     fprintf(stderr, "pcregrep: malloc failed\n");
585     return 2;
586     }
587 nigel 49
588 nigel 53 /* Compile the regular expression(s). */
589 nigel 49
590 nigel 53 if (pattern_filename != NULL)
591 nigel 49 {
592 nigel 53 FILE *f = fopen(pattern_filename, "r");
593     char buffer[BUFSIZ];
594     if (f == NULL)
595     {
596     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
597     strerror(errno));
598     return 2;
599     }
600     while (fgets(buffer, sizeof(buffer), f) != NULL)
601     {
602     char *s = buffer + (int)strlen(buffer);
603     if (pattern_count >= MAX_PATTERN_COUNT)
604     {
605     fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
606     MAX_PATTERN_COUNT);
607     return 2;
608     }
609     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
610     if (s == buffer) continue;
611     *s = 0;
612     pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
613     &errptr, NULL);
614     if (pattern_list[pattern_count++] == NULL)
615     {
616     fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
617     pattern_count, errptr, error);
618     return 2;
619     }
620     }
621     fclose(f);
622 nigel 49 }
623    
624 nigel 53 /* If no file name, a single regex must be given inline */
625 nigel 49
626 nigel 53 else
627 nigel 49 {
628 nigel 63 if (i >= argc) return usage(2);
629 nigel 53 pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
630     if (pattern_list[0] == NULL)
631     {
632     fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
633     error);
634     return 2;
635     }
636     pattern_count++;
637 nigel 49 }
638    
639 nigel 53 /* Study the regular expressions, as we will be running them may times */
640    
641     for (j = 0; j < pattern_count; j++)
642     {
643     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
644     if (error != NULL)
645     {
646     char s[16];
647     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
648     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
649     return 2;
650     }
651     }
652    
653 nigel 49 /* If there are no further arguments, do the business on stdin and exit */
654    
655     if (i >= argc) return pcregrep(stdin, NULL);
656    
657 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
658     Pass in the fact that there is only one argument at top level - this suppresses
659     the file name if the argument is not a directory. */
660 nigel 49
661 nigel 53 only_one_at_top = (i == argc - 1);
662 nigel 49 if (filenames_only) filenames = TRUE;
663    
664     for (; i < argc; i++)
665     {
666 nigel 53 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
667     if (frc == 0 && rc == 1) rc = 0;
668 nigel 49 }
669    
670     return rc;
671     }
672    
673     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12