/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 77 - (hide annotations) (download)
Sat Feb 24 21:40:45 2007 UTC (7 years, 4 months ago) by nigel
File MIME type: text/plain
File size: 33424 byte(s)
Load pcre-6.0 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 77 Copyright (c) 1997-2005 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 49 #include <stdio.h>
42     #include <string.h>
43     #include <stdlib.h>
44     #include <errno.h>
45 nigel 77
46     #include <sys/types.h>
47     #include <sys/stat.h>
48     #include <unistd.h>
49    
50 nigel 49 #include "config.h"
51     #include "pcre.h"
52    
53     #define FALSE 0
54     #define TRUE 1
55    
56     typedef int BOOL;
57    
58 nigel 77 #define VERSION "4.0 07-Jun-2005"
59 nigel 53 #define MAX_PATTERN_COUNT 100
60 nigel 49
61 nigel 77 #if BUFSIZ > 8192
62     #define MBUFTHIRD BUFSIZ
63     #else
64     #define MBUFTHIRD 8192
65     #endif
66 nigel 49
67 nigel 77
68    
69 nigel 49 /*************************************************
70     * Global variables *
71     *************************************************/
72    
/* Pattern sources. pattern_filename is the target of -f/--file in optionlist;
stdin_name is the name passed as printname when "-" is scanned and is the
target of --label. pattern_list[] and hints_list[] are parallel arrays of
pattern_count entries, handed pairwise to pcre_exec() in pcregrep(). */
73 nigel 53 static char *pattern_filename = NULL;
74 nigel 77 static char *stdin_name = (char *)"(standard input)";
75 nigel 53 static int pattern_count = 0;
76     static pcre **pattern_list;
77     static pcre_extra **hints_list;
78 nigel 49
/* Targets of --include/--exclude, and the compiled forms that
grep_or_recurse() matches against each path found while recursing. */
79 nigel 77 static char *include_pattern = NULL;
80     static char *exclude_pattern = NULL;
81    
82     static pcre *include_compiled = NULL;
83     static pcre *exclude_compiled = NULL;
84    
/* Context line counts: targets of -A, -B and -C respectively. */
85     static int after_context = 0;
86     static int before_context = 0;
87     static int both_context = 0;
88    
/* Mode flags. Most are set from single-letter options in handle_option().
hyphenpending is not an option: it records that a "--" separator line must be
written before the next block of output. */
89 nigel 49 static BOOL count_only = FALSE;
90 nigel 53 static BOOL filenames = TRUE;
91 nigel 49 static BOOL filenames_only = FALSE;
92 nigel 77 static BOOL filenames_nomatch_only = FALSE;
93     static BOOL hyphenpending = FALSE;
94 nigel 49 static BOOL invert = FALSE;
95 nigel 77 static BOOL multiline = FALSE;
96 nigel 49 static BOOL number = FALSE;
97 nigel 77 static BOOL quiet = FALSE;
98 nigel 53 static BOOL recurse = FALSE;
99 nigel 49 static BOOL silent = FALSE;
100     static BOOL whole_lines = FALSE;
101 nigel 77 static BOOL word_match = FALSE;
102 nigel 49
103 nigel 53 /* Structure for options and list of them */
104 nigel 49
105 nigel 77 enum { OP_NODATA, OP_STRING, OP_NUMBER };
106    
/* One entry in the option table.
  type       one of OP_NODATA, OP_STRING, OP_NUMBER
  one_char   the single-letter form; -1 when there is only a long form;
             0 marks the end of the table
  dataptr    address of the variable that receives the option's data, or NULL
  long_name  the long form without the leading "--"; may carry an "=xxx"
             suffix (option takes data) or a "(p)" style optional suffix
  help_text  description printed by help() */
107 nigel 53 typedef struct option_item {
108 nigel 77 int type;
109 nigel 53 int one_char;
110 nigel 77 void *dataptr;
111 nigel 67 const char *long_name;
112     const char *help_text;
113 nigel 53 } option_item;
114 nigel 49
/* The option table. It is scanned linearly, by long name or by letter, until
an entry whose one_char is 0 is reached, so the all-zero entry must stay last.
Entries with one_char == -1 can only be selected by their long name. In a long
name, text from '=' onwards is documentation for help() and is not part of the
name that is compared; a bracketed suffix such as "(p)" means the name is
accepted both with and without the bracketed characters. */
115 nigel 53 static option_item optionlist[] = {
116 nigel 77 { OP_NODATA, -1, NULL, "", " terminate options" },
117     { OP_NODATA, -1, NULL, "help", "display this help and exit" },
118     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
119     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
120     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
121     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
122     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
123     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
124     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
125     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
126     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
127     { OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" },
128     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
129     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
130     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
131     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
132     { OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
133     { OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" },
134     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
135     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
136     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
137     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
138     { OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" },
139     { OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" },
140     { OP_NODATA, 0, NULL, NULL, NULL }
141 nigel 53 };
142    
143    
144     /*************************************************
145     * Functions for directory scanning *
146     *************************************************/
147    
148     /* These functions are defined so that they can be made system specific,
149 nigel 63 although at present the only ones are for Unix, Win32, and for "no directory
150     recursion support". */
151 nigel 53
152    
153     /************* Directory scanning in Unix ***********/
154    
155     #if IS_UNIX
156     #include <sys/types.h>
157     #include <sys/stat.h>
158     #include <dirent.h>
159    
160     typedef DIR directory_type;
161    
/* Test whether a path names a directory (Unix version).

Argument:  filename   the path to test
Returns:   '/' if stat() succeeds and reports a directory, otherwise 0.
           grep_or_recurse() uses the non-zero value as the separator
           character when it builds the names of directory entries. */
162 nigel 67 static int
163 nigel 53 isdirectory(char *filename)
164     {
165     struct stat statbuf;
166     if (stat(filename, &statbuf) < 0)
167     return 0; /* In the expectation that opening as a file will fail */
168     return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
169     }
170    
/* Open a directory for scanning (Unix version): a direct wrapper around
opendir(), whose result (NULL on failure) is returned unchanged. */
171 nigel 67 static directory_type *
172 nigel 53 opendirectory(char *filename)
173     {
174     return opendir(filename);
175     }
176    
/* Return the name of the next directory entry (Unix version), skipping the
"." and ".." entries.

Argument:  dir   an open directory, as returned by opendirectory()
Returns:   pointer to the d_name field of the entry returned by readdir(),
           or NULL when readdir() returns NULL */
177 nigel 67 static char *
178 nigel 53 readdirectory(directory_type *dir)
179     {
180     for (;;)
181     {
182     struct dirent *dent = readdir(dir);
183     if (dent == NULL) return NULL;
184     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185     return dent->d_name;
186     }
187     return NULL; /* Keep compiler happy; never executed */
188     }
189    
/* Finish scanning a directory (Unix version): a direct wrapper around
closedir(). The result of closedir() is not checked. */
190 nigel 67 static void
191 nigel 53 closedirectory(directory_type *dir)
192     {
193     closedir(dir);
194     }
195    
196    
197 nigel 63 /************* Directory scanning in Win32 ***********/
198 nigel 53
199 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
200 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201     when it did not exist. */
202 nigel 53
203 nigel 63
204     #elif HAVE_WIN32API
205    
206     #ifndef STRICT
207     # define STRICT
208     #endif
209     #ifndef WIN32_LEAN_AND_MEAN
210     # define WIN32_LEAN_AND_MEAN
211     #endif
212 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
213     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214     #endif
215    
216 nigel 63 #include <windows.h>
217    
/* State for one directory scan (Win32 version).
  handle   the search handle returned by FindFirstFile()
  first    TRUE until readdirectory() has handed out the entry that
           FindFirstFile() already placed in data
  data     the most recent result from FindFirstFile()/FindNextFile() */
218     typedef struct directory_type
219     {
220     HANDLE handle;
221     BOOL first;
222     WIN32_FIND_DATA data;
223     } directory_type;
224    
/* Test whether a path names a directory (Win32 version).

Argument:  filename   the path to test
Returns:   '/' if GetFileAttributes() succeeds and the directory attribute is
           set; 0 if the call fails or the attribute is not set */
225     int
226     isdirectory(char *filename)
227     {
228     DWORD attr = GetFileAttributes(filename);
229     if (attr == INVALID_FILE_ATTRIBUTES)
230     return 0;
231     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232     }
233    
/* Start scanning a directory (Win32 version) by calling FindFirstFile() on
"<filename>\*".

Argument:  filename   the directory path
Returns:   a malloc'ed directory_type on success (released by
           closedirectory()); NULL on failure, with errno set to EACCES when
           the Windows error is ERROR_ACCESS_DENIED and to ENOENT otherwise.
           If either malloc() fails, a message is written to stderr and the
           program exits with code 2. */
234     directory_type *
235     opendirectory(char *filename)
236     {
237     size_t len;
238     char *pattern;
239     directory_type *dir;
240     DWORD err;
241     len = strlen(filename);
/* len + 3 leaves room for the appended backslash, '*' and terminating NUL */
242     pattern = (char *) malloc(len + 3);
243     dir = (directory_type *) malloc(sizeof(*dir));
244     if ((pattern == NULL) || (dir == NULL))
245     {
246     fprintf(stderr, "pcregrep: malloc failed\n");
247     exit(2);
248     }
249     memcpy(pattern, filename, len);
/* The 3-byte copy includes the NUL that terminates the "\\*" literal */
250     memcpy(&(pattern[len]), "\\*", 3);
251     dir->handle = FindFirstFile(pattern, &(dir->data));
252     if (dir->handle != INVALID_HANDLE_VALUE)
253     {
254     free(pattern);
255     dir->first = TRUE;
256     return dir;
257     }
/* Failure: the error code is fetched before the free() calls below */
258     err = GetLastError();
259     free(pattern);
260     free(dir);
261     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262     return NULL;
263     }
264    
/* Return the name of the next directory entry (Win32 version), skipping the
"." and ".." entries.

Argument:  dir   a scan state returned by opendirectory()
Returns:   pointer to the cFileName field inside dir->data, or NULL when
           FindNextFile() reports failure */
265     char *
266     readdirectory(directory_type *dir)
267     {
268     for (;;)
269     {
/* On the first call the entry found by FindFirstFile() is already in
dir->data, so FindNextFile() is only called on later passes. */
270     if (!dir->first)
271     {
272     if (!FindNextFile(dir->handle, &(dir->data)))
273     return NULL;
274     }
275     else
276     {
277     dir->first = FALSE;
278     }
279     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280     return dir->data.cFileName;
281     }
282     #ifndef _MSC_VER
283     return NULL; /* Keep compiler happy; never executed */
284     #endif
285     }
286    
/* Finish a directory scan (Win32 version): close the search handle and free
the directory_type that opendirectory() allocated. */
287     void
288     closedirectory(directory_type *dir)
289     {
290     FindClose(dir->handle);
291     free(dir);
292     }
293    
294    
295 nigel 53 /************* Directory scanning when we can't do it ***********/
296    
297     /* The type is void, and apart from isdirectory(), the functions do nothing. */
298    
299 nigel 63 #else
300    
typedef void directory_type;

/* Stand-ins used when directory scanning is not available. isdirectory()
always answers "not a directory" (0, the same value the other implementations
return for a non-directory), so grep_or_recurse() never calls the remaining
functions. They still return a defined value: falling off the end of a
non-void function leaves the caller reading an indeterminate result. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
307    
308     #endif
309    
310    
311    
312 nigel 49 #if ! HAVE_STRERROR
313     /*************************************************
314     * Provide strerror() for non-ANSI libraries *
315     *************************************************/
316    
317     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
318     in their libraries, but can provide the same facility by this simple
319     alternative function. */
320    
/* The error message table and its entry count, expected to be supplied by the
system library on the platforms where this fallback is compiled. */
321     extern int sys_nerr;
322     extern char *sys_errlist[];
323    
/* Replacement strerror(), compiled only when HAVE_STRERROR is false.

Argument:  n   an error number
Returns:   sys_errlist[n] when 0 <= n < sys_nerr, otherwise the fixed string
           "unknown error number" */
324     char *
325     strerror(int n)
326     {
327     if (n < 0 || n >= sys_nerr) return "unknown error number";
328     return sys_errlist[n];
329     }
330     #endif /* HAVE_STRERROR */
331    
332    
333    
334     /*************************************************
335 nigel 77 * Print the previous "after" lines *
336 nigel 49 *************************************************/
337    
338 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
339     and at the end of the file.
340    
341     Arguments:
342     lastmatchnumber the number of the last matching line, plus one
343     lastmatchrestart where we restarted after the last match
344     endptr end of available data
345     printname filename for printing
346    
347     Returns: nothing
348     */
349    
350     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351     char *endptr, char *printname)
352     {
353     if (after_context > 0 && lastmatchnumber > 0)
354     {
355     int count = 0;
356     while (lastmatchrestart < endptr && count++ < after_context)
357     {
358     char *pp = lastmatchrestart;
359     if (printname != NULL) fprintf(stdout, "%s-", printname);
360     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361     while (*pp != '\n') pp++;
362     fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363     lastmatchrestart = pp + 1;
364     }
365     hyphenpending = TRUE;
366     }
367     }
368    
369    
370    
371     /*************************************************
372     * Grep an individual file *
373     *************************************************/
374    
375     /* This is called from grep_or_recurse() below. It uses a buffer that is three
376     times the value of MBUFTHIRD. The matching point is never allowed to stray into
377     the top third of the buffer, thus keeping more of the file available for
378     context printing or for multiline scanning. For large files, the pointer will
379     be in the middle third most of the time, so the bottom third is available for
380     "before" context printing.
381    
382     Arguments:
383     in the fopened FILE stream
384     printname the file name if it is to be printed for each match
385     or NULL if the file name is not to be printed
386     it cannot be NULL if filenames[_nomatch]_only is set
387    
388     Returns: 0 if there was at least one match
389     1 otherwise (no matches)
390     */
391    
392 nigel 49 static int
393 nigel 77 pcregrep(FILE *in, char *printname)
394 nigel 49 {
395     int rc = 1;
396 nigel 77 int linenumber = 1;
397     int lastmatchnumber = 0;
398 nigel 49 int count = 0;
399     int offsets[99];
400 nigel 77 char *lastmatchrestart = NULL;
401     char buffer[3*MBUFTHIRD];
402     char *ptr = buffer;
403     char *endptr;
404     size_t bufflength;
405     BOOL endhyphenpending = FALSE;
406 nigel 49
407 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
408     end of what we have. */
409    
410     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411     endptr = buffer + bufflength;
412    
413     /* Loop while the current pointer is not at the end of the file. For large
414     files, endptr will be at the end of the buffer when we are in the middle of the
415     file, but ptr will never get there, because as soon as it gets over 2/3 of the
416     way, the buffer is shifted left and re-filled. */
417    
418     while (ptr < endptr)
419 nigel 49 {
420 nigel 77 int i;
421 nigel 53 BOOL match = FALSE;
422 nigel 77 char *t = ptr;
423     size_t length, linelength;
424 nigel 49
425 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
426     of the subject string to pass to pcre_exec(). In multiline mode, it is the
427     length remainder of the data in the buffer. Otherwise, it is the length of
428     the next line. After matching, we always advance by the length of the next
429     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430     that any match is constrained to be in the first line. */
431    
432     linelength = 0;
433     while (t < endptr && *t++ != '\n') linelength++;
434     length = multiline? endptr - ptr : linelength;
435    
436     /* Run through all the patterns until one matches. Note that we don't include
437     the final newline in the subject string. */
438    
439 nigel 53 for (i = 0; !match && i < pattern_count; i++)
440     {
441 nigel 77 match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442 nigel 53 offsets, 99) >= 0;
443     }
444 nigel 49
445 nigel 77 /* If it's a match or a not-match (as required), print what's wanted. */
446    
447 nigel 49 if (match != invert)
448     {
449 nigel 77 BOOL hyphenprinted = FALSE;
450    
451     if (filenames_nomatch_only) return 1;
452    
453 nigel 49 if (count_only) count++;
454    
455     else if (filenames_only)
456     {
457 nigel 77 fprintf(stdout, "%s\n", printname);
458 nigel 49 return 0;
459     }
460    
461 nigel 77 else if (quiet) return 0;
462 nigel 49
463     else
464     {
465 nigel 77 /* See if there is a requirement to print some "after" lines from a
466     previous match. We never print any overlaps. */
467    
468     if (after_context > 0 && lastmatchnumber > 0)
469     {
470     int linecount = 0;
471     char *p = lastmatchrestart;
472    
473     while (p < ptr && linecount < after_context)
474     {
475     while (*p != '\n') p++;
476     p++;
477     linecount++;
478     }
479    
480     /* It is important to advance lastmatchrestart during this printing so
481     that it interacts correctly with any "before" printing below. */
482    
483     while (lastmatchrestart < p)
484     {
485     char *pp = lastmatchrestart;
486     if (printname != NULL) fprintf(stdout, "%s-", printname);
487     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488     while (*pp != '\n') pp++;
489     fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490     lastmatchrestart = pp + 1;
491     }
492     if (lastmatchrestart != ptr) hyphenpending = TRUE;
493     }
494    
495     /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497     if (hyphenpending)
498     {
499     fprintf(stdout, "--\n");
500     hyphenpending = FALSE;
501     hyphenprinted = TRUE;
502     }
503    
504     /* See if there is a requirement to print some "before" lines for this
505     match. Again, don't print overlaps. */
506    
507     if (before_context > 0)
508     {
509     int linecount = 0;
510     char *p = ptr;
511    
512     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513     linecount++ < before_context)
514     {
515     p--;
516     while (p > buffer && p[-1] != '\n') p--;
517     }
518    
519     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520     fprintf(stdout, "--\n");
521    
522     while (p < ptr)
523     {
524     char *pp = p;
525     if (printname != NULL) fprintf(stdout, "%s-", printname);
526     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527     while (*pp != '\n') pp++;
528     fprintf(stdout, "%.*s", pp - p + 1, p);
529     p = pp + 1;
530     }
531     }
532    
533     /* Now print the matching line(s); ensure we set hyphenpending at the end
534     of the file. */
535    
536     endhyphenpending = TRUE;
537     if (printname != NULL) fprintf(stdout, "%s:", printname);
538 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
539 nigel 77
540     /* In multiline mode, we want to print to the end of the line in which
541     the end of the matched string is found, so we adjust linelength and the
542     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
543     start of the match will always be before the first \n character. */
544    
545     if (multiline)
546     {
547     char *endmatch = ptr + offsets[1];
548     t = ptr;
549     while (t < endmatch) { if (*t++ == '\n') linenumber++; }
550     while (endmatch < endptr && *endmatch != '\n') endmatch++;
551     linelength = endmatch - ptr;
552     }
553    
554     fprintf(stdout, "%.*s\n", linelength, ptr);
555 nigel 49 }
556    
557 nigel 77 rc = 0; /* Had some success */
558    
559     /* Remember where the last match happened for after_context. We remember
560     where we are about to restart, and that line's number. */
561    
562     lastmatchrestart = ptr + linelength + 1;
563     lastmatchnumber = linenumber + 1;
564 nigel 49 }
565 nigel 77
566     /* Advance to after the newline and increment the line number. */
567    
568     ptr += linelength + 1;
569     linenumber++;
570    
571     /* If we haven't yet reached the end of the file (the buffer is full), and
572     the current point is in the top 1/3 of the buffer, slide the buffer down by
573     1/3 and refill it. Before we do this, if some unprinted "after" lines are
574     about to be lost, print them. */
575    
576     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
577     {
578     if (after_context > 0 &&
579     lastmatchnumber > 0 &&
580     lastmatchrestart < buffer + MBUFTHIRD)
581     {
582     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
583     lastmatchnumber = 0;
584     }
585    
586     /* Now do the shuffle */
587    
588     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
589     ptr -= MBUFTHIRD;
590     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
591     endptr = buffer + bufflength;
592    
593     /* Adjust any last match point */
594    
595     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
596     }
597     } /* Loop through the whole file */
598    
599     /* End of file; print final "after" lines if wanted; do_after_lines sets
600     hyphenpending if it prints something. */
601    
602     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
603     hyphenpending |= endhyphenpending;
604    
605     /* Print the file name if we are looking for those without matches and there
606     were none. If we found a match, we won't have got this far. */
607    
608     if (filenames_nomatch_only)
609     {
610     fprintf(stdout, "%s\n", printname);
611     return 0;
612 nigel 49 }
613    
614 nigel 77 /* Print the match count if wanted */
615    
616 nigel 49 if (count_only)
617     {
618 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
619 nigel 49 fprintf(stdout, "%d\n", count);
620     }
621    
622     return rc;
623     }
624    
625    
626    
627     /*************************************************
628 nigel 53 * Grep a file or recurse into a directory *
629     *************************************************/
630    
631 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
632     recursing; if it's a file, grep it.
633    
634     Arguments:
635     pathname the path to investigate
636     dir_recurse TRUE if recursing is wanted (-r)
637     show_filenames TRUE if file names are wanted for multiple files, except
638     for the only file at top level when not filenames_only
639     only_one_at_top TRUE if the path is the only one at toplevel
640    
641     Returns: 0 if there was at least one match
642     1 if there were no matches
643     2 there was some kind of error
644    
645     However, file opening failures are suppressed if "silent" is set.
646     */
647    
648 nigel 53 static int
649 nigel 77 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
650 nigel 53 BOOL only_one_at_top)
651     {
652     int rc = 1;
653     int sep;
654     FILE *in;
655 nigel 77 char *printname;
656 nigel 53
657 nigel 77 /* If the file name is "-" we scan stdin */
658 nigel 53
659 nigel 77 if (strcmp(pathname, "-") == 0)
660 nigel 53 {
661 nigel 77 return pcregrep(stdin,
662     (filenames_only || filenames_nomatch_only ||
663     (show_filenames && !only_one_at_top))?
664     stdin_name : NULL);
665     }
666    
667     /* If the file is a directory and we are recursing, scan each file within it,
668     subject to any include or exclude patterns that were set. The scanning code is
669     localized so it can be made system-specific. */
670    
671     if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
672     {
673 nigel 53 char buffer[1024];
674     char *nextfile;
675 nigel 77 directory_type *dir = opendirectory(pathname);
676 nigel 53
677     if (dir == NULL)
678     {
679 nigel 77 if (!silent)
680     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
681     strerror(errno));
682 nigel 53 return 2;
683     }
684    
685     while ((nextfile = readdirectory(dir)) != NULL)
686     {
687 nigel 77 int frc, blen;
688     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
689     blen = strlen(buffer);
690    
691     if (exclude_compiled != NULL &&
692     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
693     continue;
694    
695     if (include_compiled != NULL &&
696     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
697     continue;
698    
699 nigel 67 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
700 nigel 77 if (frc > 1) rc = frc;
701     else if (frc == 0 && rc == 1) rc = 0;
702 nigel 53 }
703    
704     closedirectory(dir);
705     return rc;
706     }
707    
708     /* If the file is not a directory, or we are not recursing, scan it. If this is
709 nigel 63 the first and only argument at top level, we don't show the file name (unless
710     we are only showing the file name). Otherwise, control is via the
711     show_filenames variable. */
712 nigel 53
713 nigel 77 in = fopen(pathname, "r");
714 nigel 53 if (in == NULL)
715     {
716 nigel 77 if (!silent)
717     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
718     strerror(errno));
719 nigel 53 return 2;
720     }
721    
722 nigel 77 printname = (filenames_only || filenames_nomatch_only ||
723     (show_filenames && !only_one_at_top))? pathname : NULL;
724    
725     rc = pcregrep(in, printname);
726    
727 nigel 53 fclose(in);
728     return rc;
729     }
730    
731    
732    
733    
734     /*************************************************
735 nigel 49 * Usage function *
736     *************************************************/
737    
/* Write the two-line usage summary to stderr.

Argument:  rc   the value to hand back
Returns:   rc, unchanged, so that callers can write exit(usage(2)) */
738     static int
739     usage(int rc)
740     {
741 nigel 77 fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");
742 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
743 nigel 49 return rc;
744     }
745    
746    
747    
748    
749     /*************************************************
750 nigel 53 * Help function *
751     *************************************************/
752    
753     static void
754     help(void)
755     {
756     option_item *op;
757    
758 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
759 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
760 nigel 63 printf("PATTERN must be present if -f is not used.\n");
761 nigel 77 printf("\"-\" can be used as a file name to mean STDIN.\n");
762 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
763    
764     printf("Options:\n");
765    
766     for (op = optionlist; op->one_char != 0; op++)
767     {
768     int n;
769     char s[4];
770     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
771     printf(" %s --%s%n", s, op->long_name, &n);
772     n = 30 - n;
773     if (n < 1) n = 1;
774     printf("%.*s%s\n", n, " ", op->help_text);
775     }
776    
777 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
778     printf("trailing white space is removed and blank lines are ignored.\n");
779     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
780 nigel 53
781 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
782 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
783     }
784    
785    
786    
787    
788     /*************************************************
789 nigel 77 * Handle a single-letter, no data option *
790 nigel 53 *************************************************/
791    
/* Act on one option that takes no data.

Arguments:
  letter    the option letter, or -1, which is treated as a request for help
  options   the PCRE compile option bits collected so far

Returns:    options, with PCRE_CASELESS, PCRE_MULTILINE|PCRE_FIRSTLINE or
            PCRE_UTF8 added for -i, -M and -u respectively; all other letters
            only set a global flag and return options unchanged

The function does not return for -1 (help, then exit(0)), for 'V' (version
information on stderr, then exit(0)), or for an unrecognised letter (message
and usage on stderr, then exit(2)). */
792     static int
793     handle_option(int letter, int options)
794     {
795     switch(letter)
796     {
797     case -1: help(); exit(0);
798     case 'c': count_only = TRUE; break;
799     case 'h': filenames = FALSE; break;
800     case 'i': options |= PCRE_CASELESS; break;
801 nigel 77 case 'l': filenames_only = TRUE; break;
802     case 'L': filenames_nomatch_only = TRUE; break;
803     case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
804 nigel 53 case 'n': number = TRUE; break;
805 nigel 77 case 'q': quiet = TRUE; break;
806 nigel 53 case 'r': recurse = TRUE; break;
807     case 's': silent = TRUE; break;
808 nigel 63 case 'u': options |= PCRE_UTF8; break;
809 nigel 53 case 'v': invert = TRUE; break;
810 nigel 77 case 'w': word_match = TRUE; break;
811     case 'x': whole_lines = TRUE; break;
812 nigel 53
813     case 'V':
814     fprintf(stderr, "pcregrep version %s using ", VERSION);
815     fprintf(stderr, "PCRE version %s\n", pcre_version());
816     exit(0);
817     break;
818    
819     default:
820     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
821     exit(usage(2));
822     }
823    
824     return options;
825     }
826    
827    
828    
829    
830     /*************************************************
831 nigel 49 * Main program *
832     *************************************************/
833    
834 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
835    
836 nigel 49 int
837     main(int argc, char **argv)
838     {
839 nigel 53 int i, j;
840 nigel 49 int rc = 1;
841     int options = 0;
842     int errptr;
843     const char *error;
844 nigel 53 BOOL only_one_at_top;
845 nigel 49
846     /* Process the options */
847    
848     for (i = 1; i < argc; i++)
849     {
850 nigel 77 option_item *op = NULL;
851     char *option_data = (char *)""; /* default to keep compiler happy */
852     BOOL longop;
853     BOOL longopwasequals = FALSE;
854    
855 nigel 49 if (argv[i][0] != '-') break;
856 nigel 53
857 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
858     but only if we have previously had -f to define the patterns. */
859 nigel 63
860 nigel 77 if (argv[i][1] == 0)
861     {
862     if (pattern_filename != NULL) break;
863     else exit(usage(2));
864     }
865 nigel 63
866 nigel 77 /* Handle a long name option, or -- to terminate the options */
867 nigel 53
868     if (argv[i][1] == '-')
869 nigel 49 {
870 nigel 77 char *arg = argv[i] + 2;
871     char *argequals = strchr(arg, '=');
872 nigel 53
873 nigel 77 if (*arg == 0) /* -- terminates options */
874 nigel 49 {
875 nigel 77 i++;
876     break; /* out of the options-handling loop */
877 nigel 53 }
878 nigel 49
879 nigel 77 longop = TRUE;
880    
881     /* Some long options have data that follows after =, for example file=name.
882     Some options have variations in the long name spelling: specifically, we
883     allow "regexp" because GNU grep allows it, though I personally go along
884     with Jeff Friedl in preferring "regex" without the "p". These options are
885     entered in the table as "regex(p)". No option is in both these categories,
886     fortunately. */
887    
888 nigel 53 for (op = optionlist; op->one_char != 0; op++)
889     {
890 nigel 77 char *opbra = strchr(op->long_name, '(');
891     char *equals = strchr(op->long_name, '=');
892     if (opbra == NULL) /* Not a (p) case */
893 nigel 53 {
894 nigel 77 if (equals == NULL) /* Not thing=data case */
895     {
896     if (strcmp(arg, op->long_name) == 0) break;
897     }
898     else /* Special case xxx=data */
899     {
900     int oplen = equals - op->long_name;
901     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
902     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
903     {
904     option_data = arg + arglen;
905     if (*option_data == '=')
906     {
907     option_data++;
908     longopwasequals = TRUE;
909     }
910     break;
911     }
912     }
913 nigel 53 }
914 nigel 77 else /* Special case xxxx(p) */
915     {
916     char buff1[24];
917     char buff2[24];
918     int baselen = opbra - op->long_name;
919     sprintf(buff1, "%.*s", baselen, op->long_name);
920     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
921     opbra + 1);
922     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
923     break;
924     }
925 nigel 53 }
926 nigel 77
927 nigel 53 if (op->one_char == 0)
928     {
929     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
930     exit(usage(2));
931     }
932     }
933 nigel 49
934 nigel 77 /* One-char options; many that have no data may be in a single argument; we
935     continue till we hit the last one or one that needs data. */
936 nigel 53
937     else
938     {
939     char *s = argv[i] + 1;
940 nigel 77 longop = FALSE;
941 nigel 53 while (*s != 0)
942     {
943 nigel 77 for (op = optionlist; op->one_char != 0; op++)
944     { if (*s == op->one_char) break; }
945     if (op->one_char == 0)
946 nigel 53 {
947 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
948     *s, argv[i]);
949     exit(usage(2));
950     }
951     if (op->type != OP_NODATA || s[1] == 0)
952     {
953     option_data = s+1;
954 nigel 53 break;
955     }
956 nigel 77 options = handle_option(*s++, options);
957 nigel 49 }
958     }
959 nigel 77
960     /* At this point we should have op pointing to a matched option */
961    
962     if (op->type == OP_NODATA)
963     options = handle_option(op->one_char, options);
964     else
965     {
966     if (*option_data == 0)
967     {
968     if (i >= argc - 1 || longopwasequals)
969     {
970     fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
971     exit(usage(2));
972     }
973     option_data = argv[++i];
974     }
975    
976     if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
977     {
978     char *endptr;
979     int n = strtoul(option_data, &endptr, 10);
980     if (*endptr != 0)
981     {
982     if (longop)
983     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
984     option_data, op->long_name);
985     else
986     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
987     option_data, op->one_char);
988     exit(usage(2));
989     }
990     *((int *)op->dataptr) = n;
991     }
992     }
993 nigel 49 }
994    
995 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
996     for -A and -B. */
997    
998     if (both_context > 0)
999     {
1000     if (after_context == 0) after_context = both_context;
1001     if (before_context == 0) before_context = both_context;
1002     }
1003    
1004 nigel 71 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1005     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1006 nigel 49
1007 nigel 53 if (pattern_list == NULL || hints_list == NULL)
1008     {
1009     fprintf(stderr, "pcregrep: malloc failed\n");
1010     return 2;
1011     }
1012 nigel 49
1013 nigel 53 /* Compile the regular expression(s). */
1014 nigel 49
1015 nigel 53 if (pattern_filename != NULL)
1016 nigel 49 {
1017 nigel 53 FILE *f = fopen(pattern_filename, "r");
1018 nigel 77 char buffer[MBUFTHIRD + 16];
1019     char *rdstart;
1020     int adjust = 0;
1021    
1022 nigel 53 if (f == NULL)
1023     {
1024     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1025     strerror(errno));
1026     return 2;
1027     }
1028 nigel 77
1029     if (whole_lines)
1030 nigel 53 {
1031 nigel 77 strcpy(buffer, "^(?:");
1032     adjust = 4;
1033     }
1034     else if (word_match)
1035     {
1036     strcpy(buffer, "\\b");
1037     adjust = 2;
1038     }
1039    
1040     rdstart = buffer + adjust;
1041     while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1042     {
1043     char *s = rdstart + (int)strlen(rdstart);
1044 nigel 53 if (pattern_count >= MAX_PATTERN_COUNT)
1045     {
1046     fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1047     MAX_PATTERN_COUNT);
1048     return 2;
1049     }
1050 nigel 77 while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1051     if (s == rdstart) continue;
1052     if (whole_lines) strcpy(s, ")$");
1053     else if (word_match)strcpy(s, "\\b");
1054     else *s = 0;
1055 nigel 53 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1056     &errptr, NULL);
1057     if (pattern_list[pattern_count++] == NULL)
1058     {
1059     fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1060 nigel 77 pattern_count, errptr - adjust, error);
1061 nigel 53 return 2;
1062     }
1063     }
1064     fclose(f);
1065 nigel 49 }
1066    
1067 nigel 77 /* If no file name, a single regex must be given inline. */
1068 nigel 49
1069 nigel 53 else
1070 nigel 49 {
1071 nigel 77 char buffer[MBUFTHIRD + 16];
1072     char *pat;
1073     int adjust = 0;
1074    
1075 nigel 63 if (i >= argc) return usage(2);
1076 nigel 77
1077     if (whole_lines)
1078     {
1079     sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1080     pat = buffer;
1081     adjust = 4;
1082     }
1083     else if (word_match)
1084     {
1085     sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1086     pat = buffer;
1087     adjust = 2;
1088     }
1089     else pat = argv[i++];
1090    
1091     pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1092    
1093 nigel 53 if (pattern_list[0] == NULL)
1094     {
1095 nigel 77 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1096     errptr - adjust, error);
1097 nigel 53 return 2;
1098     }
1099     pattern_count++;
1100 nigel 49 }
1101    
1102 nigel 77 /* Study the regular expressions, as we will be running them many times */
1103 nigel 53
1104     for (j = 0; j < pattern_count; j++)
1105     {
1106     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1107     if (error != NULL)
1108     {
1109     char s[16];
1110     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1111     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1112     return 2;
1113     }
1114     }
1115    
1116 nigel 77 /* If there are include or exclude patterns, compile them. */
1117    
1118     if (exclude_pattern != NULL)
1119     {
1120     exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1121     if (exclude_compiled == NULL)
1122     {
1123     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1124     errptr, error);
1125     return 2;
1126     }
1127     }
1128    
1129     if (include_pattern != NULL)
1130     {
1131     include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1132     if (include_compiled == NULL)
1133     {
1134     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1135     errptr, error);
1136     return 2;
1137     }
1138     }
1139    
1140 nigel 49 /* If there are no further arguments, do the business on stdin and exit */
1141    
1142 nigel 77 if (i >= argc) return pcregrep(stdin,
1143     (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1144 nigel 49
1145 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
1146     Pass in the fact that there is only one argument at top level - this suppresses
1147 nigel 77 the file name if the argument is not a directory and filenames_only is not set.
1148     */
1149 nigel 49
1150 nigel 53 only_one_at_top = (i == argc - 1);
1151 nigel 49
1152     for (; i < argc; i++)
1153     {
1154 nigel 53 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1155 nigel 77 if (frc > 1) rc = frc;
1156     else if (frc == 0 && rc == 1) rc = 0;
1157 nigel 49 }
1158    
1159     return rc;
1160     }
1161    
1162 nigel 77 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12