/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (hide annotations) (download)
Sat Feb 24 21:41:13 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 33516 byte(s)
Load pcre-6.4 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 77 Copyright (c) 1997-2005 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 49 #include <stdio.h>
42     #include <string.h>
43     #include <stdlib.h>
44     #include <errno.h>
45 nigel 77
46     #include <sys/types.h>
47     #include <sys/stat.h>
48     #include <unistd.h>
49    
50 nigel 49 #include "config.h"
51     #include "pcre.h"
52    
/* Boolean type used throughout this file, with its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Limit on the number of patterns, and the version string printed by -V. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.1 05-Sep-2005"

/* One third of the size of the main file buffer: BUFSIZ when that is larger
than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
66 nigel 49
67 nigel 77
68    
69 nigel 49 /*************************************************
70     * Global variables *
71     *************************************************/
72    
73 nigel 53 static char *pattern_filename = NULL;
74 nigel 77 static char *stdin_name = (char *)"(standard input)";
75 nigel 53 static int pattern_count = 0;
76     static pcre **pattern_list;
77     static pcre_extra **hints_list;
78 nigel 49
79 nigel 77 static char *include_pattern = NULL;
80     static char *exclude_pattern = NULL;
81    
82     static pcre *include_compiled = NULL;
83     static pcre *exclude_compiled = NULL;
84    
85     static int after_context = 0;
86     static int before_context = 0;
87     static int both_context = 0;
88    
89 nigel 49 static BOOL count_only = FALSE;
90 nigel 53 static BOOL filenames = TRUE;
91 nigel 49 static BOOL filenames_only = FALSE;
92 nigel 77 static BOOL filenames_nomatch_only = FALSE;
93     static BOOL hyphenpending = FALSE;
94 nigel 49 static BOOL invert = FALSE;
95 nigel 77 static BOOL multiline = FALSE;
96 nigel 49 static BOOL number = FALSE;
97 nigel 77 static BOOL quiet = FALSE;
98 nigel 53 static BOOL recurse = FALSE;
99 nigel 49 static BOOL silent = FALSE;
100     static BOOL whole_lines = FALSE;
101 nigel 77 static BOOL word_match = FALSE;
102 nigel 49
103 nigel 53 /* Structure for options and list of them */
104 nigel 49
/* The kinds of option: one that takes no data, one that takes a string, and
one that takes a number. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_NUMBER
};
106    
/* One entry in the table of options. */

typedef struct option_item {
  int type;                 /* One of the OP_xxx values */
  int one_char;             /* Option letter; not positive if there is no
                               letter, and zero marks the end of the table */
  void *dataptr;            /* Variable that receives the option's data, or
                               NULL for an option that takes none */
  const char *long_name;    /* Long name, without the leading "--" */
  const char *help_text;    /* Description that is output by help() */
} option_item;
114 nigel 49
115 nigel 53 static option_item optionlist[] = {
116 nigel 77 { OP_NODATA, -1, NULL, "", " terminate options" },
117     { OP_NODATA, -1, NULL, "help", "display this help and exit" },
118     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
119     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
120     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
121     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
122     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
123     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
124     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
125     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
126     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
127     { OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" },
128     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
129     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
130     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
131     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
132     { OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
133     { OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" },
134     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
135     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
136     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
137     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
138     { OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" },
139     { OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" },
140     { OP_NODATA, 0, NULL, NULL, NULL }
141 nigel 53 };
142    
143    
144     /*************************************************
145     * Functions for directory scanning *
146     *************************************************/
147    
148     /* These functions are defined so that they can be made system specific,
149 nigel 63 although at present the only ones are for Unix, Win32, and for "no directory
150     recursion support". */
151 nigel 53
152    
153     /************* Directory scanning in Unix ***********/
154    
155     #if IS_UNIX
156     #include <sys/types.h>
157     #include <sys/stat.h>
158     #include <dirent.h>
159    
160     typedef DIR directory_type;
161    
/* Test whether a path is a directory.

Argument:   filename    the path to test

Returns:    '/' if stat() succeeds and reports a directory; this is the value
              that is used as the separator when building the paths of the
              directory's entries
            0 if the path is not a directory, or if stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the POSIX way to test the file type; the S_IFMT and S_IFDIR
masks are not available in every compilation environment. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
170    
171 nigel 67 static directory_type *
172 nigel 53 opendirectory(char *filename)
173     {
174     return opendir(filename);
175     }
176    
177 nigel 67 static char *
178 nigel 53 readdirectory(directory_type *dir)
179     {
180     for (;;)
181     {
182     struct dirent *dent = readdir(dir);
183     if (dent == NULL) return NULL;
184     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185     return dent->d_name;
186     }
187     return NULL; /* Keep compiler happy; never executed */
188     }
189    
190 nigel 67 static void
191 nigel 53 closedirectory(directory_type *dir)
192     {
193     closedir(dir);
194     }
195    
196    
197 nigel 63 /************* Directory scanning in Win32 ***********/
198 nigel 53
199 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
200 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201     when it did not exist. */
202 nigel 53
203 nigel 63
204     #elif HAVE_WIN32API
205    
206     #ifndef STRICT
207     # define STRICT
208     #endif
209     #ifndef WIN32_LEAN_AND_MEAN
210     # define WIN32_LEAN_AND_MEAN
211     #endif
212 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
213     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214     #endif
215    
216 nigel 63 #include <windows.h>
217    
218     typedef struct directory_type
219     {
220     HANDLE handle;
221     BOOL first;
222     WIN32_FIND_DATA data;
223     } directory_type;
224    
225     int
226     isdirectory(char *filename)
227     {
228     DWORD attr = GetFileAttributes(filename);
229     if (attr == INVALID_FILE_ATTRIBUTES)
230     return 0;
231     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232     }
233    
234     directory_type *
235     opendirectory(char *filename)
236     {
237     size_t len;
238     char *pattern;
239     directory_type *dir;
240     DWORD err;
241     len = strlen(filename);
242     pattern = (char *) malloc(len + 3);
243     dir = (directory_type *) malloc(sizeof(*dir));
244     if ((pattern == NULL) || (dir == NULL))
245     {
246     fprintf(stderr, "pcregrep: malloc failed\n");
247     exit(2);
248     }
249     memcpy(pattern, filename, len);
250     memcpy(&(pattern[len]), "\\*", 3);
251     dir->handle = FindFirstFile(pattern, &(dir->data));
252     if (dir->handle != INVALID_HANDLE_VALUE)
253     {
254     free(pattern);
255     dir->first = TRUE;
256     return dir;
257     }
258     err = GetLastError();
259     free(pattern);
260     free(dir);
261     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262     return NULL;
263     }
264    
265     char *
266     readdirectory(directory_type *dir)
267     {
268     for (;;)
269     {
270     if (!dir->first)
271     {
272     if (!FindNextFile(dir->handle, &(dir->data)))
273     return NULL;
274     }
275     else
276     {
277     dir->first = FALSE;
278     }
279     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280     return dir->data.cFileName;
281     }
282     #ifndef _MSC_VER
283     return NULL; /* Keep compiler happy; never executed */
284     #endif
285     }
286    
287     void
288     closedirectory(directory_type *dir)
289     {
290     FindClose(dir->handle);
291     free(dir);
292     }
293    
294    
295 nigel 53 /************* Directory scanning when we can't do it ***********/
296    
297     /* The type is void, and apart from isdirectory(), the functions do nothing. */
298    
299 nigel 63 #else
300    
typedef void directory_type;

/* Nothing is ever reported as a directory, so none of the scanning functions
is expected to be called. Those that yield a value nevertheless return NULL
explicitly, because a function with a result type must not finish without
returning one. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
307    
308     #endif
309    
310    
311    
312 nigel 49 #if ! HAVE_STRERROR
313     /*************************************************
314     * Provide strerror() for non-ANSI libraries *
315     *************************************************/
316    
317     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
318     in their libraries, but can provide the same facility by this simple
319     alternative function. */
320    
extern int sys_nerr;
extern char *sys_errlist[];

/* Look the error number up in the system's table, giving a fixed text for a
number that is negative or is not less than sys_nerr. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
330     #endif /* HAVE_STRERROR */
331    
332    
333    
334     /*************************************************
335 nigel 77 * Print the previous "after" lines *
336 nigel 49 *************************************************/
337    
338 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
339     and at the end of the file.
340    
341     Arguments:
342     lastmatchnumber the number of the last matching line, plus one
343     lastmatchrestart where we restarted after the last match
344     endptr end of available data
345     printname filename for printing
346    
347     Returns: nothing
348     */
349    
350     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351     char *endptr, char *printname)
352     {
353     if (after_context > 0 && lastmatchnumber > 0)
354     {
355     int count = 0;
356     while (lastmatchrestart < endptr && count++ < after_context)
357     {
358     char *pp = lastmatchrestart;
359     if (printname != NULL) fprintf(stdout, "%s-", printname);
360     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361     while (*pp != '\n') pp++;
362     fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363     lastmatchrestart = pp + 1;
364     }
365     hyphenpending = TRUE;
366     }
367     }
368    
369    
370    
371     /*************************************************
372     * Grep an individual file *
373     *************************************************/
374    
375     /* This is called from grep_or_recurse() below. It uses a buffer that is three
376     times the value of MBUFTHIRD. The matching point is never allowed to stray into
377     the top third of the buffer, thus keeping more of the file available for
378     context printing or for multiline scanning. For large files, the pointer will
379     be in the middle third most of the time, so the bottom third is available for
380     "before" context printing.
381    
382     Arguments:
383     in the fopened FILE stream
384     printname the file name if it is to be printed for each match
385     or NULL if the file name is not to be printed
386     it cannot be NULL if filenames[_nomatch]_only is set
387    
388     Returns: 0 if there was at least one match
389     1 otherwise (no matches)
390     */
391    
392 nigel 49 static int
393 nigel 77 pcregrep(FILE *in, char *printname)
394 nigel 49 {
395     int rc = 1;
396 nigel 77 int linenumber = 1;
397     int lastmatchnumber = 0;
398 nigel 49 int count = 0;
399     int offsets[99];
400 nigel 77 char *lastmatchrestart = NULL;
401     char buffer[3*MBUFTHIRD];
402     char *ptr = buffer;
403     char *endptr;
404     size_t bufflength;
405     BOOL endhyphenpending = FALSE;
406 nigel 49
407 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
408     end of what we have. */
409    
410     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411     endptr = buffer + bufflength;
412    
413     /* Loop while the current pointer is not at the end of the file. For large
414     files, endptr will be at the end of the buffer when we are in the middle of the
415     file, but ptr will never get there, because as soon as it gets over 2/3 of the
416     way, the buffer is shifted left and re-filled. */
417    
418     while (ptr < endptr)
419 nigel 49 {
420 nigel 77 int i;
421 nigel 53 BOOL match = FALSE;
422 nigel 77 char *t = ptr;
423     size_t length, linelength;
424 nigel 49
425 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
426     of the subject string to pass to pcre_exec(). In multiline mode, it is the
427     length remainder of the data in the buffer. Otherwise, it is the length of
428     the next line. After matching, we always advance by the length of the next
429     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430     that any match is constrained to be in the first line. */
431    
432     linelength = 0;
433     while (t < endptr && *t++ != '\n') linelength++;
434     length = multiline? endptr - ptr : linelength;
435    
436     /* Run through all the patterns until one matches. Note that we don't include
437     the final newline in the subject string. */
438    
439 nigel 53 for (i = 0; !match && i < pattern_count; i++)
440     {
441 nigel 77 match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442 nigel 53 offsets, 99) >= 0;
443     }
444 nigel 49
445 nigel 77 /* If it's a match or a not-match (as required), print what's wanted. */
446    
447 nigel 49 if (match != invert)
448     {
449 nigel 77 BOOL hyphenprinted = FALSE;
450    
451     if (filenames_nomatch_only) return 1;
452    
453 nigel 49 if (count_only) count++;
454    
455     else if (filenames_only)
456     {
457 nigel 77 fprintf(stdout, "%s\n", printname);
458 nigel 49 return 0;
459     }
460    
461 nigel 77 else if (quiet) return 0;
462 nigel 49
463     else
464     {
465 nigel 77 /* See if there is a requirement to print some "after" lines from a
466     previous match. We never print any overlaps. */
467    
468     if (after_context > 0 && lastmatchnumber > 0)
469     {
470     int linecount = 0;
471     char *p = lastmatchrestart;
472    
473     while (p < ptr && linecount < after_context)
474     {
475     while (*p != '\n') p++;
476     p++;
477     linecount++;
478     }
479    
480     /* It is important to advance lastmatchrestart during this printing so
481     that it interacts correctly with any "before" printing below. */
482    
483     while (lastmatchrestart < p)
484     {
485     char *pp = lastmatchrestart;
486     if (printname != NULL) fprintf(stdout, "%s-", printname);
487     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488     while (*pp != '\n') pp++;
489     fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490     lastmatchrestart = pp + 1;
491     }
492     if (lastmatchrestart != ptr) hyphenpending = TRUE;
493     }
494    
495     /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497     if (hyphenpending)
498     {
499     fprintf(stdout, "--\n");
500     hyphenpending = FALSE;
501     hyphenprinted = TRUE;
502     }
503    
504     /* See if there is a requirement to print some "before" lines for this
505     match. Again, don't print overlaps. */
506    
507     if (before_context > 0)
508     {
509     int linecount = 0;
510     char *p = ptr;
511    
512     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513     linecount++ < before_context)
514     {
515     p--;
516     while (p > buffer && p[-1] != '\n') p--;
517     }
518    
519     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520     fprintf(stdout, "--\n");
521    
522     while (p < ptr)
523     {
524     char *pp = p;
525     if (printname != NULL) fprintf(stdout, "%s-", printname);
526     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527     while (*pp != '\n') pp++;
528     fprintf(stdout, "%.*s", pp - p + 1, p);
529     p = pp + 1;
530     }
531     }
532    
533     /* Now print the matching line(s); ensure we set hyphenpending at the end
534 nigel 85 of the file if any context lines are being output. */
535 nigel 77
536 nigel 85 if (after_context > 0 || before_context > 0)
537     endhyphenpending = TRUE;
538    
539 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
540 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
541 nigel 77
542     /* In multiline mode, we want to print to the end of the line in which
543     the end of the matched string is found, so we adjust linelength and the
544     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
545     start of the match will always be before the first \n character. */
546    
547     if (multiline)
548     {
549     char *endmatch = ptr + offsets[1];
550     t = ptr;
551     while (t < endmatch) { if (*t++ == '\n') linenumber++; }
552     while (endmatch < endptr && *endmatch != '\n') endmatch++;
553     linelength = endmatch - ptr;
554     }
555    
556     fprintf(stdout, "%.*s\n", linelength, ptr);
557 nigel 49 }
558    
559 nigel 77 rc = 0; /* Had some success */
560    
561     /* Remember where the last match happened for after_context. We remember
562     where we are about to restart, and that line's number. */
563    
564     lastmatchrestart = ptr + linelength + 1;
565     lastmatchnumber = linenumber + 1;
566 nigel 49 }
567 nigel 77
568     /* Advance to after the newline and increment the line number. */
569    
570     ptr += linelength + 1;
571     linenumber++;
572    
573     /* If we haven't yet reached the end of the file (the buffer is full), and
574     the current point is in the top 1/3 of the buffer, slide the buffer down by
575     1/3 and refill it. Before we do this, if some unprinted "after" lines are
576     about to be lost, print them. */
577    
578     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
579     {
580     if (after_context > 0 &&
581     lastmatchnumber > 0 &&
582     lastmatchrestart < buffer + MBUFTHIRD)
583     {
584     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
585     lastmatchnumber = 0;
586     }
587    
588     /* Now do the shuffle */
589    
590     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
591     ptr -= MBUFTHIRD;
592     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
593     endptr = buffer + bufflength;
594    
595     /* Adjust any last match point */
596    
597     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
598     }
599     } /* Loop through the whole file */
600    
601     /* End of file; print final "after" lines if wanted; do_after_lines sets
602     hyphenpending if it prints something. */
603    
604     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
605     hyphenpending |= endhyphenpending;
606    
607     /* Print the file name if we are looking for those without matches and there
608     were none. If we found a match, we won't have got this far. */
609    
610     if (filenames_nomatch_only)
611     {
612     fprintf(stdout, "%s\n", printname);
613     return 0;
614 nigel 49 }
615    
616 nigel 77 /* Print the match count if wanted */
617    
618 nigel 49 if (count_only)
619     {
620 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
621 nigel 49 fprintf(stdout, "%d\n", count);
622     }
623    
624     return rc;
625     }
626    
627    
628    
629     /*************************************************
630 nigel 53 * Grep a file or recurse into a directory *
631     *************************************************/
632    
633 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
634     recursing; if it's a file, grep it.
635    
636     Arguments:
637     pathname the path to investigate
638     dir_recurse TRUE if recursing is wanted (-r)
639     show_filenames TRUE if file names are wanted for multiple files, except
640     for the only file at top level when not filenames_only
641     only_one_at_top TRUE if the path is the only one at toplevel
642    
643     Returns: 0 if there was at least one match
644     1 if there were no matches
645     2 there was some kind of error
646    
647     However, file opening failures are suppressed if "silent" is set.
648     */
649    
650 nigel 53 static int
651 nigel 77 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
652 nigel 53 BOOL only_one_at_top)
653     {
654     int rc = 1;
655     int sep;
656     FILE *in;
657 nigel 77 char *printname;
658 nigel 53
659 nigel 77 /* If the file name is "-" we scan stdin */
660 nigel 53
661 nigel 77 if (strcmp(pathname, "-") == 0)
662 nigel 53 {
663 nigel 77 return pcregrep(stdin,
664     (filenames_only || filenames_nomatch_only ||
665     (show_filenames && !only_one_at_top))?
666     stdin_name : NULL);
667     }
668    
669     /* If the file is a directory and we are recursing, scan each file within it,
670     subject to any include or exclude patterns that were set. The scanning code is
671     localized so it can be made system-specific. */
672    
673     if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
674     {
675 nigel 53 char buffer[1024];
676     char *nextfile;
677 nigel 77 directory_type *dir = opendirectory(pathname);
678 nigel 53
679     if (dir == NULL)
680     {
681 nigel 77 if (!silent)
682     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
683     strerror(errno));
684 nigel 53 return 2;
685     }
686    
687     while ((nextfile = readdirectory(dir)) != NULL)
688     {
689 nigel 77 int frc, blen;
690     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
691     blen = strlen(buffer);
692    
693     if (exclude_compiled != NULL &&
694     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
695     continue;
696    
697     if (include_compiled != NULL &&
698     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
699     continue;
700    
701 nigel 67 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
702 nigel 77 if (frc > 1) rc = frc;
703     else if (frc == 0 && rc == 1) rc = 0;
704 nigel 53 }
705    
706     closedirectory(dir);
707     return rc;
708     }
709    
710     /* If the file is not a directory, or we are not recursing, scan it. If this is
711 nigel 63 the first and only argument at top level, we don't show the file name (unless
712     we are only showing the file name). Otherwise, control is via the
713     show_filenames variable. */
714 nigel 53
715 nigel 77 in = fopen(pathname, "r");
716 nigel 53 if (in == NULL)
717     {
718 nigel 77 if (!silent)
719     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
720     strerror(errno));
721 nigel 53 return 2;
722     }
723    
724 nigel 77 printname = (filenames_only || filenames_nomatch_only ||
725     (show_filenames && !only_one_at_top))? pathname : NULL;
726    
727     rc = pcregrep(in, printname);
728    
729 nigel 53 fclose(in);
730     return rc;
731     }
732    
733    
734    
735    
736     /*************************************************
737 nigel 49 * Usage function *
738     *************************************************/
739    
/* Write a two-line summary of how to call the program to stderr, and hand the
argument back so that a caller can write exit(usage(n)). */

static int
usage(int rc)
{
fputs("Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n", stderr);
fputs("Type `pcregrep --help' for more information.\n", stderr);
return rc;
}
747    
748    
749    
750    
751     /*************************************************
752 nigel 53 * Help function *
753     *************************************************/
754    
755     static void
756     help(void)
757     {
758     option_item *op;
759    
760 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
761 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
762 nigel 63 printf("PATTERN must be present if -f is not used.\n");
763 nigel 77 printf("\"-\" can be used as a file name to mean STDIN.\n");
764 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
765    
766     printf("Options:\n");
767    
768     for (op = optionlist; op->one_char != 0; op++)
769     {
770     int n;
771     char s[4];
772     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
773     printf(" %s --%s%n", s, op->long_name, &n);
774     n = 30 - n;
775     if (n < 1) n = 1;
776     printf("%.*s%s\n", n, " ", op->help_text);
777     }
778    
779 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
780     printf("trailing white space is removed and blank lines are ignored.\n");
781     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
782 nigel 53
783 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
784 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
785     }
786    
787    
788    
789    
790     /*************************************************
791 nigel 77 * Handle a single-letter, no data option *
792 nigel 53 *************************************************/
793    
794     static int
795     handle_option(int letter, int options)
796     {
797     switch(letter)
798     {
799     case -1: help(); exit(0);
800     case 'c': count_only = TRUE; break;
801     case 'h': filenames = FALSE; break;
802     case 'i': options |= PCRE_CASELESS; break;
803 nigel 77 case 'l': filenames_only = TRUE; break;
804     case 'L': filenames_nomatch_only = TRUE; break;
805     case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
806 nigel 53 case 'n': number = TRUE; break;
807 nigel 77 case 'q': quiet = TRUE; break;
808 nigel 53 case 'r': recurse = TRUE; break;
809     case 's': silent = TRUE; break;
810 nigel 63 case 'u': options |= PCRE_UTF8; break;
811 nigel 53 case 'v': invert = TRUE; break;
812 nigel 77 case 'w': word_match = TRUE; break;
813     case 'x': whole_lines = TRUE; break;
814 nigel 53
815     case 'V':
816     fprintf(stderr, "pcregrep version %s using ", VERSION);
817     fprintf(stderr, "PCRE version %s\n", pcre_version());
818     exit(0);
819     break;
820    
821     default:
822     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
823     exit(usage(2));
824     }
825    
826     return options;
827     }
828    
829    
830    
831    
832     /*************************************************
833 nigel 49 * Main program *
834     *************************************************/
835    
836 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
837    
838 nigel 49 int
839     main(int argc, char **argv)
840     {
841 nigel 53 int i, j;
842 nigel 49 int rc = 1;
843     int options = 0;
844     int errptr;
845     const char *error;
846 nigel 53 BOOL only_one_at_top;
847 nigel 49
848     /* Process the options */
849    
850     for (i = 1; i < argc; i++)
851     {
852 nigel 77 option_item *op = NULL;
853     char *option_data = (char *)""; /* default to keep compiler happy */
854     BOOL longop;
855     BOOL longopwasequals = FALSE;
856    
857 nigel 49 if (argv[i][0] != '-') break;
858 nigel 53
859 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
860     but only if we have previously had -f to define the patterns. */
861 nigel 63
862 nigel 77 if (argv[i][1] == 0)
863     {
864     if (pattern_filename != NULL) break;
865     else exit(usage(2));
866     }
867 nigel 63
868 nigel 77 /* Handle a long name option, or -- to terminate the options */
869 nigel 53
870     if (argv[i][1] == '-')
871 nigel 49 {
872 nigel 77 char *arg = argv[i] + 2;
873     char *argequals = strchr(arg, '=');
874 nigel 53
875 nigel 77 if (*arg == 0) /* -- terminates options */
876 nigel 49 {
877 nigel 77 i++;
878     break; /* out of the options-handling loop */
879 nigel 53 }
880 nigel 49
881 nigel 77 longop = TRUE;
882    
883     /* Some long options have data that follows after =, for example file=name.
884     Some options have variations in the long name spelling: specifically, we
885     allow "regexp" because GNU grep allows it, though I personally go along
886     with Jeff Friedl in preferring "regex" without the "p". These options are
887     entered in the table as "regex(p)". No option is in both these categories,
888     fortunately. */
889    
890 nigel 53 for (op = optionlist; op->one_char != 0; op++)
891     {
892 nigel 77 char *opbra = strchr(op->long_name, '(');
893     char *equals = strchr(op->long_name, '=');
894     if (opbra == NULL) /* Not a (p) case */
895 nigel 53 {
896 nigel 77 if (equals == NULL) /* Not thing=data case */
897     {
898     if (strcmp(arg, op->long_name) == 0) break;
899     }
900     else /* Special case xxx=data */
901     {
902     int oplen = equals - op->long_name;
903     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
904     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
905     {
906     option_data = arg + arglen;
907     if (*option_data == '=')
908     {
909     option_data++;
910     longopwasequals = TRUE;
911     }
912     break;
913     }
914     }
915 nigel 53 }
916 nigel 77 else /* Special case xxxx(p) */
917     {
918     char buff1[24];
919     char buff2[24];
920     int baselen = opbra - op->long_name;
921     sprintf(buff1, "%.*s", baselen, op->long_name);
922     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
923     opbra + 1);
924     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
925     break;
926     }
927 nigel 53 }
928 nigel 77
929 nigel 53 if (op->one_char == 0)
930     {
931     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
932     exit(usage(2));
933     }
934     }
935 nigel 49
936 nigel 77 /* One-char options; many that have no data may be in a single argument; we
937     continue till we hit the last one or one that needs data. */
938 nigel 53
939     else
940     {
941     char *s = argv[i] + 1;
942 nigel 77 longop = FALSE;
943 nigel 53 while (*s != 0)
944     {
945 nigel 77 for (op = optionlist; op->one_char != 0; op++)
946     { if (*s == op->one_char) break; }
947     if (op->one_char == 0)
948 nigel 53 {
949 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
950     *s, argv[i]);
951     exit(usage(2));
952     }
953     if (op->type != OP_NODATA || s[1] == 0)
954     {
955     option_data = s+1;
956 nigel 53 break;
957     }
958 nigel 77 options = handle_option(*s++, options);
959 nigel 49 }
960     }
961 nigel 77
962     /* At this point we should have op pointing to a matched option */
963    
964     if (op->type == OP_NODATA)
965     options = handle_option(op->one_char, options);
966     else
967     {
968     if (*option_data == 0)
969     {
970     if (i >= argc - 1 || longopwasequals)
971     {
972     fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
973     exit(usage(2));
974     }
975     option_data = argv[++i];
976     }
977    
978     if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
979     {
980     char *endptr;
981     int n = strtoul(option_data, &endptr, 10);
982     if (*endptr != 0)
983     {
984     if (longop)
985     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
986     option_data, op->long_name);
987     else
988     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
989     option_data, op->one_char);
990     exit(usage(2));
991     }
992     *((int *)op->dataptr) = n;
993     }
994     }
995 nigel 49 }
996    
997 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
998     for -A and -B. */
999    
1000     if (both_context > 0)
1001     {
1002     if (after_context == 0) after_context = both_context;
1003     if (before_context == 0) before_context = both_context;
1004     }
1005    
1006 nigel 71 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1007     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1008 nigel 49
1009 nigel 53 if (pattern_list == NULL || hints_list == NULL)
1010     {
1011     fprintf(stderr, "pcregrep: malloc failed\n");
1012     return 2;
1013     }
1014 nigel 49
1015 nigel 53 /* Compile the regular expression(s). */
1016 nigel 49
1017 nigel 53 if (pattern_filename != NULL)
1018 nigel 49 {
1019 nigel 53 FILE *f = fopen(pattern_filename, "r");
1020 nigel 77 char buffer[MBUFTHIRD + 16];
1021     char *rdstart;
1022     int adjust = 0;
1023    
1024 nigel 53 if (f == NULL)
1025     {
1026     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1027     strerror(errno));
1028     return 2;
1029     }
1030 nigel 77
1031     if (whole_lines)
1032 nigel 53 {
1033 nigel 77 strcpy(buffer, "^(?:");
1034     adjust = 4;
1035     }
1036     else if (word_match)
1037     {
1038     strcpy(buffer, "\\b");
1039     adjust = 2;
1040     }
1041    
1042     rdstart = buffer + adjust;
1043     while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1044     {
1045     char *s = rdstart + (int)strlen(rdstart);
1046 nigel 53 if (pattern_count >= MAX_PATTERN_COUNT)
1047     {
1048     fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1049     MAX_PATTERN_COUNT);
1050     return 2;
1051     }
1052 nigel 77 while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1053     if (s == rdstart) continue;
1054     if (whole_lines) strcpy(s, ")$");
1055     else if (word_match)strcpy(s, "\\b");
1056     else *s = 0;
1057 nigel 53 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1058     &errptr, NULL);
1059     if (pattern_list[pattern_count++] == NULL)
1060     {
1061     fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1062 nigel 77 pattern_count, errptr - adjust, error);
1063 nigel 53 return 2;
1064     }
1065     }
1066     fclose(f);
1067 nigel 49 }
1068    
1069 nigel 77 /* If no file name, a single regex must be given inline. */
1070 nigel 49
1071 nigel 53 else
1072 nigel 49 {
1073 nigel 77 char buffer[MBUFTHIRD + 16];
1074     char *pat;
1075     int adjust = 0;
1076    
1077 nigel 63 if (i >= argc) return usage(2);
1078 nigel 77
1079     if (whole_lines)
1080     {
1081     sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1082     pat = buffer;
1083     adjust = 4;
1084     }
1085     else if (word_match)
1086     {
1087     sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1088     pat = buffer;
1089     adjust = 2;
1090     }
1091     else pat = argv[i++];
1092    
1093     pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1094    
1095 nigel 53 if (pattern_list[0] == NULL)
1096     {
1097 nigel 77 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1098     errptr - adjust, error);
1099 nigel 53 return 2;
1100     }
1101     pattern_count++;
1102 nigel 49 }
1103    
1104 nigel 77 /* Study the regular expressions, as we will be running them many times */
1105 nigel 53
1106     for (j = 0; j < pattern_count; j++)
1107     {
1108     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1109     if (error != NULL)
1110     {
1111     char s[16];
1112     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1113     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1114     return 2;
1115     }
1116     }
1117    
1118 nigel 77 /* If there are include or exclude patterns, compile them. */
1119    
1120     if (exclude_pattern != NULL)
1121     {
1122     exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1123     if (exclude_compiled == NULL)
1124     {
1125     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1126     errptr, error);
1127     return 2;
1128     }
1129     }
1130    
1131     if (include_pattern != NULL)
1132     {
1133     include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1134     if (include_compiled == NULL)
1135     {
1136     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1137     errptr, error);
1138     return 2;
1139     }
1140     }
1141    
1142 nigel 49 /* If there are no further arguments, do the business on stdin and exit */
1143    
1144 nigel 77 if (i >= argc) return pcregrep(stdin,
1145     (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1146 nigel 49
1147 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
1148     Pass in the fact that there is only one argument at top level - this suppresses
1149 nigel 77 the file name if the argument is not a directory and filenames_only is not set.
1150     */
1151 nigel 49
1152 nigel 53 only_one_at_top = (i == argc - 1);
1153 nigel 49
1154     for (; i < argc; i++)
1155     {
1156 nigel 53 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1157 nigel 77 if (frc > 1) rc = frc;
1158     else if (frc == 0 && rc == 1) rc = 0;
1159 nigel 49 }
1160    
1161     return rc;
1162     }
1163    
1164 nigel 77 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12