/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 89 - (hide annotations) (download)
Sat Feb 24 21:41:27 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 50765 byte(s)
Load pcre-6.6 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 87 Copyright (c) 1997-2006 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 87 #include <locale.h>
42 nigel 49 #include <stdio.h>
43     #include <string.h>
44     #include <stdlib.h>
45     #include <errno.h>
46 nigel 77
47     #include <sys/types.h>
48     #include <sys/stat.h>
49     #include <unistd.h>
50    
51 nigel 49 #include "config.h"
52     #include "pcre.h"
53    
/* Boolean values and type used throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define VERSION "4.2 09-Jan-2006"
#define MAX_PATTERN_COUNT 100

/* One third of the file buffer used by pcregrep(), which declares a buffer of
3*MBUFTHIRD bytes. It is BUFSIZ when that is larger than 8192, else 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
67 nigel 49
68 nigel 77
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). These are OR-ed
together in process_options and the result indexes prefix[] and suffix[]. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004
85    
86    
87    
88 nigel 49 /*************************************************
89     * Global variables *
90     *************************************************/
91    
92 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
93     regular code. */
94    
95     #ifdef JFRIEDL_DEBUG
96     static int S_arg = -1;
97 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99     static const char *jfriedl_prefix = "";
100     static const char *jfriedl_postfix = "";
101 nigel 87 #endif
102    
103     static char *colour_string = (char *)"1;31";
104     static char *colour_option = NULL;
105     static char *dee_option = NULL;
106     static char *DEE_option = NULL;
107 nigel 53 static char *pattern_filename = NULL;
108 nigel 77 static char *stdin_name = (char *)"(standard input)";
109 nigel 87 static char *locale = NULL;
110    
111     static const unsigned char *pcretables = NULL;
112    
113 nigel 53 static int pattern_count = 0;
114     static pcre **pattern_list;
115     static pcre_extra **hints_list;
116 nigel 49
117 nigel 77 static char *include_pattern = NULL;
118     static char *exclude_pattern = NULL;
119    
120     static pcre *include_compiled = NULL;
121     static pcre *exclude_compiled = NULL;
122    
123     static int after_context = 0;
124     static int before_context = 0;
125     static int both_context = 0;
126 nigel 87 static int dee_action = dee_READ;
127     static int DEE_action = DEE_READ;
128     static int error_count = 0;
129     static int filenames = FN_DEFAULT;
130     static int process_options = 0;
131 nigel 77
132 nigel 49 static BOOL count_only = FALSE;
133 nigel 87 static BOOL do_colour = FALSE;
134 nigel 77 static BOOL hyphenpending = FALSE;
135 nigel 49 static BOOL invert = FALSE;
136 nigel 77 static BOOL multiline = FALSE;
137 nigel 49 static BOOL number = FALSE;
138 nigel 87 static BOOL only_matching = FALSE;
139 nigel 77 static BOOL quiet = FALSE;
140 nigel 49 static BOOL silent = FALSE;
141    
/* Structure for options and list of them */

/* Kinds of option data; stored in the "type" field of an option_item. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* variable that receives the value, or NULL */
  const char *long_name;   /* long option name, possibly with "=value" text */
  const char *help_text;   /* one-line description */
} option_item;
154 nigel 49
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_HELP (-3)
#define N_INCLUDE (-4)
#define N_LABEL (-5)
#define N_LOCALE (-6)
#define N_NULL (-7)
165    
166 nigel 53 static option_item optionlist[] = {
167 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
168     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
169     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
170     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
171     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
172     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
173     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
174     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
175     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
176     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
177     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
178     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
179     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
180     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
181     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
182     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
183     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
184     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
185     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
186     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
187     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
188     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
189     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
190     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
191     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
192     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
193     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
194     #ifdef JFRIEDL_DEBUG
195     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
196     #endif
197     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
198     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
199     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
200     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
201     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
202     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
203     { OP_NODATA, 0, NULL, NULL, NULL }
204 nigel 53 };
205    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Each table has eight entries, one for every
combination of the three bits. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
216    
217    
218    
219 nigel 53 /*************************************************
220 nigel 87 * OS-specific functions *
221 nigel 53 *************************************************/
222    
223     /* These functions are defined so that they can be made system specific,
224 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
225 nigel 53
226    
227     /************* Directory scanning in Unix ***********/
228    
229     #if IS_UNIX
230     #include <sys/types.h>
231     #include <sys/stat.h>
232     #include <dirent.h>
233    
/* On Unix a directory handle is simply a DIR stream. */

typedef DIR directory_type;
235    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and the path is a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
/* S_ISDIR is the POSIX test macro; S_IFMT/S_IFDIR are not visible in all
compilation modes. */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
244    
245 nigel 67 static directory_type *
246 nigel 53 opendirectory(char *filename)
247     {
248     return opendir(filename);
249     }
250    
251 nigel 67 static char *
252 nigel 53 readdirectory(directory_type *dir)
253     {
254     for (;;)
255     {
256     struct dirent *dent = readdir(dir);
257     if (dent == NULL) return NULL;
258     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
259     return dent->d_name;
260     }
261     return NULL; /* Keep compiler happy; never executed */
262     }
263    
264 nigel 67 static void
265 nigel 53 closedirectory(directory_type *dir)
266     {
267     closedir(dir);
268     }
269    
270    
271 nigel 87 /************* Test for regular file in Unix **********/
272    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is a regular file, or if stat() fails
            0 if stat() succeeds and the path is something else
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
/* S_ISREG is the POSIX test macro; S_IFMT/S_IFREG are not visible in all
compilation modes. */
return S_ISREG(statbuf.st_mode)? 1 : 0;
}
281    
282    
283     /************* Test stdout for being a terminal in Unix **********/
284    
285     static BOOL
286     is_stdout_tty(void)
287     {
288     return isatty(fileno(stdout));
289     }
290    
291    
292 nigel 63 /************* Directory scanning in Win32 ***********/
293 nigel 53
294 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
295 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
296     when it did not exist. */
297 nigel 53
298 nigel 63
299     #elif HAVE_WIN32API
300    
301     #ifndef STRICT
302     # define STRICT
303     #endif
304     #ifndef WIN32_LEAN_AND_MEAN
305     # define WIN32_LEAN_AND_MEAN
306     #endif
307 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
308     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
309     #endif
310    
311 nigel 63 #include <windows.h>
312    
313     typedef struct directory_type
314     {
315     HANDLE handle;
316     BOOL first;
317     WIN32_FIND_DATA data;
318     } directory_type;
319    
320     int
321     isdirectory(char *filename)
322     {
323     DWORD attr = GetFileAttributes(filename);
324     if (attr == INVALID_FILE_ATTRIBUTES)
325     return 0;
326     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
327     }
328    
329     directory_type *
330     opendirectory(char *filename)
331     {
332     size_t len;
333     char *pattern;
334     directory_type *dir;
335     DWORD err;
336     len = strlen(filename);
337     pattern = (char *) malloc(len + 3);
338     dir = (directory_type *) malloc(sizeof(*dir));
339     if ((pattern == NULL) || (dir == NULL))
340     {
341     fprintf(stderr, "pcregrep: malloc failed\n");
342     exit(2);
343     }
344     memcpy(pattern, filename, len);
345     memcpy(&(pattern[len]), "\\*", 3);
346     dir->handle = FindFirstFile(pattern, &(dir->data));
347     if (dir->handle != INVALID_HANDLE_VALUE)
348     {
349     free(pattern);
350     dir->first = TRUE;
351     return dir;
352     }
353     err = GetLastError();
354     free(pattern);
355     free(dir);
356     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
357     return NULL;
358     }
359    
360     char *
361     readdirectory(directory_type *dir)
362     {
363     for (;;)
364     {
365     if (!dir->first)
366     {
367     if (!FindNextFile(dir->handle, &(dir->data)))
368     return NULL;
369     }
370     else
371     {
372     dir->first = FALSE;
373     }
374     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
375     return dir->data.cFileName;
376     }
377     #ifndef _MSC_VER
378     return NULL; /* Keep compiler happy; never executed */
379     #endif
380     }
381    
382     void
383     closedirectory(directory_type *dir)
384     {
385     FindClose(dir->handle);
386     free(dir);
387     }
388    
389    
390 nigel 87 /************* Test for regular file in Win32 **********/
391    
392     /* I don't know how to do this, or if it can be done; assume all paths are
393     regular if they are not directories. */
394    
/* Returns 1 for any path that isdirectory() does not report as a directory,
0 for a directory. */

int isregfile(char *filename)
{
return !isdirectory(filename);
}
399    
400    
401     /************* Test stdout for being a terminal in Win32 **********/
402    
403     /* I don't know how to do this; assume never */
404    
405     static BOOL
406     is_stdout_tty(void)
407     {
408     FALSE;
409     }
410    
411    
412 nigel 53 /************* Directory scanning when we can't do it ***********/
413    
414     /* The type is void, and apart from isdirectory(), the functions do nothing. */
415    
416 nigel 63 #else
417    
typedef void directory_type;

/* Nothing is ever reported as a directory, so the other three functions are
placeholders. Those that return a pointer return NULL, so that a caller which
does reach them sees a well-defined "nothing there" result. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
424    
425 nigel 87
426     /************* Test for regular when we can't do it **********/
427    
428     /* Assume all files are regular. */
429    
int isregfile(char *filename) { (void)filename; return 1; }
431    
432    
433     /************* Test stdout for being a terminal when we can't do it **********/
434    
435     static BOOL
436     is_stdout_tty(void)
437     {
438     return FALSE;
439     }
440    
441    
442 nigel 53 #endif
443    
444    
445    
446 nigel 49 #if ! HAVE_STRERROR
447     /*************************************************
448     * Provide strerror() for non-ANSI libraries *
449     *************************************************/
450    
451     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
452     in their libraries, but can provide the same facility by this simple
453     alternative function. */
454    
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror() built on the sys_errlist[] table.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" string
*/

char *
strerror(int n)
{
if (n < 0 || n >= sys_nerr) return "unknown error number";
return sys_errlist[n];
}
464     #endif /* HAVE_STRERROR */
465    
466    
467    
468     /*************************************************
469 nigel 77 * Print the previous "after" lines *
470 nigel 49 *************************************************/
471    
472 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
473 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
474     that a binary zero does not terminate it.
475 nigel 77
476     Arguments:
477     lastmatchnumber the number of the last matching line, plus one
478     lastmatchrestart where we restarted after the last match
479     endptr end of available data
480     printname filename for printing
481    
482     Returns: nothing
483     */
484    
485     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
486     char *endptr, char *printname)
487     {
488     if (after_context > 0 && lastmatchnumber > 0)
489     {
490     int count = 0;
491     while (lastmatchrestart < endptr && count++ < after_context)
492     {
493     char *pp = lastmatchrestart;
494     if (printname != NULL) fprintf(stdout, "%s-", printname);
495     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
496     while (*pp != '\n') pp++;
497 nigel 87 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
498 nigel 77 lastmatchrestart = pp + 1;
499     }
500     hyphenpending = TRUE;
501     }
502     }
503    
504    
505    
506     /*************************************************
507     * Grep an individual file *
508     *************************************************/
509    
510     /* This is called from grep_or_recurse() below. It uses a buffer that is three
511     times the value of MBUFTHIRD. The matching point is never allowed to stray into
512     the top third of the buffer, thus keeping more of the file available for
513     context printing or for multiline scanning. For large files, the pointer will
514     be in the middle third most of the time, so the bottom third is available for
515     "before" context printing.
516    
517     Arguments:
518     in the fopened FILE stream
519     printname the file name if it is to be printed for each match
520     or NULL if the file name is not to be printed
521     it cannot be NULL if filenames[_nomatch]_only is set
522    
523     Returns: 0 if there was at least one match
524     1 otherwise (no matches)
525     */
526    
527 nigel 49 static int
528 nigel 77 pcregrep(FILE *in, char *printname)
529 nigel 49 {
530     int rc = 1;
531 nigel 77 int linenumber = 1;
532     int lastmatchnumber = 0;
533 nigel 49 int count = 0;
534     int offsets[99];
535 nigel 77 char *lastmatchrestart = NULL;
536     char buffer[3*MBUFTHIRD];
537     char *ptr = buffer;
538     char *endptr;
539     size_t bufflength;
540     BOOL endhyphenpending = FALSE;
541 nigel 49
542 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
543     end of what we have. */
544    
545     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
546     endptr = buffer + bufflength;
547    
548     /* Loop while the current pointer is not at the end of the file. For large
549     files, endptr will be at the end of the buffer when we are in the middle of the
550     file, but ptr will never get there, because as soon as it gets over 2/3 of the
551     way, the buffer is shifted left and re-filled. */
552    
553     while (ptr < endptr)
554 nigel 49 {
555 nigel 77 int i;
556 nigel 87 int mrc = 0;
557 nigel 53 BOOL match = FALSE;
558 nigel 77 char *t = ptr;
559     size_t length, linelength;
560 nigel 49
561 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
562     of the subject string to pass to pcre_exec(). In multiline mode, it is the
563     length remainder of the data in the buffer. Otherwise, it is the length of
564     the next line. After matching, we always advance by the length of the next
565     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
566     that any match is constrained to be in the first line. */
567    
568     linelength = 0;
569     while (t < endptr && *t++ != '\n') linelength++;
570     length = multiline? endptr - ptr : linelength;
571    
572 nigel 89
573     /* Extra processing for Jeffrey Friedl's debugging. */
574    
575     #ifdef JFRIEDL_DEBUG
576     if (jfriedl_XT || jfriedl_XR)
577     {
578     #include <sys/time.h>
579     #include <time.h>
580     struct timeval start_time, end_time;
581     struct timezone dummy;
582    
583     if (jfriedl_XT)
584     {
585     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
586     const char *orig = ptr;
587     ptr = malloc(newlen + 1);
588     if (!ptr) {
589     printf("out of memory");
590     exit(2);
591     }
592     endptr = ptr;
593     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
594     for (i = 0; i < jfriedl_XT; i++) {
595     strncpy(endptr, orig, length);
596     endptr += length;
597     }
598     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
599     length = newlen;
600     }
601    
602     if (gettimeofday(&start_time, &dummy) != 0)
603     perror("bad gettimeofday");
604    
605    
606     for (i = 0; i < jfriedl_XR; i++)
607     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
608    
609     if (gettimeofday(&end_time, &dummy) != 0)
610     perror("bad gettimeofday");
611    
612     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
613     -
614     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
615    
616     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
617     return 0;
618     }
619     #endif
620    
621    
622 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
623     the final newline in the subject string. */
624    
625 nigel 87 for (i = 0; i < pattern_count; i++)
626 nigel 53 {
627 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
628     offsets, 99);
629     if (mrc >= 0) { match = TRUE; break; }
630     if (mrc != PCRE_ERROR_NOMATCH)
631     {
632     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
633     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
634     fprintf(stderr, "this line:\n");
635     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
636     fprintf(stderr, "\n");
637     if (error_count == 0 &&
638     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
639     {
640     fprintf(stderr, "pcregrep: error %d means that a resource limit "
641     "was exceeded\n", mrc);
642     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
643     }
644     if (error_count++ > 20)
645     {
646     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
647     exit(2);
648     }
649     match = invert; /* No more matching; don't show the line again */
650     break;
651     }
652 nigel 53 }
653 nigel 49
654 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
655 nigel 77
656 nigel 49 if (match != invert)
657     {
658 nigel 77 BOOL hyphenprinted = FALSE;
659    
660 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
661 nigel 77
662 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
663    
664     /* Just count if just counting is wanted. */
665    
666 nigel 49 if (count_only) count++;
667    
668 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
669     in the file. */
670    
671     else if (filenames == FN_ONLY)
672 nigel 49 {
673 nigel 77 fprintf(stdout, "%s\n", printname);
674 nigel 49 return 0;
675     }
676    
677 nigel 87 /* Likewise, if all we want is a yes/no answer. */
678    
679 nigel 77 else if (quiet) return 0;
680 nigel 49
681 nigel 87 /* The --only-matching option prints just the substring that matched, and
682     does not pring any context. */
683    
684     else if (only_matching)
685     {
686     if (printname != NULL) fprintf(stdout, "%s:", printname);
687     if (number) fprintf(stdout, "%d:", linenumber);
688     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
689     fprintf(stdout, "\n");
690     }
691    
692     /* This is the default case when none of the above options is set. We print
693     the matching lines(s), possibly preceded and/or followed by other lines of
694     context. */
695    
696 nigel 49 else
697     {
698 nigel 77 /* See if there is a requirement to print some "after" lines from a
699     previous match. We never print any overlaps. */
700    
701     if (after_context > 0 && lastmatchnumber > 0)
702     {
703     int linecount = 0;
704     char *p = lastmatchrestart;
705    
706     while (p < ptr && linecount < after_context)
707     {
708     while (*p != '\n') p++;
709     p++;
710     linecount++;
711     }
712    
713     /* It is important to advance lastmatchrestart during this printing so
714 nigel 87 that it interacts correctly with any "before" printing below. Print
715     each line's data using fwrite() in case there are binary zeroes. */
716 nigel 77
717     while (lastmatchrestart < p)
718     {
719     char *pp = lastmatchrestart;
720     if (printname != NULL) fprintf(stdout, "%s-", printname);
721     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
722     while (*pp != '\n') pp++;
723 nigel 87 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
724 nigel 77 lastmatchrestart = pp + 1;
725     }
726     if (lastmatchrestart != ptr) hyphenpending = TRUE;
727     }
728    
729     /* If there were non-contiguous lines printed above, insert hyphens. */
730    
731     if (hyphenpending)
732     {
733     fprintf(stdout, "--\n");
734     hyphenpending = FALSE;
735     hyphenprinted = TRUE;
736     }
737    
738     /* See if there is a requirement to print some "before" lines for this
739     match. Again, don't print overlaps. */
740    
741     if (before_context > 0)
742     {
743     int linecount = 0;
744     char *p = ptr;
745    
746     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
747 nigel 87 linecount < before_context)
748 nigel 77 {
749 nigel 87 linecount++;
750 nigel 77 p--;
751     while (p > buffer && p[-1] != '\n') p--;
752     }
753    
754     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
755     fprintf(stdout, "--\n");
756    
757     while (p < ptr)
758     {
759     char *pp = p;
760     if (printname != NULL) fprintf(stdout, "%s-", printname);
761     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
762     while (*pp != '\n') pp++;
763 nigel 87 fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */
764 nigel 77 p = pp + 1;
765     }
766     }
767    
768     /* Now print the matching line(s); ensure we set hyphenpending at the end
769 nigel 85 of the file if any context lines are being output. */
770 nigel 77
771 nigel 85 if (after_context > 0 || before_context > 0)
772     endhyphenpending = TRUE;
773    
774 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
775 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
776 nigel 77
777     /* In multiline mode, we want to print to the end of the line in which
778     the end of the matched string is found, so we adjust linelength and the
779     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
780     start of the match will always be before the first \n character. */
781    
782     if (multiline)
783     {
784     char *endmatch = ptr + offsets[1];
785     t = ptr;
786     while (t < endmatch) { if (*t++ == '\n') linenumber++; }
787     while (endmatch < endptr && *endmatch != '\n') endmatch++;
788     linelength = endmatch - ptr;
789     }
790    
791 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
792     zeroes are treated as just another data character. */
793    
794     /* This extra option, for Jeffrey Friedl's debugging requirements,
795     replaces the matched string, or a specific captured string if it exists,
796     with X. When this happens, colouring is ignored. */
797    
798     #ifdef JFRIEDL_DEBUG
799     if (S_arg >= 0 && S_arg < mrc)
800     {
801     int first = S_arg * 2;
802     int last = first + 1;
803     fwrite(ptr, 1, offsets[first], stdout);
804     fprintf(stdout, "X");
805     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
806     }
807     else
808     #endif
809    
810     /* We have to split the line(s) up if colouring. */
811    
812     if (do_colour)
813     {
814     fwrite(ptr, 1, offsets[0], stdout);
815     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
816     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
817     fprintf(stdout, "%c[00m", 0x1b);
818     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
819     }
820     else fwrite(ptr, 1, linelength, stdout);
821    
822     fprintf(stdout, "\n");
823 nigel 49 }
824    
825 nigel 87 /* End of doing what has to be done for a match */
826    
827 nigel 77 rc = 0; /* Had some success */
828    
829     /* Remember where the last match happened for after_context. We remember
830     where we are about to restart, and that line's number. */
831    
832     lastmatchrestart = ptr + linelength + 1;
833     lastmatchnumber = linenumber + 1;
834 nigel 49 }
835 nigel 77
836     /* Advance to after the newline and increment the line number. */
837    
838     ptr += linelength + 1;
839     linenumber++;
840    
841     /* If we haven't yet reached the end of the file (the buffer is full), and
842     the current point is in the top 1/3 of the buffer, slide the buffer down by
843     1/3 and refill it. Before we do this, if some unprinted "after" lines are
844     about to be lost, print them. */
845    
846     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
847     {
848     if (after_context > 0 &&
849     lastmatchnumber > 0 &&
850     lastmatchrestart < buffer + MBUFTHIRD)
851     {
852     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
853     lastmatchnumber = 0;
854     }
855    
856     /* Now do the shuffle */
857    
858     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
859     ptr -= MBUFTHIRD;
860     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
861     endptr = buffer + bufflength;
862    
863     /* Adjust any last match point */
864    
865     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
866     }
867     } /* Loop through the whole file */
868    
869     /* End of file; print final "after" lines if wanted; do_after_lines sets
870     hyphenpending if it prints something. */
871    
872 nigel 87 if (!only_matching && !count_only)
873     {
874     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
875     hyphenpending |= endhyphenpending;
876     }
877 nigel 77
878     /* Print the file name if we are looking for those without matches and there
879     were none. If we found a match, we won't have got this far. */
880    
881 nigel 87 if (filenames == FN_NOMATCH_ONLY)
882 nigel 77 {
883     fprintf(stdout, "%s\n", printname);
884     return 0;
885 nigel 49 }
886    
887 nigel 77 /* Print the match count if wanted */
888    
889 nigel 49 if (count_only)
890     {
891 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
892 nigel 49 fprintf(stdout, "%d\n", count);
893     }
894    
895     return rc;
896     }
897    
898    
899    
900     /*************************************************
901 nigel 53 * Grep a file or recurse into a directory *
902     *************************************************/
903    
904 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
905     recursing; if it's a file, grep it.
906    
907     Arguments:
908     pathname the path to investigate
909 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
910 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
911    
912     Returns: 0 if there was at least one match
913     1 if there were no matches
914     2 there was some kind of error
915    
916     However, file opening failures are suppressed if "silent" is set.
917     */
918    
919 nigel 53 static int
920 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
921 nigel 53 {
922     int rc = 1;
923     int sep;
924     FILE *in;
925    
926 nigel 77 /* If the file name is "-" we scan stdin */
927 nigel 53
928 nigel 77 if (strcmp(pathname, "-") == 0)
929 nigel 53 {
930 nigel 77 return pcregrep(stdin,
931 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
932 nigel 77 stdin_name : NULL);
933     }
934    
935    
936 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
937     each file within it, subject to any include or exclude patterns that were set.
938     The scanning code is localized so it can be made system-specific. */
939    
940     if ((sep = isdirectory(pathname)) != 0)
941 nigel 77 {
942 nigel 87 if (dee_action == dee_SKIP) return 1;
943     if (dee_action == dee_RECURSE)
944 nigel 53 {
945 nigel 87 char buffer[1024];
946     char *nextfile;
947     directory_type *dir = opendirectory(pathname);
948 nigel 53
949 nigel 87 if (dir == NULL)
950     {
951     if (!silent)
952     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
953     strerror(errno));
954     return 2;
955     }
956 nigel 77
957 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
958     {
959     int frc, blen;
960     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
961     blen = strlen(buffer);
962 nigel 77
963 nigel 87 if (exclude_compiled != NULL &&
964     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
965     continue;
966 nigel 77
967 nigel 87 if (include_compiled != NULL &&
968     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
969     continue;
970    
971     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
972     if (frc > 1) rc = frc;
973     else if (frc == 0 && rc == 1) rc = 0;
974     }
975    
976     closedirectory(dir);
977     return rc;
978 nigel 53 }
979     }
980    
981 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
982     been requested. */
983 nigel 53
984 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
985    
986     /* Control reaches here if we have a regular file, or if we have a directory
987     and recursion or skipping was not requested, or if we have anything else and
988     skipping was not requested. The scan proceeds. If this is the first and only
989     argument at top level, we don't show the file name, unless we are only showing
990     the file name, or the filename was forced (-H). */
991    
992 nigel 77 in = fopen(pathname, "r");
993 nigel 53 if (in == NULL)
994     {
995 nigel 77 if (!silent)
996     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
997     strerror(errno));
998 nigel 53 return 2;
999     }
1000    
1001 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1002     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1003 nigel 77
1004 nigel 53 fclose(in);
1005     return rc;
1006     }
1007    
1008    
1009    
1010    
1011     /*************************************************
1012 nigel 49 * Usage function *
1013     *************************************************/
1014    
1015     static int
1016     usage(int rc)
1017     {
1018 nigel 87 option_item *op;
1019     fprintf(stderr, "Usage: pcregrep [-");
1020     for (op = optionlist; op->one_char != 0; op++)
1021     {
1022     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1023     }
1024     fprintf(stderr, "] [long options] [pattern] [files]\n");
1025 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1026 nigel 49 return rc;
1027     }
1028    
1029    
1030    
1031    
1032     /*************************************************
1033 nigel 53 * Help function *
1034     *************************************************/
1035    
1036     static void
1037     help(void)
1038     {
1039     option_item *op;
1040    
1041 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1042 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1043 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1044     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1045 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1046    
1047     printf("Options:\n");
1048    
1049     for (op = optionlist; op->one_char != 0; op++)
1050     {
1051     int n;
1052     char s[4];
1053     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1054     printf(" %s --%s%n", s, op->long_name, &n);
1055     n = 30 - n;
1056     if (n < 1) n = 1;
1057     printf("%.*s%s\n", n, " ", op->help_text);
1058     }
1059    
1060 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1061     printf("trailing white space is removed and blank lines are ignored.\n");
1062     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1063 nigel 53
1064 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1065 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1066     }
1067    
1068    
1069    
1070    
1071     /*************************************************
1072 nigel 77 * Handle a single-letter, no data option *
1073 nigel 53 *************************************************/
1074    
1075     static int
1076     handle_option(int letter, int options)
1077     {
1078     switch(letter)
1079     {
1080 nigel 87 case N_HELP: help(); exit(0);
1081 nigel 53 case 'c': count_only = TRUE; break;
1082 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1083     case 'H': filenames = FN_FORCE; break;
1084     case 'h': filenames = FN_NONE; break;
1085 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1086 nigel 87 case 'l': filenames = FN_ONLY; break;
1087     case 'L': filenames = FN_NOMATCH_ONLY; break;
1088 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1089 nigel 53 case 'n': number = TRUE; break;
1090 nigel 87 case 'o': only_matching = TRUE; break;
1091 nigel 77 case 'q': quiet = TRUE; break;
1092 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1093 nigel 53 case 's': silent = TRUE; break;
1094 nigel 63 case 'u': options |= PCRE_UTF8; break;
1095 nigel 53 case 'v': invert = TRUE; break;
1096 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1097     case 'x': process_options |= PO_LINE_MATCH; break;
1098 nigel 53
1099     case 'V':
1100     fprintf(stderr, "pcregrep version %s using ", VERSION);
1101     fprintf(stderr, "PCRE version %s\n", pcre_version());
1102     exit(0);
1103     break;
1104    
1105     default:
1106     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1107     exit(usage(2));
1108     }
1109    
1110     return options;
1111     }
1112    
1113    
1114    
1115    
1116     /*************************************************
1117 nigel 87 * Construct printed ordinal *
1118     *************************************************/
1119    
/* This turns a number into "1st", "3rd", "11th", etc.

Argument:  n     the number
Returns:   pointer to a static buffer holding the text; the buffer is
           overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is enough for the decimal digits of any int; the
extra four bytes hold a minus sign, the two-letter ending, and the final zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

/* Numbers ending in 11, 12, or 13 take "th" ("11th", "112th"), so the last
digit is looked at only when the number does not end in one of those. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1138    
1139    
1140    
1141     /*************************************************
1142     * Compile a single pattern *
1143     *************************************************/
1144    
1145     /* When the -F option has been used, this is called for each substring.
1146     Otherwise it's called for each supplied pattern.
1147    
1148     Arguments:
1149     pattern the pattern string
1150     options the PCRE options
1151     filename the file name, or NULL for a command-line pattern
1152     count 0 if this is the only command line pattern, or
1153     number of the command line pattern, or
1154     linenumber for a pattern from a file
1155    
1156     Returns: TRUE on success, FALSE after an error
1157     */
1158    
1159     static BOOL
1160     compile_single_pattern(char *pattern, int options, char *filename, int count)
1161     {
1162     char buffer[MBUFTHIRD + 16];
1163     const char *error;
1164     int errptr;
1165    
1166     if (pattern_count >= MAX_PATTERN_COUNT)
1167     {
1168     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1169     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1170     return FALSE;
1171     }
1172    
1173     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1174     suffix[process_options]);
1175     pattern_list[pattern_count] =
1176     pcre_compile(buffer, options, &error, &errptr, pcretables);
1177     if (pattern_list[pattern_count++] != NULL) return TRUE;
1178    
1179     /* Handle compile errors */
1180    
1181     errptr -= (int)strlen(prefix[process_options]);
1182     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1183    
1184     if (filename == NULL)
1185     {
1186     if (count == 0)
1187     fprintf(stderr, "pcregrep: Error in command-line regex "
1188     "at offset %d: %s\n", errptr, error);
1189     else
1190     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1191     "at offset %d: %s\n", ordin(count), errptr, error);
1192     }
1193     else
1194     {
1195     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1196     "at offset %d: %s\n", count, filename, errptr, error);
1197     }
1198    
1199     return FALSE;
1200     }
1201    
1202    
1203    
1204     /*************************************************
1205     * Compile one supplied pattern *
1206     *************************************************/
1207    
1208     /* When the -F option has been used, each string may be a list of strings,
1209     separated by newlines. They will be matched literally.
1210    
1211     Arguments:
1212     pattern the pattern string
1213     options the PCRE options
1214     filename the file name, or NULL for a command-line pattern
1215     count 0 if this is the only command line pattern, or
1216     number of the command line pattern, or
1217     linenumber for a pattern from a file
1218    
1219     Returns: TRUE on success, FALSE after an error
1220     */
1221    
1222     static BOOL
1223     compile_pattern(char *pattern, int options, char *filename, int count)
1224     {
1225     if ((process_options & PO_FIXED_STRINGS) != 0)
1226     {
1227     char buffer[MBUFTHIRD];
1228     for(;;)
1229     {
1230     char *p = strchr(pattern, '\n');
1231     if (p == NULL)
1232     return compile_single_pattern(pattern, options, filename, count);
1233     sprintf(buffer, "%.*s", p - pattern, pattern);
1234     pattern = p + 1;
1235     if (!compile_single_pattern(buffer, options, filename, count))
1236     return FALSE;
1237     }
1238     }
1239     else return compile_single_pattern(pattern, options, filename, count);
1240     }
1241    
1242    
1243    
1244     /*************************************************
1245 nigel 49 * Main program *
1246     *************************************************/
1247    
1248 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1249    
1250 nigel 49 int
1251     main(int argc, char **argv)
1252     {
1253 nigel 53 int i, j;
1254 nigel 49 int rc = 1;
1255 nigel 87 int pcre_options = 0;
1256     int cmd_pattern_count = 0;
1257 nigel 49 int errptr;
1258 nigel 87 BOOL only_one_at_top;
1259     char *patterns[MAX_PATTERN_COUNT];
1260     const char *locale_from = "--locale";
1261 nigel 49 const char *error;
1262    
1263     /* Process the options */
1264    
1265     for (i = 1; i < argc; i++)
1266     {
1267 nigel 77 option_item *op = NULL;
1268     char *option_data = (char *)""; /* default to keep compiler happy */
1269     BOOL longop;
1270     BOOL longopwasequals = FALSE;
1271    
1272 nigel 49 if (argv[i][0] != '-') break;
1273 nigel 53
1274 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1275 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1276 nigel 63
1277 nigel 77 if (argv[i][1] == 0)
1278     {
1279 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1280 nigel 77 else exit(usage(2));
1281     }
1282 nigel 63
1283 nigel 77 /* Handle a long name option, or -- to terminate the options */
1284 nigel 53
1285     if (argv[i][1] == '-')
1286 nigel 49 {
1287 nigel 77 char *arg = argv[i] + 2;
1288     char *argequals = strchr(arg, '=');
1289 nigel 53
1290 nigel 77 if (*arg == 0) /* -- terminates options */
1291 nigel 49 {
1292 nigel 77 i++;
1293     break; /* out of the options-handling loop */
1294 nigel 53 }
1295 nigel 49
1296 nigel 77 longop = TRUE;
1297    
1298     /* Some long options have data that follows after =, for example file=name.
1299     Some options have variations in the long name spelling: specifically, we
1300     allow "regexp" because GNU grep allows it, though I personally go along
1301 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1302     These options are entered in the table as "regex(p)". No option is in both
1303     these categories, fortunately. */
1304 nigel 77
1305 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1306     {
1307 nigel 77 char *opbra = strchr(op->long_name, '(');
1308     char *equals = strchr(op->long_name, '=');
1309     if (opbra == NULL) /* Not a (p) case */
1310 nigel 53 {
1311 nigel 77 if (equals == NULL) /* Not thing=data case */
1312     {
1313     if (strcmp(arg, op->long_name) == 0) break;
1314     }
1315     else /* Special case xxx=data */
1316     {
1317     int oplen = equals - op->long_name;
1318     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1319     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1320     {
1321     option_data = arg + arglen;
1322     if (*option_data == '=')
1323     {
1324     option_data++;
1325     longopwasequals = TRUE;
1326     }
1327     break;
1328     }
1329     }
1330 nigel 53 }
1331 nigel 77 else /* Special case xxxx(p) */
1332     {
1333     char buff1[24];
1334     char buff2[24];
1335     int baselen = opbra - op->long_name;
1336     sprintf(buff1, "%.*s", baselen, op->long_name);
1337     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1338     opbra + 1);
1339     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1340     break;
1341     }
1342 nigel 53 }
1343 nigel 77
1344 nigel 53 if (op->one_char == 0)
1345     {
1346     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1347     exit(usage(2));
1348     }
1349     }
1350 nigel 49
1351 nigel 89
1352     /* Jeffrey Friedl's debugging harness uses these additional options which
1353     are not in the right form for putting in the option table because they use
1354     only one hyphen, yet are more than one character long. By putting them
1355     separately here, they will not get displayed as part of the help() output,
1356     but I don't think Jeffrey will care about that. */
1357    
1358     #ifdef JFRIEDL_DEBUG
1359     else if (strcmp(argv[i], "-pre") == 0) {
1360     jfriedl_prefix = argv[++i];
1361     continue;
1362     } else if (strcmp(argv[i], "-post") == 0) {
1363     jfriedl_postfix = argv[++i];
1364     continue;
1365     } else if (strcmp(argv[i], "-XT") == 0) {
1366     sscanf(argv[++i], "%d", &jfriedl_XT);
1367     continue;
1368     } else if (strcmp(argv[i], "-XR") == 0) {
1369     sscanf(argv[++i], "%d", &jfriedl_XR);
1370     continue;
1371     }
1372     #endif
1373    
1374    
1375 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1376     continue till we hit the last one or one that needs data. */
1377 nigel 53
1378     else
1379     {
1380     char *s = argv[i] + 1;
1381 nigel 77 longop = FALSE;
1382 nigel 53 while (*s != 0)
1383     {
1384 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1385     { if (*s == op->one_char) break; }
1386     if (op->one_char == 0)
1387 nigel 53 {
1388 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1389     *s, argv[i]);
1390     exit(usage(2));
1391     }
1392     if (op->type != OP_NODATA || s[1] == 0)
1393     {
1394     option_data = s+1;
1395 nigel 53 break;
1396     }
1397 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1398 nigel 49 }
1399     }
1400 nigel 77
1401 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1402     is NO_DATA, it means that there is no data, and the option might set
1403     something in the PCRE options. */
1404 nigel 77
1405     if (op->type == OP_NODATA)
1406     {
1407 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1408     continue;
1409     }
1410    
1411     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1412     either has a value or defaults to something. It cannot have data in a
1413     separate item. At the moment, the only such options are "colo(u)r" and
1414 nigel 89 Jeffrey Friedl's special -S debugging option. */
1415 nigel 87
1416     if (*option_data == 0 &&
1417     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1418     {
1419     switch (op->one_char)
1420 nigel 77 {
1421 nigel 87 case N_COLOUR:
1422     colour_option = (char *)"auto";
1423     break;
1424     #ifdef JFRIEDL_DEBUG
1425     case 'S':
1426     S_arg = 0;
1427     break;
1428     #endif
1429 nigel 77 }
1430 nigel 87 continue;
1431     }
1432 nigel 77
1433 nigel 87 /* Otherwise, find the data string for the option. */
1434    
1435     if (*option_data == 0)
1436     {
1437     if (i >= argc - 1 || longopwasequals)
1438 nigel 77 {
1439 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1440     exit(usage(2));
1441     }
1442     option_data = argv[++i];
1443     }
1444    
1445     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1446     multiple times to create a list of patterns. */
1447    
1448     if (op->type == OP_PATLIST)
1449     {
1450     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1451     {
1452     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1453     MAX_PATTERN_COUNT);
1454     return 2;
1455     }
1456     patterns[cmd_pattern_count++] = option_data;
1457     }
1458    
1459     /* Otherwise, deal with single string or numeric data values. */
1460    
1461     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1462     {
1463     *((char **)op->dataptr) = option_data;
1464     }
1465     else
1466     {
1467     char *endptr;
1468     int n = strtoul(option_data, &endptr, 10);
1469     if (*endptr != 0)
1470     {
1471     if (longop)
1472 nigel 77 {
1473 nigel 87 char *equals = strchr(op->long_name, '=');
1474     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1475     equals - op->long_name;
1476     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1477     option_data, nlen, op->long_name);
1478 nigel 77 }
1479 nigel 87 else
1480     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1481     option_data, op->one_char);
1482     exit(usage(2));
1483 nigel 77 }
1484 nigel 87 *((int *)op->dataptr) = n;
1485 nigel 77 }
1486 nigel 49 }
1487    
1488 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1489     for -A and -B. */
1490    
1491     if (both_context > 0)
1492     {
1493     if (after_context == 0) after_context = both_context;
1494     if (before_context == 0) before_context = both_context;
1495     }
1496    
1497 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1498     LC_ALL environment variable is set, and if so, use it. */
1499 nigel 49
1500 nigel 87 if (locale == NULL)
1501 nigel 53 {
1502 nigel 87 locale = getenv("LC_ALL");
1503     locale_from = "LCC_ALL";
1504 nigel 53 }
1505 nigel 49
1506 nigel 87 if (locale == NULL)
1507     {
1508     locale = getenv("LC_CTYPE");
1509     locale_from = "LC_CTYPE";
1510     }
1511 nigel 49
1512 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1513     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1514    
1515     if (locale != NULL)
1516 nigel 49 {
1517 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1518 nigel 53 {
1519 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1520     locale, locale_from);
1521 nigel 53 return 2;
1522     }
1523 nigel 87 pcretables = pcre_maketables();
1524     }
1525 nigel 77
1526 nigel 87 /* Sort out colouring */
1527    
1528     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1529     {
1530     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1531     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1532     else
1533 nigel 53 {
1534 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1535     colour_option);
1536     return 2;
1537 nigel 77 }
1538 nigel 87 if (do_colour)
1539 nigel 77 {
1540 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1541     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1542     if (cs != NULL) colour_string = cs;
1543 nigel 77 }
1544 nigel 87 }
1545 nigel 77
1546 nigel 87 /* Interpret the text values for -d and -D */
1547    
1548     if (dee_option != NULL)
1549     {
1550     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1551     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1552     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1553     else
1554 nigel 77 {
1555 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1556     return 2;
1557 nigel 53 }
1558 nigel 49 }
1559    
1560 nigel 87 if (DEE_option != NULL)
1561     {
1562     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1563     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1564     else
1565     {
1566     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1567     return 2;
1568     }
1569     }
1570 nigel 49
1571 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1572 nigel 87
1573     #ifdef JFRIEDL_DEBUG
1574     if (S_arg > 9)
1575 nigel 49 {
1576 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1577     return 2;
1578     }
1579 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1580     {
1581     if (jfriedl_XT == 0) jfriedl_XT = 1;
1582     if (jfriedl_XR == 0) jfriedl_XR = 1;
1583     }
1584 nigel 87 #endif
1585 nigel 77
1586 nigel 87 /* Get memory to store the pattern and hints lists. */
1587    
1588     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1589     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1590    
1591     if (pattern_list == NULL || hints_list == NULL)
1592     {
1593     fprintf(stderr, "pcregrep: malloc failed\n");
1594     return 2;
1595     }
1596    
1597     /* If no patterns were provided by -e, and there is no file provided by -f,
1598     the first argument is the one and only pattern, and it must exist. */
1599    
1600     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1601     {
1602 nigel 63 if (i >= argc) return usage(2);
1603 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1604     }
1605 nigel 77
1606 nigel 87 /* Compile the patterns that were provided on the command line, either by
1607     multiple uses of -e or as a single unkeyed pattern. */
1608    
1609     for (j = 0; j < cmd_pattern_count; j++)
1610     {
1611     if (!compile_pattern(patterns[j], pcre_options, NULL,
1612     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1613     return 2;
1614     }
1615    
1616     /* Compile the regular expressions that are provided in a file. */
1617    
1618     if (pattern_filename != NULL)
1619     {
1620     int linenumber = 0;
1621     FILE *f;
1622     char *filename;
1623     char buffer[MBUFTHIRD];
1624    
1625     if (strcmp(pattern_filename, "-") == 0)
1626 nigel 77 {
1627 nigel 87 f = stdin;
1628     filename = stdin_name;
1629 nigel 77 }
1630 nigel 87 else
1631 nigel 77 {
1632 nigel 87 f = fopen(pattern_filename, "r");
1633     if (f == NULL)
1634     {
1635     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1636     strerror(errno));
1637     return 2;
1638     }
1639     filename = pattern_filename;
1640 nigel 77 }
1641    
1642 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1643 nigel 53 {
1644 nigel 87 char *s = buffer + (int)strlen(buffer);
1645     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1646     *s = 0;
1647     linenumber++;
1648     if (buffer[0] == 0) continue; /* Skip blank lines */
1649     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1650     return 2;
1651 nigel 53 }
1652 nigel 87
1653     if (f != stdin) fclose(f);
1654 nigel 49 }
1655    
1656 nigel 77 /* Study the regular expressions, as we will be running them many times */
1657 nigel 53
1658     for (j = 0; j < pattern_count; j++)
1659     {
1660     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1661     if (error != NULL)
1662     {
1663     char s[16];
1664     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1665     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1666     return 2;
1667     }
1668     }
1669    
1670 nigel 77 /* If there are include or exclude patterns, compile them. */
1671    
1672     if (exclude_pattern != NULL)
1673     {
1674 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1675     pcretables);
1676 nigel 77 if (exclude_compiled == NULL)
1677     {
1678     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1679     errptr, error);
1680     return 2;
1681     }
1682     }
1683    
1684     if (include_pattern != NULL)
1685     {
1686 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1687     pcretables);
1688 nigel 77 if (include_compiled == NULL)
1689     {
1690     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1691     errptr, error);
1692     return 2;
1693     }
1694     }
1695    
1696 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
1697 nigel 49
1698 nigel 87 if (i >= argc)
1699     return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1700 nigel 49
1701 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
1702     Pass in the fact that there is only one argument at top level - this suppresses
1703 nigel 87 the file name if the argument is not a directory and filenames are not
1704     otherwise forced. */
1705 nigel 49
1706 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1707 nigel 49
1708     for (; i < argc; i++)
1709     {
1710 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1711     only_one_at_top);
1712 nigel 77 if (frc > 1) rc = frc;
1713     else if (frc == 0 && rc == 1) rc = 0;
1714 nigel 49 }
1715    
1716     return rc;
1717     }
1718    
1719 nigel 77 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12