/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (hide annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: text/plain
File size: 48071 byte(s)
Load pcre-6.5 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 87 Copyright (c) 1997-2006 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 87 #include <locale.h>
42 nigel 49 #include <stdio.h>
43     #include <string.h>
44     #include <stdlib.h>
45     #include <errno.h>
46 nigel 77
47     #include <sys/types.h>
48     #include <sys/stat.h>
49     #include <unistd.h>
50    
51 nigel 49 #include "config.h"
52     #include "pcre.h"
53    
/* Minimal boolean support used throughout this file: an int-based BOOL with
the conventional FALSE/TRUE values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;
58    
#define VERSION "4.2 09-Jan-2006"
#define MAX_PATTERN_COUNT 100

/* MBUFTHIRD is BUFSIZ when BUFSIZ exceeds 8192, and 8192 otherwise. The main
input buffer in pcregrep() is declared as three times this size. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
67 nigel 49
68 nigel 77
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88 nigel 49 /*************************************************
89     * Global variables *
90     *************************************************/
91    
92 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
93     regular code. */
94    
95     #ifdef JFRIEDL_DEBUG
96     static int S_arg = -1;
97     #endif
98    
99     static char *colour_string = (char *)"1;31";
100     static char *colour_option = NULL;
101     static char *dee_option = NULL;
102     static char *DEE_option = NULL;
103 nigel 53 static char *pattern_filename = NULL;
104 nigel 77 static char *stdin_name = (char *)"(standard input)";
105 nigel 87 static char *locale = NULL;
106    
107     static const unsigned char *pcretables = NULL;
108    
109 nigel 53 static int pattern_count = 0;
110     static pcre **pattern_list;
111     static pcre_extra **hints_list;
112 nigel 49
113 nigel 77 static char *include_pattern = NULL;
114     static char *exclude_pattern = NULL;
115    
116     static pcre *include_compiled = NULL;
117     static pcre *exclude_compiled = NULL;
118    
119     static int after_context = 0;
120     static int before_context = 0;
121     static int both_context = 0;
122 nigel 87 static int dee_action = dee_READ;
123     static int DEE_action = DEE_READ;
124     static int error_count = 0;
125     static int filenames = FN_DEFAULT;
126     static int process_options = 0;
127 nigel 77
128 nigel 49 static BOOL count_only = FALSE;
129 nigel 87 static BOOL do_colour = FALSE;
130 nigel 77 static BOOL hyphenpending = FALSE;
131 nigel 49 static BOOL invert = FALSE;
132 nigel 77 static BOOL multiline = FALSE;
133 nigel 49 static BOOL number = FALSE;
134 nigel 87 static BOOL only_matching = FALSE;
135 nigel 77 static BOOL quiet = FALSE;
136 nigel 49 static BOOL silent = FALSE;
137    
/* Structure for options and list of them */

/* Kinds of option, stored in the "type" field of an option_item. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* single-letter form of the option */
  void *dataptr;          /* address of the variable to set, or NULL */
  const char *long_name;  /* long form of the option */
  const char *help_text;  /* description of the option */
} option_item;
150 nigel 49
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR    (-1)
#define N_EXCLUDE   (-2)
#define N_HELP      (-3)
#define N_INCLUDE   (-4)
#define N_LABEL     (-5)
#define N_LOCALE    (-6)
#define N_NULL      (-7)
161    
162 nigel 53 static option_item optionlist[] = {
163 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
164     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
165     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
166     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
167     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
168     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
169     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
170     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
171     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
172     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
173     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
174     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
175     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
176     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
177     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
178     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
179     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
180     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
181     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
182     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
183     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
184     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
185     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
186     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
187     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
188     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
189     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
190     #ifdef JFRIEDL_DEBUG
191     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
192     #endif
193     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
194     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
195     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
196     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
197     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
198     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
199     { OP_NODATA, 0, NULL, NULL, NULL }
200 nigel 53 };
201    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Each table therefore has eight entries, one for
every combination of the three bits. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
212    
213    
214    
215 nigel 53 /*************************************************
216 nigel 87 * OS-specific functions *
217 nigel 53 *************************************************/
218    
219     /* These functions are defined so that they can be made system specific,
220 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
221 nigel 53
222    
223     /************* Directory scanning in Unix ***********/
224    
225     #if IS_UNIX
226     #include <sys/types.h>
227     #include <sys/stat.h>
228     #include <dirent.h>
229    
230     typedef DIR directory_type;
231    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' (the path separator to use when building names of entries
              inside it) if the path is a directory
            0 if it is not a directory, or if stat() fails; in the latter
              case the expectation is that opening it as a file will fail
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;
  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
240    
241 nigel 67 static directory_type *
242 nigel 53 opendirectory(char *filename)
243     {
244     return opendir(filename);
245     }
246    
247 nigel 67 static char *
248 nigel 53 readdirectory(directory_type *dir)
249     {
250     for (;;)
251     {
252     struct dirent *dent = readdir(dir);
253     if (dent == NULL) return NULL;
254     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
255     return dent->d_name;
256     }
257     return NULL; /* Keep compiler happy; never executed */
258     }
259    
260 nigel 67 static void
261 nigel 53 closedirectory(directory_type *dir)
262     {
263     closedir(dir);
264     }
265    
266    
/************* Test for regular file in Unix **********/

/* Argument:   filename   the path to test
Returns:       non-zero if the path is a regular file, or if stat() fails (the
                 path is then treated as regular, in the expectation that
                 opening it as a file will fail)
               0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;
  return S_ISREG(statbuf.st_mode);
}
277    
278    
279     /************* Test stdout for being a terminal in Unix **********/
280    
281     static BOOL
282     is_stdout_tty(void)
283     {
284     return isatty(fileno(stdout));
285     }
286    
287    
288 nigel 63 /************* Directory scanning in Win32 ***********/
289 nigel 53
290 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
291 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
292     when it did not exist. */
293 nigel 53
294 nigel 63
295     #elif HAVE_WIN32API
296    
297     #ifndef STRICT
298     # define STRICT
299     #endif
300     #ifndef WIN32_LEAN_AND_MEAN
301     # define WIN32_LEAN_AND_MEAN
302     #endif
303 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
304     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
305     #endif
306    
307 nigel 63 #include <windows.h>
308    
309     typedef struct directory_type
310     {
311     HANDLE handle;
312     BOOL first;
313     WIN32_FIND_DATA data;
314     } directory_type;
315    
316     int
317     isdirectory(char *filename)
318     {
319     DWORD attr = GetFileAttributes(filename);
320     if (attr == INVALID_FILE_ATTRIBUTES)
321     return 0;
322     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
323     }
324    
325     directory_type *
326     opendirectory(char *filename)
327     {
328     size_t len;
329     char *pattern;
330     directory_type *dir;
331     DWORD err;
332     len = strlen(filename);
333     pattern = (char *) malloc(len + 3);
334     dir = (directory_type *) malloc(sizeof(*dir));
335     if ((pattern == NULL) || (dir == NULL))
336     {
337     fprintf(stderr, "pcregrep: malloc failed\n");
338     exit(2);
339     }
340     memcpy(pattern, filename, len);
341     memcpy(&(pattern[len]), "\\*", 3);
342     dir->handle = FindFirstFile(pattern, &(dir->data));
343     if (dir->handle != INVALID_HANDLE_VALUE)
344     {
345     free(pattern);
346     dir->first = TRUE;
347     return dir;
348     }
349     err = GetLastError();
350     free(pattern);
351     free(dir);
352     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
353     return NULL;
354     }
355    
356     char *
357     readdirectory(directory_type *dir)
358     {
359     for (;;)
360     {
361     if (!dir->first)
362     {
363     if (!FindNextFile(dir->handle, &(dir->data)))
364     return NULL;
365     }
366     else
367     {
368     dir->first = FALSE;
369     }
370     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
371     return dir->data.cFileName;
372     }
373     #ifndef _MSC_VER
374     return NULL; /* Keep compiler happy; never executed */
375     #endif
376     }
377    
378     void
379     closedirectory(directory_type *dir)
380     {
381     FindClose(dir->handle);
382     free(dir);
383     }
384    
385    
/************* Test for regular file in Win32 **********/

/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:   filename   the path to test
Returns:    non-zero if isdirectory() reports that the path is not a directory
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
395    
396    
397     /************* Test stdout for being a terminal in Win32 **********/
398    
399     /* I don't know how to do this; assume never */
400    
401     static BOOL
402     is_stdout_tty(void)
403     {
404     FALSE;
405     }
406    
407    
408 nigel 53 /************* Directory scanning when we can't do it ***********/
409    
410     /* The type is void, and apart from isdirectory(), the functions do nothing. */
411    
412 nigel 63 #else
413    
typedef void directory_type;

/* Nothing is ever reported as a directory here, so the other three functions
exist only to satisfy references to them. Those that return a value return
NULL rather than falling off the end of a non-void function. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
420    
421 nigel 87
/************* Test for regular when we can't do it **********/

/* Assume all files are regular. */

int isregfile(char *filename) { (void)filename; return 1; }
427    
428    
429     /************* Test stdout for being a terminal when we can't do it **********/
430    
431     static BOOL
432     is_stdout_tty(void)
433     {
434     return FALSE;
435     }
436    
437    
438 nigel 53 #endif
439    
440    
441    
442 nigel 49 #if ! HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function.

Argument:   n   an error number
Returns:    the corresponding entry in sys_errlist, or a fixed "unknown"
              message if n is outside the range 0 to sys_nerr - 1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
460     #endif /* HAVE_STRERROR */
461    
462    
463    
464     /*************************************************
465 nigel 77 * Print the previous "after" lines *
466 nigel 49 *************************************************/
467    
468 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
469 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
470     that a binary zero does not terminate it.
471 nigel 77
472     Arguments:
473     lastmatchnumber the number of the last matching line, plus one
474     lastmatchrestart where we restarted after the last match
475     endptr end of available data
476     printname filename for printing
477    
478     Returns: nothing
479     */
480    
481     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
482     char *endptr, char *printname)
483     {
484     if (after_context > 0 && lastmatchnumber > 0)
485     {
486     int count = 0;
487     while (lastmatchrestart < endptr && count++ < after_context)
488     {
489     char *pp = lastmatchrestart;
490     if (printname != NULL) fprintf(stdout, "%s-", printname);
491     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
492     while (*pp != '\n') pp++;
493 nigel 87 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
494 nigel 77 lastmatchrestart = pp + 1;
495     }
496     hyphenpending = TRUE;
497     }
498     }
499    
500    
501    
502     /*************************************************
503     * Grep an individual file *
504     *************************************************/
505    
506     /* This is called from grep_or_recurse() below. It uses a buffer that is three
507     times the value of MBUFTHIRD. The matching point is never allowed to stray into
508     the top third of the buffer, thus keeping more of the file available for
509     context printing or for multiline scanning. For large files, the pointer will
510     be in the middle third most of the time, so the bottom third is available for
511     "before" context printing.
512    
513     Arguments:
514     in the fopened FILE stream
515     printname the file name if it is to be printed for each match
516     or NULL if the file name is not to be printed
517     it cannot be NULL if filenames[_nomatch]_only is set
518    
519     Returns: 0 if there was at least one match
520     1 otherwise (no matches)
521     */
522    
523 nigel 49 static int
524 nigel 77 pcregrep(FILE *in, char *printname)
525 nigel 49 {
526     int rc = 1;
527 nigel 77 int linenumber = 1;
528     int lastmatchnumber = 0;
529 nigel 49 int count = 0;
530     int offsets[99];
531 nigel 77 char *lastmatchrestart = NULL;
532     char buffer[3*MBUFTHIRD];
533     char *ptr = buffer;
534     char *endptr;
535     size_t bufflength;
536     BOOL endhyphenpending = FALSE;
537 nigel 49
538 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
539     end of what we have. */
540    
541     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
542     endptr = buffer + bufflength;
543    
544     /* Loop while the current pointer is not at the end of the file. For large
545     files, endptr will be at the end of the buffer when we are in the middle of the
546     file, but ptr will never get there, because as soon as it gets over 2/3 of the
547     way, the buffer is shifted left and re-filled. */
548    
549     while (ptr < endptr)
550 nigel 49 {
551 nigel 77 int i;
552 nigel 87 int mrc = 0;
553 nigel 53 BOOL match = FALSE;
554 nigel 77 char *t = ptr;
555     size_t length, linelength;
556 nigel 49
557 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
558     of the subject string to pass to pcre_exec(). In multiline mode, it is the
559     length remainder of the data in the buffer. Otherwise, it is the length of
560     the next line. After matching, we always advance by the length of the next
561     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
562     that any match is constrained to be in the first line. */
563    
564     linelength = 0;
565     while (t < endptr && *t++ != '\n') linelength++;
566     length = multiline? endptr - ptr : linelength;
567    
568     /* Run through all the patterns until one matches. Note that we don't include
569     the final newline in the subject string. */
570    
571 nigel 87 for (i = 0; i < pattern_count; i++)
572 nigel 53 {
573 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
574     offsets, 99);
575     if (mrc >= 0) { match = TRUE; break; }
576     if (mrc != PCRE_ERROR_NOMATCH)
577     {
578     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
579     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
580     fprintf(stderr, "this line:\n");
581     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
582     fprintf(stderr, "\n");
583     if (error_count == 0 &&
584     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
585     {
586     fprintf(stderr, "pcregrep: error %d means that a resource limit "
587     "was exceeded\n", mrc);
588     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
589     }
590     if (error_count++ > 20)
591     {
592     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
593     exit(2);
594     }
595     match = invert; /* No more matching; don't show the line again */
596     break;
597     }
598 nigel 53 }
599 nigel 49
600 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
601 nigel 77
602 nigel 49 if (match != invert)
603     {
604 nigel 77 BOOL hyphenprinted = FALSE;
605    
606 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
607 nigel 77
608 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
609    
610     /* Just count if just counting is wanted. */
611    
612 nigel 49 if (count_only) count++;
613    
614 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
615     in the file. */
616    
617     else if (filenames == FN_ONLY)
618 nigel 49 {
619 nigel 77 fprintf(stdout, "%s\n", printname);
620 nigel 49 return 0;
621     }
622    
623 nigel 87 /* Likewise, if all we want is a yes/no answer. */
624    
625 nigel 77 else if (quiet) return 0;
626 nigel 49
627 nigel 87 /* The --only-matching option prints just the substring that matched, and
628     does not pring any context. */
629    
630     else if (only_matching)
631     {
632     if (printname != NULL) fprintf(stdout, "%s:", printname);
633     if (number) fprintf(stdout, "%d:", linenumber);
634     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
635     fprintf(stdout, "\n");
636     }
637    
638     /* This is the default case when none of the above options is set. We print
639     the matching lines(s), possibly preceded and/or followed by other lines of
640     context. */
641    
642 nigel 49 else
643     {
644 nigel 77 /* See if there is a requirement to print some "after" lines from a
645     previous match. We never print any overlaps. */
646    
647     if (after_context > 0 && lastmatchnumber > 0)
648     {
649     int linecount = 0;
650     char *p = lastmatchrestart;
651    
652     while (p < ptr && linecount < after_context)
653     {
654     while (*p != '\n') p++;
655     p++;
656     linecount++;
657     }
658    
659     /* It is important to advance lastmatchrestart during this printing so
660 nigel 87 that it interacts correctly with any "before" printing below. Print
661     each line's data using fwrite() in case there are binary zeroes. */
662 nigel 77
663     while (lastmatchrestart < p)
664     {
665     char *pp = lastmatchrestart;
666     if (printname != NULL) fprintf(stdout, "%s-", printname);
667     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
668     while (*pp != '\n') pp++;
669 nigel 87 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
670 nigel 77 lastmatchrestart = pp + 1;
671     }
672     if (lastmatchrestart != ptr) hyphenpending = TRUE;
673     }
674    
675     /* If there were non-contiguous lines printed above, insert hyphens. */
676    
677     if (hyphenpending)
678     {
679     fprintf(stdout, "--\n");
680     hyphenpending = FALSE;
681     hyphenprinted = TRUE;
682     }
683    
684     /* See if there is a requirement to print some "before" lines for this
685     match. Again, don't print overlaps. */
686    
687     if (before_context > 0)
688     {
689     int linecount = 0;
690     char *p = ptr;
691    
692     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
693 nigel 87 linecount < before_context)
694 nigel 77 {
695 nigel 87 linecount++;
696 nigel 77 p--;
697     while (p > buffer && p[-1] != '\n') p--;
698     }
699    
700     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
701     fprintf(stdout, "--\n");
702    
703     while (p < ptr)
704     {
705     char *pp = p;
706     if (printname != NULL) fprintf(stdout, "%s-", printname);
707     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
708     while (*pp != '\n') pp++;
709 nigel 87 fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */
710 nigel 77 p = pp + 1;
711     }
712     }
713    
714     /* Now print the matching line(s); ensure we set hyphenpending at the end
715 nigel 85 of the file if any context lines are being output. */
716 nigel 77
717 nigel 85 if (after_context > 0 || before_context > 0)
718     endhyphenpending = TRUE;
719    
720 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
721 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
722 nigel 77
723     /* In multiline mode, we want to print to the end of the line in which
724     the end of the matched string is found, so we adjust linelength and the
725     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
726     start of the match will always be before the first \n character. */
727    
728     if (multiline)
729     {
730     char *endmatch = ptr + offsets[1];
731     t = ptr;
732     while (t < endmatch) { if (*t++ == '\n') linenumber++; }
733     while (endmatch < endptr && *endmatch != '\n') endmatch++;
734     linelength = endmatch - ptr;
735     }
736    
737 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
738     zeroes are treated as just another data character. */
739    
740     /* This extra option, for Jeffrey Friedl's debugging requirements,
741     replaces the matched string, or a specific captured string if it exists,
742     with X. When this happens, colouring is ignored. */
743    
744     #ifdef JFRIEDL_DEBUG
745     if (S_arg >= 0 && S_arg < mrc)
746     {
747     int first = S_arg * 2;
748     int last = first + 1;
749     fwrite(ptr, 1, offsets[first], stdout);
750     fprintf(stdout, "X");
751     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
752     }
753     else
754     #endif
755    
756     /* We have to split the line(s) up if colouring. */
757    
758     if (do_colour)
759     {
760     fwrite(ptr, 1, offsets[0], stdout);
761     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
762     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
763     fprintf(stdout, "%c[00m", 0x1b);
764     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
765     }
766     else fwrite(ptr, 1, linelength, stdout);
767    
768     fprintf(stdout, "\n");
769 nigel 49 }
770    
771 nigel 87 /* End of doing what has to be done for a match */
772    
773 nigel 77 rc = 0; /* Had some success */
774    
775     /* Remember where the last match happened for after_context. We remember
776     where we are about to restart, and that line's number. */
777    
778     lastmatchrestart = ptr + linelength + 1;
779     lastmatchnumber = linenumber + 1;
780 nigel 49 }
781 nigel 77
782     /* Advance to after the newline and increment the line number. */
783    
784     ptr += linelength + 1;
785     linenumber++;
786    
787     /* If we haven't yet reached the end of the file (the buffer is full), and
788     the current point is in the top 1/3 of the buffer, slide the buffer down by
789     1/3 and refill it. Before we do this, if some unprinted "after" lines are
790     about to be lost, print them. */
791    
792     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
793     {
794     if (after_context > 0 &&
795     lastmatchnumber > 0 &&
796     lastmatchrestart < buffer + MBUFTHIRD)
797     {
798     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
799     lastmatchnumber = 0;
800     }
801    
802     /* Now do the shuffle */
803    
804     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
805     ptr -= MBUFTHIRD;
806     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
807     endptr = buffer + bufflength;
808    
809     /* Adjust any last match point */
810    
811     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
812     }
813     } /* Loop through the whole file */
814    
815     /* End of file; print final "after" lines if wanted; do_after_lines sets
816     hyphenpending if it prints something. */
817    
818 nigel 87 if (!only_matching && !count_only)
819     {
820     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
821     hyphenpending |= endhyphenpending;
822     }
823 nigel 77
824     /* Print the file name if we are looking for those without matches and there
825     were none. If we found a match, we won't have got this far. */
826    
827 nigel 87 if (filenames == FN_NOMATCH_ONLY)
828 nigel 77 {
829     fprintf(stdout, "%s\n", printname);
830     return 0;
831 nigel 49 }
832    
833 nigel 77 /* Print the match count if wanted */
834    
835 nigel 49 if (count_only)
836     {
837 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
838 nigel 49 fprintf(stdout, "%d\n", count);
839     }
840    
841     return rc;
842     }
843    
844    
845    
846     /*************************************************
847 nigel 53 * Grep a file or recurse into a directory *
848     *************************************************/
849    
850 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
851     recursing; if it's a file, grep it.
852    
853     Arguments:
854     pathname the path to investigate
855 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
856 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
857    
858     Returns: 0 if there was at least one match
859     1 if there were no matches
860     2 there was some kind of error
861    
862     However, file opening failures are suppressed if "silent" is set.
863     */
864    
865 nigel 53 static int
866 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
867 nigel 53 {
868     int rc = 1;
869     int sep;
870     FILE *in;
871    
872 nigel 77 /* If the file name is "-" we scan stdin */
873 nigel 53
874 nigel 77 if (strcmp(pathname, "-") == 0)
875 nigel 53 {
876 nigel 77 return pcregrep(stdin,
877 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
878 nigel 77 stdin_name : NULL);
879     }
880    
881    
882 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
883     each file within it, subject to any include or exclude patterns that were set.
884     The scanning code is localized so it can be made system-specific. */
885    
886     if ((sep = isdirectory(pathname)) != 0)
887 nigel 77 {
888 nigel 87 if (dee_action == dee_SKIP) return 1;
889     if (dee_action == dee_RECURSE)
890 nigel 53 {
891 nigel 87 char buffer[1024];
892     char *nextfile;
893     directory_type *dir = opendirectory(pathname);
894 nigel 53
895 nigel 87 if (dir == NULL)
896     {
897     if (!silent)
898     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
899     strerror(errno));
900     return 2;
901     }
902 nigel 77
903 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
904     {
905     int frc, blen;
906     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
907     blen = strlen(buffer);
908 nigel 77
909 nigel 87 if (exclude_compiled != NULL &&
910     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
911     continue;
912 nigel 77
913 nigel 87 if (include_compiled != NULL &&
914     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
915     continue;
916    
917     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
918     if (frc > 1) rc = frc;
919     else if (frc == 0 && rc == 1) rc = 0;
920     }
921    
922     closedirectory(dir);
923     return rc;
924 nigel 53 }
925     }
926    
927 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
928     been requested. */
929 nigel 53
930 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
931    
932     /* Control reaches here if we have a regular file, or if we have a directory
933     and recursion or skipping was not requested, or if we have anything else and
934     skipping was not requested. The scan proceeds. If this is the first and only
935     argument at top level, we don't show the file name, unless we are only showing
936     the file name, or the filename was forced (-H). */
937    
938 nigel 77 in = fopen(pathname, "r");
939 nigel 53 if (in == NULL)
940     {
941 nigel 77 if (!silent)
942     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
943     strerror(errno));
944 nigel 53 return 2;
945     }
946    
947 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
948     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
949 nigel 77
950 nigel 53 fclose(in);
951     return rc;
952     }
953    
954    
955    
956    
957     /*************************************************
958 nigel 49 * Usage function *
959     *************************************************/
960    
961     static int
962     usage(int rc)
963     {
964 nigel 87 option_item *op;
965     fprintf(stderr, "Usage: pcregrep [-");
966     for (op = optionlist; op->one_char != 0; op++)
967     {
968     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
969     }
970     fprintf(stderr, "] [long options] [pattern] [files]\n");
971 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
972 nigel 49 return rc;
973     }
974    
975    
976    
977    
978     /*************************************************
979 nigel 53 * Help function *
980     *************************************************/
981    
982     static void
983     help(void)
984     {
985     option_item *op;
986    
987 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
988 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
989 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
990     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
991 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
992    
993     printf("Options:\n");
994    
995     for (op = optionlist; op->one_char != 0; op++)
996     {
997     int n;
998     char s[4];
999     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1000     printf(" %s --%s%n", s, op->long_name, &n);
1001     n = 30 - n;
1002     if (n < 1) n = 1;
1003     printf("%.*s%s\n", n, " ", op->help_text);
1004     }
1005    
1006 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1007     printf("trailing white space is removed and blank lines are ignored.\n");
1008     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1009 nigel 53
1010 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1011 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1012     }
1013    
1014    
1015    
1016    
1017     /*************************************************
1018 nigel 77 * Handle a single-letter, no data option *
1019 nigel 53 *************************************************/
1020    
1021     static int
1022     handle_option(int letter, int options)
1023     {
1024     switch(letter)
1025     {
1026 nigel 87 case N_HELP: help(); exit(0);
1027 nigel 53 case 'c': count_only = TRUE; break;
1028 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1029     case 'H': filenames = FN_FORCE; break;
1030     case 'h': filenames = FN_NONE; break;
1031 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1032 nigel 87 case 'l': filenames = FN_ONLY; break;
1033     case 'L': filenames = FN_NOMATCH_ONLY; break;
1034 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1035 nigel 53 case 'n': number = TRUE; break;
1036 nigel 87 case 'o': only_matching = TRUE; break;
1037 nigel 77 case 'q': quiet = TRUE; break;
1038 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1039 nigel 53 case 's': silent = TRUE; break;
1040 nigel 63 case 'u': options |= PCRE_UTF8; break;
1041 nigel 53 case 'v': invert = TRUE; break;
1042 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1043     case 'x': process_options |= PO_LINE_MATCH; break;
1044 nigel 53
1045     case 'V':
1046     fprintf(stderr, "pcregrep version %s using ", VERSION);
1047     fprintf(stderr, "PCRE version %s\n", pcre_version());
1048     exit(0);
1049     break;
1050    
1051     default:
1052     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1053     exit(usage(2));
1054     }
1055    
1056     return options;
1057     }
1058    
1059    
1060    
1061    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix
that their final digit would otherwise select.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
  static char buffer[16];    /* room for a 32-bit int, sign, suffix and zero */
  const char *suffix = "th";
  int last_two = n % 100;
  int len = sprintf(buffer, "%d", n);

  /* Only look at the final digit when the number is not one of the "teens".
  For negative n the remainders are not positive, so "th" is kept. */

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: suffix = "st"; break;
      case 2: suffix = "nd"; break;
      case 3: suffix = "rd"; break;
      default: break;
    }
  }

  strcpy(buffer + len, suffix);
  return buffer;
}
1084    
1085    
1086    
1087     /*************************************************
1088     * Compile a single pattern *
1089     *************************************************/
1090    
1091     /* When the -F option has been used, this is called for each substring.
1092     Otherwise it's called for each supplied pattern.
1093    
1094     Arguments:
1095     pattern the pattern string
1096     options the PCRE options
1097     filename the file name, or NULL for a command-line pattern
1098     count 0 if this is the only command line pattern, or
1099     number of the command line pattern, or
1100     linenumber for a pattern from a file
1101    
1102     Returns: TRUE on success, FALSE after an error
1103     */
1104    
1105     static BOOL
1106     compile_single_pattern(char *pattern, int options, char *filename, int count)
1107     {
1108     char buffer[MBUFTHIRD + 16];
1109     const char *error;
1110     int errptr;
1111    
1112     if (pattern_count >= MAX_PATTERN_COUNT)
1113     {
1114     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1115     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1116     return FALSE;
1117     }
1118    
1119     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1120     suffix[process_options]);
1121     pattern_list[pattern_count] =
1122     pcre_compile(buffer, options, &error, &errptr, pcretables);
1123     if (pattern_list[pattern_count++] != NULL) return TRUE;
1124    
1125     /* Handle compile errors */
1126    
1127     errptr -= (int)strlen(prefix[process_options]);
1128     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1129    
1130     if (filename == NULL)
1131     {
1132     if (count == 0)
1133     fprintf(stderr, "pcregrep: Error in command-line regex "
1134     "at offset %d: %s\n", errptr, error);
1135     else
1136     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1137     "at offset %d: %s\n", ordin(count), errptr, error);
1138     }
1139     else
1140     {
1141     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1142     "at offset %d: %s\n", count, filename, errptr, error);
1143     }
1144    
1145     return FALSE;
1146     }
1147    
1148    
1149    
1150     /*************************************************
1151     * Compile one supplied pattern *
1152     *************************************************/
1153    
1154     /* When the -F option has been used, each string may be a list of strings,
1155     separated by newlines. They will be matched literally.
1156    
1157     Arguments:
1158     pattern the pattern string
1159     options the PCRE options
1160     filename the file name, or NULL for a command-line pattern
1161     count 0 if this is the only command line pattern, or
1162     number of the command line pattern, or
1163     linenumber for a pattern from a file
1164    
1165     Returns: TRUE on success, FALSE after an error
1166     */
1167    
1168     static BOOL
1169     compile_pattern(char *pattern, int options, char *filename, int count)
1170     {
1171     if ((process_options & PO_FIXED_STRINGS) != 0)
1172     {
1173     char buffer[MBUFTHIRD];
1174     for(;;)
1175     {
1176     char *p = strchr(pattern, '\n');
1177     if (p == NULL)
1178     return compile_single_pattern(pattern, options, filename, count);
1179     sprintf(buffer, "%.*s", p - pattern, pattern);
1180     pattern = p + 1;
1181     if (!compile_single_pattern(buffer, options, filename, count))
1182     return FALSE;
1183     }
1184     }
1185     else return compile_single_pattern(pattern, options, filename, count);
1186     }
1187    
1188    
1189    
1190     /*************************************************
1191 nigel 49 * Main program *
1192     *************************************************/
1193    
1194 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1195    
1196 nigel 49 int
1197     main(int argc, char **argv)
1198     {
1199 nigel 53 int i, j;
1200 nigel 49 int rc = 1;
1201 nigel 87 int pcre_options = 0;
1202     int cmd_pattern_count = 0;
1203 nigel 49 int errptr;
1204 nigel 87 BOOL only_one_at_top;
1205     char *patterns[MAX_PATTERN_COUNT];
1206     const char *locale_from = "--locale";
1207 nigel 49 const char *error;
1208    
1209     /* Process the options */
1210    
1211     for (i = 1; i < argc; i++)
1212     {
1213 nigel 77 option_item *op = NULL;
1214     char *option_data = (char *)""; /* default to keep compiler happy */
1215     BOOL longop;
1216     BOOL longopwasequals = FALSE;
1217    
1218 nigel 49 if (argv[i][0] != '-') break;
1219 nigel 53
1220 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1221 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1222 nigel 63
1223 nigel 77 if (argv[i][1] == 0)
1224     {
1225 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1226 nigel 77 else exit(usage(2));
1227     }
1228 nigel 63
1229 nigel 77 /* Handle a long name option, or -- to terminate the options */
1230 nigel 53
1231     if (argv[i][1] == '-')
1232 nigel 49 {
1233 nigel 77 char *arg = argv[i] + 2;
1234     char *argequals = strchr(arg, '=');
1235 nigel 53
1236 nigel 77 if (*arg == 0) /* -- terminates options */
1237 nigel 49 {
1238 nigel 77 i++;
1239     break; /* out of the options-handling loop */
1240 nigel 53 }
1241 nigel 49
1242 nigel 77 longop = TRUE;
1243    
1244     /* Some long options have data that follows after =, for example file=name.
1245     Some options have variations in the long name spelling: specifically, we
1246     allow "regexp" because GNU grep allows it, though I personally go along
1247 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1248     These options are entered in the table as "regex(p)". No option is in both
1249     these categories, fortunately. */
1250 nigel 77
1251 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1252     {
1253 nigel 77 char *opbra = strchr(op->long_name, '(');
1254     char *equals = strchr(op->long_name, '=');
1255     if (opbra == NULL) /* Not a (p) case */
1256 nigel 53 {
1257 nigel 77 if (equals == NULL) /* Not thing=data case */
1258     {
1259     if (strcmp(arg, op->long_name) == 0) break;
1260     }
1261     else /* Special case xxx=data */
1262     {
1263     int oplen = equals - op->long_name;
1264     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1265     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1266     {
1267     option_data = arg + arglen;
1268     if (*option_data == '=')
1269     {
1270     option_data++;
1271     longopwasequals = TRUE;
1272     }
1273     break;
1274     }
1275     }
1276 nigel 53 }
1277 nigel 77 else /* Special case xxxx(p) */
1278     {
1279     char buff1[24];
1280     char buff2[24];
1281     int baselen = opbra - op->long_name;
1282     sprintf(buff1, "%.*s", baselen, op->long_name);
1283     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1284     opbra + 1);
1285     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1286     break;
1287     }
1288 nigel 53 }
1289 nigel 77
1290 nigel 53 if (op->one_char == 0)
1291     {
1292     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1293     exit(usage(2));
1294     }
1295     }
1296 nigel 49
1297 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1298     continue till we hit the last one or one that needs data. */
1299 nigel 53
1300     else
1301     {
1302     char *s = argv[i] + 1;
1303 nigel 77 longop = FALSE;
1304 nigel 53 while (*s != 0)
1305     {
1306 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1307     { if (*s == op->one_char) break; }
1308     if (op->one_char == 0)
1309 nigel 53 {
1310 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1311     *s, argv[i]);
1312     exit(usage(2));
1313     }
1314     if (op->type != OP_NODATA || s[1] == 0)
1315     {
1316     option_data = s+1;
1317 nigel 53 break;
1318     }
1319 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1320 nigel 49 }
1321     }
1322 nigel 77
1323 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1324     is NO_DATA, it means that there is no data, and the option might set
1325     something in the PCRE options. */
1326 nigel 77
1327     if (op->type == OP_NODATA)
1328     {
1329 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1330     continue;
1331     }
1332    
1333     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1334     either has a value or defaults to something. It cannot have data in a
1335     separate item. At the moment, the only such options are "colo(u)r" and
1336     Jeffrey Friedl's special debugging option. */
1337    
1338     if (*option_data == 0 &&
1339     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1340     {
1341     switch (op->one_char)
1342 nigel 77 {
1343 nigel 87 case N_COLOUR:
1344     colour_option = (char *)"auto";
1345     break;
1346     #ifdef JFRIEDL_DEBUG
1347     case 'S':
1348     S_arg = 0;
1349     break;
1350     #endif
1351 nigel 77 }
1352 nigel 87 continue;
1353     }
1354 nigel 77
1355 nigel 87 /* Otherwise, find the data string for the option. */
1356    
1357     if (*option_data == 0)
1358     {
1359     if (i >= argc - 1 || longopwasequals)
1360 nigel 77 {
1361 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1362     exit(usage(2));
1363     }
1364     option_data = argv[++i];
1365     }
1366    
1367     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1368     multiple times to create a list of patterns. */
1369    
1370     if (op->type == OP_PATLIST)
1371     {
1372     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1373     {
1374     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1375     MAX_PATTERN_COUNT);
1376     return 2;
1377     }
1378     patterns[cmd_pattern_count++] = option_data;
1379     }
1380    
1381     /* Otherwise, deal with single string or numeric data values. */
1382    
1383     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1384     {
1385     *((char **)op->dataptr) = option_data;
1386     }
1387     else
1388     {
1389     char *endptr;
1390     int n = strtoul(option_data, &endptr, 10);
1391     if (*endptr != 0)
1392     {
1393     if (longop)
1394 nigel 77 {
1395 nigel 87 char *equals = strchr(op->long_name, '=');
1396     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1397     equals - op->long_name;
1398     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1399     option_data, nlen, op->long_name);
1400 nigel 77 }
1401 nigel 87 else
1402     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1403     option_data, op->one_char);
1404     exit(usage(2));
1405 nigel 77 }
1406 nigel 87 *((int *)op->dataptr) = n;
1407 nigel 77 }
1408 nigel 49 }
1409    
1410 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1411     for -A and -B. */
1412    
1413     if (both_context > 0)
1414     {
1415     if (after_context == 0) after_context = both_context;
1416     if (before_context == 0) before_context = both_context;
1417     }
1418    
1419 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1420     LC_ALL environment variable is set, and if so, use it. */
1421 nigel 49
1422 nigel 87 if (locale == NULL)
1423 nigel 53 {
1424 nigel 87 locale = getenv("LC_ALL");
1425     locale_from = "LCC_ALL";
1426 nigel 53 }
1427 nigel 49
1428 nigel 87 if (locale == NULL)
1429     {
1430     locale = getenv("LC_CTYPE");
1431     locale_from = "LC_CTYPE";
1432     }
1433 nigel 49
1434 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1435     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1436    
1437     if (locale != NULL)
1438 nigel 49 {
1439 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1440 nigel 53 {
1441 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1442     locale, locale_from);
1443 nigel 53 return 2;
1444     }
1445 nigel 87 pcretables = pcre_maketables();
1446     }
1447 nigel 77
1448 nigel 87 /* Sort out colouring */
1449    
1450     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1451     {
1452     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1453     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1454     else
1455 nigel 53 {
1456 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1457     colour_option);
1458     return 2;
1459 nigel 77 }
1460 nigel 87 if (do_colour)
1461 nigel 77 {
1462 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1463     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1464     if (cs != NULL) colour_string = cs;
1465 nigel 77 }
1466 nigel 87 }
1467 nigel 77
1468 nigel 87 /* Interpret the text values for -d and -D */
1469    
1470     if (dee_option != NULL)
1471     {
1472     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1473     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1474     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1475     else
1476 nigel 77 {
1477 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1478     return 2;
1479 nigel 53 }
1480 nigel 49 }
1481    
1482 nigel 87 if (DEE_option != NULL)
1483     {
1484     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1485     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1486     else
1487     {
1488     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1489     return 2;
1490     }
1491     }
1492 nigel 49
1493 nigel 87 /* Check the value for Jeff Friedl's debugging option. */
1494    
1495     #ifdef JFRIEDL_DEBUG
1496     if (S_arg > 9)
1497 nigel 49 {
1498 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1499     return 2;
1500     }
1501     #endif
1502 nigel 77
1503 nigel 87 /* Get memory to store the pattern and hints lists. */
1504    
1505     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1506     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1507    
1508     if (pattern_list == NULL || hints_list == NULL)
1509     {
1510     fprintf(stderr, "pcregrep: malloc failed\n");
1511     return 2;
1512     }
1513    
1514     /* If no patterns were provided by -e, and there is no file provided by -f,
1515     the first argument is the one and only pattern, and it must exist. */
1516    
1517     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1518     {
1519 nigel 63 if (i >= argc) return usage(2);
1520 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1521     }
1522 nigel 77
1523 nigel 87 /* Compile the patterns that were provided on the command line, either by
1524     multiple uses of -e or as a single unkeyed pattern. */
1525    
1526     for (j = 0; j < cmd_pattern_count; j++)
1527     {
1528     if (!compile_pattern(patterns[j], pcre_options, NULL,
1529     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1530     return 2;
1531     }
1532    
1533     /* Compile the regular expressions that are provided in a file. */
1534    
1535     if (pattern_filename != NULL)
1536     {
1537     int linenumber = 0;
1538     FILE *f;
1539     char *filename;
1540     char buffer[MBUFTHIRD];
1541    
1542     if (strcmp(pattern_filename, "-") == 0)
1543 nigel 77 {
1544 nigel 87 f = stdin;
1545     filename = stdin_name;
1546 nigel 77 }
1547 nigel 87 else
1548 nigel 77 {
1549 nigel 87 f = fopen(pattern_filename, "r");
1550     if (f == NULL)
1551     {
1552     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1553     strerror(errno));
1554     return 2;
1555     }
1556     filename = pattern_filename;
1557 nigel 77 }
1558    
1559 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1560 nigel 53 {
1561 nigel 87 char *s = buffer + (int)strlen(buffer);
1562     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1563     *s = 0;
1564     linenumber++;
1565     if (buffer[0] == 0) continue; /* Skip blank lines */
1566     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1567     return 2;
1568 nigel 53 }
1569 nigel 87
1570     if (f != stdin) fclose(f);
1571 nigel 49 }
1572    
1573 nigel 77 /* Study the regular expressions, as we will be running them many times */
1574 nigel 53
1575     for (j = 0; j < pattern_count; j++)
1576     {
1577     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1578     if (error != NULL)
1579     {
1580     char s[16];
1581     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1582     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1583     return 2;
1584     }
1585     }
1586    
1587 nigel 77 /* If there are include or exclude patterns, compile them. */
1588    
1589     if (exclude_pattern != NULL)
1590     {
1591 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1592     pcretables);
1593 nigel 77 if (exclude_compiled == NULL)
1594     {
1595     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1596     errptr, error);
1597     return 2;
1598     }
1599     }
1600    
1601     if (include_pattern != NULL)
1602     {
1603 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1604     pcretables);
1605 nigel 77 if (include_compiled == NULL)
1606     {
1607     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1608     errptr, error);
1609     return 2;
1610     }
1611     }
1612    
1613 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
1614 nigel 49
1615 nigel 87 if (i >= argc)
1616     return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1617 nigel 49
1618 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
1619     Pass in the fact that there is only one argument at top level - this suppresses
1620 nigel 87 the file name if the argument is not a directory and filenames are not
1621     otherwise forced. */
1622 nigel 49
1623 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1624 nigel 49
1625     for (; i < argc; i++)
1626     {
1627 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1628     only_one_at_top);
1629 nigel 77 if (frc > 1) rc = frc;
1630     else if (frc == 0 && rc == 1) rc = 0;
1631 nigel 49 }
1632    
1633     return rc;
1634     }
1635    
1636 nigel 77 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12