/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (hide annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 7 months ago) by nigel
File MIME type: text/plain
File size: 52037 byte(s)
Load pcre-6.7 into code/trunk.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 nigel 87 Copyright (c) 1997-2006 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 nigel 53 #include <ctype.h>
41 nigel 87 #include <locale.h>
42 nigel 49 #include <stdio.h>
43     #include <string.h>
44     #include <stdlib.h>
45     #include <errno.h>
46 nigel 77
47     #include <sys/types.h>
48     #include <sys/stat.h>
49     #include <unistd.h>
50    
51 nigel 49 #include "config.h"
52     #include "pcre.h"
53    
/* Home-grown Boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Version string, and the pattern-count limit. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.3 01-Jun-2006"

/* MBUFTHIRD is the larger of BUFSIZ and 8192. The main scanning buffer in
pcregrep() is declared as three times this size. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which selects how file names are
output. The numerical order matters: a file name is wanted for every value
greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Actions for the -d option */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

/* Actions for the -D option */

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for special pattern processing, held in process_options */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88 nigel 49 /*************************************************
89     * Global variables *
90     *************************************************/
91    
92 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
93     regular code. */
94    
95     #ifdef JFRIEDL_DEBUG
96     static int S_arg = -1;
97 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99     static const char *jfriedl_prefix = "";
100     static const char *jfriedl_postfix = "";
101 nigel 87 #endif
102    
103 nigel 91 static int endlinebyte = '\n'; /* Last byte of endline sequence */
104     static int endlineextra = 0; /* Extra bytes for endline sequence */
105    
106 nigel 87 static char *colour_string = (char *)"1;31";
107     static char *colour_option = NULL;
108     static char *dee_option = NULL;
109     static char *DEE_option = NULL;
110 nigel 91 static char *newline = NULL;
111 nigel 53 static char *pattern_filename = NULL;
112 nigel 77 static char *stdin_name = (char *)"(standard input)";
113 nigel 87 static char *locale = NULL;
114    
115     static const unsigned char *pcretables = NULL;
116    
117 nigel 53 static int pattern_count = 0;
118     static pcre **pattern_list;
119     static pcre_extra **hints_list;
120 nigel 49
121 nigel 77 static char *include_pattern = NULL;
122     static char *exclude_pattern = NULL;
123    
124     static pcre *include_compiled = NULL;
125     static pcre *exclude_compiled = NULL;
126    
127     static int after_context = 0;
128     static int before_context = 0;
129     static int both_context = 0;
130 nigel 87 static int dee_action = dee_READ;
131     static int DEE_action = DEE_READ;
132     static int error_count = 0;
133     static int filenames = FN_DEFAULT;
134     static int process_options = 0;
135 nigel 77
136 nigel 49 static BOOL count_only = FALSE;
137 nigel 87 static BOOL do_colour = FALSE;
138 nigel 77 static BOOL hyphenpending = FALSE;
139 nigel 49 static BOOL invert = FALSE;
140 nigel 77 static BOOL multiline = FALSE;
141 nigel 49 static BOOL number = FALSE;
142 nigel 87 static BOOL only_matching = FALSE;
143 nigel 77 static BOOL quiet = FALSE;
144 nigel 49 static BOOL silent = FALSE;
145    
146 nigel 53 /* Structure for options and list of them */
147 nigel 49
148 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
149     OP_PATLIST };
150 nigel 77
151 nigel 53 typedef struct option_item {
152 nigel 77 int type;
153 nigel 53 int one_char;
154 nigel 77 void *dataptr;
155 nigel 67 const char *long_name;
156     const char *help_text;
157 nigel 53 } option_item;
158 nigel 49
159 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
160     used to identify them. */
161    
162     #define N_COLOUR (-1)
163     #define N_EXCLUDE (-2)
164     #define N_HELP (-3)
165     #define N_INCLUDE (-4)
166     #define N_LABEL (-5)
167     #define N_LOCALE (-6)
168     #define N_NULL (-7)
169    
170 nigel 53 static option_item optionlist[] = {
171 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
172     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
173     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
174     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
175     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
176     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
177     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
178     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
179     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
180     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
181     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
182     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
183     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
184     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
185     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
186     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
187     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
188     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
189     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
190     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
191     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
192 nigel 91 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
193 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
194     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
195     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
196     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
197     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
198     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
199     #ifdef JFRIEDL_DEBUG
200     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
201     #endif
202     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
203     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
204     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
205     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
206     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
207     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
208     { OP_NODATA, 0, NULL, NULL, NULL }
209 nigel 53 };
210    
211 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
212     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
213     that the combination of -w and -x has the same effect as -x on its own, so we
214     can treat them as the same. */
215 nigel 53
/* Text put in front of each pattern, indexed by the low three bits of
process_options (1 = -w, 2 = -x, 4 = -F). */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x, same as -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x, same as -F -x */
};

/* Text put after each pattern; same indexing as prefix[]. */

static const char *suffix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  ")$",          /* 2 */
  ")$",          /* 3 */
  "\\E",         /* 4 */
  "\\E\\b",      /* 5 */
  "\\E)$",       /* 6 */
  "\\E)$"        /* 7 */
};
221    
222    
223    
224 nigel 53 /*************************************************
225 nigel 87 * OS-specific functions *
226 nigel 53 *************************************************/
227    
228     /* These functions are defined so that they can be made system specific,
229 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
230 nigel 53
231    
232     /************* Directory scanning in Unix ***********/
233    
234     #if IS_UNIX
235     #include <sys/types.h>
236     #include <sys/stat.h>
237     #include <dirent.h>
238    
typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails, in the expectation
              that opening the path as a file will then fail as well
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;

  /* S_ISDIR is the POSIX test macro and is visible even when the S_IFMT
  mask itself is hidden by strict ISO compilation modes. */

  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
249    
250 nigel 67 static directory_type *
251 nigel 53 opendirectory(char *filename)
252     {
253     return opendir(filename);
254     }
255    
256 nigel 67 static char *
257 nigel 53 readdirectory(directory_type *dir)
258     {
259     for (;;)
260     {
261     struct dirent *dent = readdir(dir);
262     if (dent == NULL) return NULL;
263     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
264     return dent->d_name;
265     }
266     return NULL; /* Keep compiler happy; never executed */
267     }
268    
269 nigel 67 static void
270 nigel 53 closedirectory(directory_type *dir)
271     {
272     closedir(dir);
273     }
274    
275    
276 nigel 87 /************* Test for regular file in Unix **********/
277    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, or if stat() fails (in the
              expectation that opening the path as a file will then fail)
            0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;

  /* S_ISREG is the POSIX test macro and is visible even when the S_IFMT
  mask itself is hidden by strict ISO compilation modes. */

  return S_ISREG(statbuf.st_mode) ? 1 : 0;
}
286    
287    
288     /************* Test stdout for being a terminal in Unix **********/
289    
290     static BOOL
291     is_stdout_tty(void)
292     {
293     return isatty(fileno(stdout));
294     }
295    
296    
297 nigel 63 /************* Directory scanning in Win32 ***********/
298 nigel 53
299 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
300 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
301     when it did not exist. */
302 nigel 53
303 nigel 63
304     #elif HAVE_WIN32API
305    
306     #ifndef STRICT
307     # define STRICT
308     #endif
309     #ifndef WIN32_LEAN_AND_MEAN
310     # define WIN32_LEAN_AND_MEAN
311     #endif
312 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
313     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
314     #endif
315    
316 nigel 63 #include <windows.h>
317    
318     typedef struct directory_type
319     {
320     HANDLE handle;
321     BOOL first;
322     WIN32_FIND_DATA data;
323     } directory_type;
324    
325     int
326     isdirectory(char *filename)
327     {
328     DWORD attr = GetFileAttributes(filename);
329     if (attr == INVALID_FILE_ATTRIBUTES)
330     return 0;
331     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
332     }
333    
334     directory_type *
335     opendirectory(char *filename)
336     {
337     size_t len;
338     char *pattern;
339     directory_type *dir;
340     DWORD err;
341     len = strlen(filename);
342     pattern = (char *) malloc(len + 3);
343     dir = (directory_type *) malloc(sizeof(*dir));
344     if ((pattern == NULL) || (dir == NULL))
345     {
346     fprintf(stderr, "pcregrep: malloc failed\n");
347     exit(2);
348     }
349     memcpy(pattern, filename, len);
350     memcpy(&(pattern[len]), "\\*", 3);
351     dir->handle = FindFirstFile(pattern, &(dir->data));
352     if (dir->handle != INVALID_HANDLE_VALUE)
353     {
354     free(pattern);
355     dir->first = TRUE;
356     return dir;
357     }
358     err = GetLastError();
359     free(pattern);
360     free(dir);
361     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
362     return NULL;
363     }
364    
365     char *
366     readdirectory(directory_type *dir)
367     {
368     for (;;)
369     {
370     if (!dir->first)
371     {
372     if (!FindNextFile(dir->handle, &(dir->data)))
373     return NULL;
374     }
375     else
376     {
377     dir->first = FALSE;
378     }
379     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
380     return dir->data.cFileName;
381     }
382     #ifndef _MSC_VER
383     return NULL; /* Keep compiler happy; never executed */
384     #endif
385     }
386    
387     void
388     closedirectory(directory_type *dir)
389     {
390     FindClose(dir->handle);
391     free(dir);
392     }
393    
394    
395 nigel 87 /************* Test for regular file in Win32 **********/
396    
397     /* I don't know how to do this, or if it can be done; assume all paths are
398     regular if they are not directories. */
399    
/* Treat every path that is not a directory as a regular file. Returns 1 when
isdirectory() yields 0, and 0 otherwise. (The return statement previously
lacked its terminating semicolon and could not compile.) */

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
404    
405    
406     /************* Test stdout for being a terminal in Win32 **********/
407    
408     /* I don't know how to do this; assume never */
409    
410     static BOOL
411     is_stdout_tty(void)
412     {
413     FALSE;
414     }
415    
416    
417 nigel 53 /************* Directory scanning when we can't do it ***********/
418    
419     /* The type is void, and apart from isdirectory(), the functions do nothing. */
420    
421 nigel 63 #else
422    
typedef void directory_type;

/* Stand-ins for systems with no directory support. isdirectory() always
answers 0. The other three do nothing, but opendirectory() and
readdirectory() now return NULL explicitly: their bodies used to be empty,
so a caller reading the result had no defined value to read. */

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type * opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

void closedirectory(directory_type *dir)
{
  (void)dir;
}
429    
430 nigel 87
/************* Test for regular file when we can't do it **********/
432    
433     /* Assume all files are regular. */
434    
/* With no way of testing, report every path as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
436    
437    
438     /************* Test stdout for being a terminal when we can't do it **********/
439    
440     static BOOL
441     is_stdout_tty(void)
442     {
443     return FALSE;
444     }
445    
446    
447 nigel 53 #endif
448    
449    
450    
451 nigel 49 #if ! HAVE_STRERROR
452     /*************************************************
453     * Provide strerror() for non-ANSI libraries *
454     *************************************************/
455    
456     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
457     in their libraries, but can provide the same facility by this simple
458     alternative function. */
459    
extern int sys_nerr;
extern char *sys_errlist[];

/* Map an error number to its message by looking it up in sys_errlist[].
Numbers below zero, or not below sys_nerr, get a fixed fallback text. */

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
469     #endif /* HAVE_STRERROR */
470    
471    
472    
473     /*************************************************
474 nigel 77 * Print the previous "after" lines *
475 nigel 49 *************************************************/
476    
477 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
478 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
479     that a binary zero does not terminate it.
480 nigel 77
481     Arguments:
482     lastmatchnumber the number of the last matching line, plus one
483     lastmatchrestart where we restarted after the last match
484     endptr end of available data
485     printname filename for printing
486    
487     Returns: nothing
488     */
489    
490     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
491     char *endptr, char *printname)
492     {
493     if (after_context > 0 && lastmatchnumber > 0)
494     {
495     int count = 0;
496     while (lastmatchrestart < endptr && count++ < after_context)
497     {
498     char *pp = lastmatchrestart;
499     if (printname != NULL) fprintf(stdout, "%s-", printname);
500     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
501 nigel 91 while (*pp != endlinebyte) pp++;
502     fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
503     stdout);
504 nigel 77 lastmatchrestart = pp + 1;
505     }
506     hyphenpending = TRUE;
507     }
508     }
509    
510    
511    
512     /*************************************************
513     * Grep an individual file *
514     *************************************************/
515    
516     /* This is called from grep_or_recurse() below. It uses a buffer that is three
517     times the value of MBUFTHIRD. The matching point is never allowed to stray into
518     the top third of the buffer, thus keeping more of the file available for
519     context printing or for multiline scanning. For large files, the pointer will
520     be in the middle third most of the time, so the bottom third is available for
521     "before" context printing.
522    
523     Arguments:
524     in the fopened FILE stream
525     printname the file name if it is to be printed for each match
526     or NULL if the file name is not to be printed
527     it cannot be NULL if filenames[_nomatch]_only is set
528    
529     Returns: 0 if there was at least one match
530     1 otherwise (no matches)
531     */
532    
533 nigel 49 static int
534 nigel 77 pcregrep(FILE *in, char *printname)
535 nigel 49 {
536     int rc = 1;
537 nigel 77 int linenumber = 1;
538     int lastmatchnumber = 0;
539 nigel 49 int count = 0;
540     int offsets[99];
541 nigel 77 char *lastmatchrestart = NULL;
542     char buffer[3*MBUFTHIRD];
543     char *ptr = buffer;
544     char *endptr;
545     size_t bufflength;
546     BOOL endhyphenpending = FALSE;
547 nigel 49
548 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
549     end of what we have. */
550    
551     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
552     endptr = buffer + bufflength;
553    
554     /* Loop while the current pointer is not at the end of the file. For large
555     files, endptr will be at the end of the buffer when we are in the middle of the
556     file, but ptr will never get there, because as soon as it gets over 2/3 of the
557     way, the buffer is shifted left and re-filled. */
558    
559     while (ptr < endptr)
560 nigel 49 {
561 nigel 77 int i;
562 nigel 87 int mrc = 0;
563 nigel 53 BOOL match = FALSE;
564 nigel 77 char *t = ptr;
565     size_t length, linelength;
566 nigel 49
567 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
568     of the subject string to pass to pcre_exec(). In multiline mode, it is the
569     length remainder of the data in the buffer. Otherwise, it is the length of
570     the next line. After matching, we always advance by the length of the next
571     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
572     that any match is constrained to be in the first line. */
573    
574     linelength = 0;
575 nigel 91 while (t < endptr && *t++ != endlinebyte) linelength++;
576 nigel 77 length = multiline? endptr - ptr : linelength;
577    
578 nigel 89
579     /* Extra processing for Jeffrey Friedl's debugging. */
580    
581     #ifdef JFRIEDL_DEBUG
582     if (jfriedl_XT || jfriedl_XR)
583     {
584     #include <sys/time.h>
585     #include <time.h>
586     struct timeval start_time, end_time;
587     struct timezone dummy;
588    
589     if (jfriedl_XT)
590     {
591     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
592     const char *orig = ptr;
593     ptr = malloc(newlen + 1);
594     if (!ptr) {
595     printf("out of memory");
596     exit(2);
597     }
598     endptr = ptr;
599     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
600     for (i = 0; i < jfriedl_XT; i++) {
601     strncpy(endptr, orig, length);
602     endptr += length;
603     }
604     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
605     length = newlen;
606     }
607    
608     if (gettimeofday(&start_time, &dummy) != 0)
609     perror("bad gettimeofday");
610    
611    
612     for (i = 0; i < jfriedl_XR; i++)
613     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
614    
615     if (gettimeofday(&end_time, &dummy) != 0)
616     perror("bad gettimeofday");
617    
618     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
619     -
620     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
621    
622     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
623     return 0;
624     }
625     #endif
626    
627    
628 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
629     the final newline in the subject string. */
630    
631 nigel 87 for (i = 0; i < pattern_count; i++)
632 nigel 53 {
633 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
634     offsets, 99);
635     if (mrc >= 0) { match = TRUE; break; }
636     if (mrc != PCRE_ERROR_NOMATCH)
637     {
638     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
639     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
640     fprintf(stderr, "this line:\n");
641     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
642     fprintf(stderr, "\n");
643     if (error_count == 0 &&
644     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
645     {
646     fprintf(stderr, "pcregrep: error %d means that a resource limit "
647     "was exceeded\n", mrc);
648     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
649     }
650     if (error_count++ > 20)
651     {
652     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
653     exit(2);
654     }
655     match = invert; /* No more matching; don't show the line again */
656     break;
657     }
658 nigel 53 }
659 nigel 49
660 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
661 nigel 77
662 nigel 49 if (match != invert)
663     {
664 nigel 77 BOOL hyphenprinted = FALSE;
665    
666 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
667 nigel 77
668 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
669    
670     /* Just count if just counting is wanted. */
671    
672 nigel 49 if (count_only) count++;
673    
674 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
675     in the file. */
676    
677     else if (filenames == FN_ONLY)
678 nigel 49 {
679 nigel 77 fprintf(stdout, "%s\n", printname);
680 nigel 49 return 0;
681     }
682    
683 nigel 87 /* Likewise, if all we want is a yes/no answer. */
684    
685 nigel 77 else if (quiet) return 0;
686 nigel 49
687 nigel 87 /* The --only-matching option prints just the substring that matched, and
688     does not pring any context. */
689    
690     else if (only_matching)
691     {
692     if (printname != NULL) fprintf(stdout, "%s:", printname);
693     if (number) fprintf(stdout, "%d:", linenumber);
694     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
695     fprintf(stdout, "\n");
696     }
697    
698     /* This is the default case when none of the above options is set. We print
699     the matching lines(s), possibly preceded and/or followed by other lines of
700     context. */
701    
702 nigel 49 else
703     {
704 nigel 77 /* See if there is a requirement to print some "after" lines from a
705     previous match. We never print any overlaps. */
706    
707     if (after_context > 0 && lastmatchnumber > 0)
708     {
709     int linecount = 0;
710     char *p = lastmatchrestart;
711    
712     while (p < ptr && linecount < after_context)
713     {
714 nigel 91 while (*p != endlinebyte) p++;
715 nigel 77 p++;
716     linecount++;
717     }
718    
719     /* It is important to advance lastmatchrestart during this printing so
720 nigel 87 that it interacts correctly with any "before" printing below. Print
721     each line's data using fwrite() in case there are binary zeroes. */
722 nigel 77
723     while (lastmatchrestart < p)
724     {
725     char *pp = lastmatchrestart;
726     if (printname != NULL) fprintf(stdout, "%s-", printname);
727     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
728 nigel 91 while (*pp != endlinebyte) pp++;
729     fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
730     (1 + endlineextra), stdout);
731 nigel 77 lastmatchrestart = pp + 1;
732     }
733     if (lastmatchrestart != ptr) hyphenpending = TRUE;
734     }
735    
736     /* If there were non-contiguous lines printed above, insert hyphens. */
737    
738     if (hyphenpending)
739     {
740     fprintf(stdout, "--\n");
741     hyphenpending = FALSE;
742     hyphenprinted = TRUE;
743     }
744    
745     /* See if there is a requirement to print some "before" lines for this
746     match. Again, don't print overlaps. */
747    
748     if (before_context > 0)
749     {
750     int linecount = 0;
751     char *p = ptr;
752    
753     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
754 nigel 87 linecount < before_context)
755 nigel 77 {
756 nigel 87 linecount++;
757 nigel 77 p--;
758 nigel 91 while (p > buffer && p[-1] != endlinebyte) p--;
759 nigel 77 }
760    
761     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
762     fprintf(stdout, "--\n");
763    
764     while (p < ptr)
765     {
766     char *pp = p;
767     if (printname != NULL) fprintf(stdout, "%s-", printname);
768     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
769 nigel 91 while (*pp != endlinebyte) pp++;
770     fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
771 nigel 77 p = pp + 1;
772     }
773     }
774    
775     /* Now print the matching line(s); ensure we set hyphenpending at the end
776 nigel 85 of the file if any context lines are being output. */
777 nigel 77
778 nigel 85 if (after_context > 0 || before_context > 0)
779     endhyphenpending = TRUE;
780    
781 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
782 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
783 nigel 77
784     /* In multiline mode, we want to print to the end of the line in which
785     the end of the matched string is found, so we adjust linelength and the
786     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
787 nigel 91 start of the match will always be before the first newline sequence. */
788 nigel 77
789     if (multiline)
790     {
791     char *endmatch = ptr + offsets[1];
792     t = ptr;
793 nigel 91 while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
794     while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
795 nigel 77 linelength = endmatch - ptr;
796     }
797    
798 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
799     zeroes are treated as just another data character. */
800    
801     /* This extra option, for Jeffrey Friedl's debugging requirements,
802     replaces the matched string, or a specific captured string if it exists,
803     with X. When this happens, colouring is ignored. */
804    
805     #ifdef JFRIEDL_DEBUG
806     if (S_arg >= 0 && S_arg < mrc)
807     {
808     int first = S_arg * 2;
809     int last = first + 1;
810     fwrite(ptr, 1, offsets[first], stdout);
811     fprintf(stdout, "X");
812     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
813     }
814     else
815     #endif
816    
817     /* We have to split the line(s) up if colouring. */
818    
819     if (do_colour)
820     {
821     fwrite(ptr, 1, offsets[0], stdout);
822     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
823     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
824     fprintf(stdout, "%c[00m", 0x1b);
825     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
826     }
827     else fwrite(ptr, 1, linelength, stdout);
828    
829     fprintf(stdout, "\n");
830 nigel 49 }
831    
832 nigel 87 /* End of doing what has to be done for a match */
833    
834 nigel 77 rc = 0; /* Had some success */
835    
836     /* Remember where the last match happened for after_context. We remember
837     where we are about to restart, and that line's number. */
838    
839     lastmatchrestart = ptr + linelength + 1;
840     lastmatchnumber = linenumber + 1;
841 nigel 49 }
842 nigel 77
843     /* Advance to after the newline and increment the line number. */
844    
845     ptr += linelength + 1;
846     linenumber++;
847    
848     /* If we haven't yet reached the end of the file (the buffer is full), and
849     the current point is in the top 1/3 of the buffer, slide the buffer down by
850     1/3 and refill it. Before we do this, if some unprinted "after" lines are
851     about to be lost, print them. */
852    
853     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
854     {
855     if (after_context > 0 &&
856     lastmatchnumber > 0 &&
857     lastmatchrestart < buffer + MBUFTHIRD)
858     {
859     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
860     lastmatchnumber = 0;
861     }
862    
863     /* Now do the shuffle */
864    
865     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
866     ptr -= MBUFTHIRD;
867     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
868     endptr = buffer + bufflength;
869    
870     /* Adjust any last match point */
871    
872     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
873     }
874     } /* Loop through the whole file */
875    
876     /* End of file; print final "after" lines if wanted; do_after_lines sets
877     hyphenpending if it prints something. */
878    
879 nigel 87 if (!only_matching && !count_only)
880     {
881     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
882     hyphenpending |= endhyphenpending;
883     }
884 nigel 77
885     /* Print the file name if we are looking for those without matches and there
886     were none. If we found a match, we won't have got this far. */
887    
888 nigel 87 if (filenames == FN_NOMATCH_ONLY)
889 nigel 77 {
890     fprintf(stdout, "%s\n", printname);
891     return 0;
892 nigel 49 }
893    
894 nigel 77 /* Print the match count if wanted */
895    
896 nigel 49 if (count_only)
897     {
898 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
899 nigel 49 fprintf(stdout, "%d\n", count);
900     }
901    
902     return rc;
903     }
904    
905    
906    
907     /*************************************************
908 nigel 53 * Grep a file or recurse into a directory *
909     *************************************************/
910    
911 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
912     recursing; if it's a file, grep it.
913    
914     Arguments:
915     pathname the path to investigate
916 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
917 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
918    
919     Returns: 0 if there was at least one match
920     1 if there were no matches
921     2 there was some kind of error
922    
923     However, file opening failures are suppressed if "silent" is set.
924     */
925    
926 nigel 53 static int
927 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
928 nigel 53 {
929     int rc = 1;
930     int sep;
931     FILE *in;
932    
933 nigel 77 /* If the file name is "-" we scan stdin */
934 nigel 53
935 nigel 77 if (strcmp(pathname, "-") == 0)
936 nigel 53 {
937 nigel 77 return pcregrep(stdin,
938 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
939 nigel 77 stdin_name : NULL);
940     }
941    
942    
943 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
944     each file within it, subject to any include or exclude patterns that were set.
945     The scanning code is localized so it can be made system-specific. */
946    
947     if ((sep = isdirectory(pathname)) != 0)
948 nigel 77 {
949 nigel 87 if (dee_action == dee_SKIP) return 1;
950     if (dee_action == dee_RECURSE)
951 nigel 53 {
952 nigel 87 char buffer[1024];
953     char *nextfile;
954     directory_type *dir = opendirectory(pathname);
955 nigel 53
956 nigel 87 if (dir == NULL)
957     {
958     if (!silent)
959     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
960     strerror(errno));
961     return 2;
962     }
963 nigel 77
964 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
965     {
966     int frc, blen;
967     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
968     blen = strlen(buffer);
969 nigel 77
970 nigel 87 if (exclude_compiled != NULL &&
971     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
972     continue;
973 nigel 77
974 nigel 87 if (include_compiled != NULL &&
975     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
976     continue;
977    
978     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
979     if (frc > 1) rc = frc;
980     else if (frc == 0 && rc == 1) rc = 0;
981     }
982    
983     closedirectory(dir);
984     return rc;
985 nigel 53 }
986     }
987    
988 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
989     been requested. */
990 nigel 53
991 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
992    
993     /* Control reaches here if we have a regular file, or if we have a directory
994     and recursion or skipping was not requested, or if we have anything else and
995     skipping was not requested. The scan proceeds. If this is the first and only
996     argument at top level, we don't show the file name, unless we are only showing
997     the file name, or the filename was forced (-H). */
998    
999 nigel 77 in = fopen(pathname, "r");
1000 nigel 53 if (in == NULL)
1001     {
1002 nigel 77 if (!silent)
1003     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1004     strerror(errno));
1005 nigel 53 return 2;
1006     }
1007    
1008 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1009     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1010 nigel 77
1011 nigel 53 fclose(in);
1012     return rc;
1013     }
1014    
1015    
1016    
1017    
1018     /*************************************************
1019 nigel 49 * Usage function *
1020     *************************************************/
1021    
1022     static int
1023     usage(int rc)
1024     {
1025 nigel 87 option_item *op;
1026     fprintf(stderr, "Usage: pcregrep [-");
1027     for (op = optionlist; op->one_char != 0; op++)
1028     {
1029     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1030     }
1031     fprintf(stderr, "] [long options] [pattern] [files]\n");
1032 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1033 nigel 49 return rc;
1034     }
1035    
1036    
1037    
1038    
1039     /*************************************************
1040 nigel 53 * Help function *
1041     *************************************************/
1042    
1043     static void
1044     help(void)
1045     {
1046     option_item *op;
1047    
1048 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1049 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1050 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1051     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1052 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1053    
1054     printf("Options:\n");
1055    
1056     for (op = optionlist; op->one_char != 0; op++)
1057     {
1058     int n;
1059     char s[4];
1060     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1061     printf(" %s --%s%n", s, op->long_name, &n);
1062     n = 30 - n;
1063     if (n < 1) n = 1;
1064     printf("%.*s%s\n", n, " ", op->help_text);
1065     }
1066    
1067 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1068     printf("trailing white space is removed and blank lines are ignored.\n");
1069     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1070 nigel 53
1071 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1072 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1073     }
1074    
1075    
1076    
1077    
1078     /*************************************************
1079 nigel 77 * Handle a single-letter, no data option *
1080 nigel 53 *************************************************/
1081    
1082     static int
1083     handle_option(int letter, int options)
1084     {
1085     switch(letter)
1086     {
1087 nigel 87 case N_HELP: help(); exit(0);
1088 nigel 53 case 'c': count_only = TRUE; break;
1089 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1090     case 'H': filenames = FN_FORCE; break;
1091     case 'h': filenames = FN_NONE; break;
1092 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1093 nigel 87 case 'l': filenames = FN_ONLY; break;
1094     case 'L': filenames = FN_NOMATCH_ONLY; break;
1095 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1096 nigel 53 case 'n': number = TRUE; break;
1097 nigel 87 case 'o': only_matching = TRUE; break;
1098 nigel 77 case 'q': quiet = TRUE; break;
1099 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1100 nigel 53 case 's': silent = TRUE; break;
1101 nigel 63 case 'u': options |= PCRE_UTF8; break;
1102 nigel 53 case 'v': invert = TRUE; break;
1103 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1104     case 'x': process_options |= PO_LINE_MATCH; break;
1105 nigel 53
1106     case 'V':
1107     fprintf(stderr, "pcregrep version %s using ", VERSION);
1108     fprintf(stderr, "PCRE version %s\n", pcre_version());
1109     exit(0);
1110     break;
1111    
1112     default:
1113     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1114     exit(usage(2));
1115     }
1116    
1117     return options;
1118     }
1119    
1120    
1121    
1122    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do zero and negative
numbers.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[32];     /* Ample for the digits of an int plus suffix */
const char *ending = "th";
int last_two = n % 100;

/* The last digit chooses the ending, except in the teens. */

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1145    
1146    
1147    
1148     /*************************************************
1149     * Compile a single pattern *
1150     *************************************************/
1151    
1152     /* When the -F option has been used, this is called for each substring.
1153     Otherwise it's called for each supplied pattern.
1154    
1155     Arguments:
1156     pattern the pattern string
1157     options the PCRE options
1158     filename the file name, or NULL for a command-line pattern
1159     count 0 if this is the only command line pattern, or
1160     number of the command line pattern, or
1161     linenumber for a pattern from a file
1162    
1163     Returns: TRUE on success, FALSE after an error
1164     */
1165    
1166     static BOOL
1167     compile_single_pattern(char *pattern, int options, char *filename, int count)
1168     {
1169     char buffer[MBUFTHIRD + 16];
1170     const char *error;
1171     int errptr;
1172    
1173     if (pattern_count >= MAX_PATTERN_COUNT)
1174     {
1175     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1176     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1177     return FALSE;
1178     }
1179    
1180     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1181     suffix[process_options]);
1182     pattern_list[pattern_count] =
1183     pcre_compile(buffer, options, &error, &errptr, pcretables);
1184     if (pattern_list[pattern_count++] != NULL) return TRUE;
1185    
1186     /* Handle compile errors */
1187    
1188     errptr -= (int)strlen(prefix[process_options]);
1189     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1190    
1191     if (filename == NULL)
1192     {
1193     if (count == 0)
1194     fprintf(stderr, "pcregrep: Error in command-line regex "
1195     "at offset %d: %s\n", errptr, error);
1196     else
1197     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1198     "at offset %d: %s\n", ordin(count), errptr, error);
1199     }
1200     else
1201     {
1202     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1203     "at offset %d: %s\n", count, filename, errptr, error);
1204     }
1205    
1206     return FALSE;
1207     }
1208    
1209    
1210    
1211     /*************************************************
1212     * Compile one supplied pattern *
1213     *************************************************/
1214    
1215     /* When the -F option has been used, each string may be a list of strings,
1216 nigel 91 separated by line breaks. They will be matched literally.
1217 nigel 87
1218     Arguments:
1219     pattern the pattern string
1220     options the PCRE options
1221     filename the file name, or NULL for a command-line pattern
1222     count 0 if this is the only command line pattern, or
1223     number of the command line pattern, or
1224     linenumber for a pattern from a file
1225    
1226     Returns: TRUE on success, FALSE after an error
1227     */
1228    
1229     static BOOL
1230     compile_pattern(char *pattern, int options, char *filename, int count)
1231     {
1232     if ((process_options & PO_FIXED_STRINGS) != 0)
1233     {
1234     char buffer[MBUFTHIRD];
1235     for(;;)
1236     {
1237 nigel 91 char *p = strchr(pattern, endlinebyte);
1238 nigel 87 if (p == NULL)
1239     return compile_single_pattern(pattern, options, filename, count);
1240 nigel 91 sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
1241 nigel 87 pattern = p + 1;
1242     if (!compile_single_pattern(buffer, options, filename, count))
1243     return FALSE;
1244     }
1245     }
1246     else return compile_single_pattern(pattern, options, filename, count);
1247     }
1248    
1249    
1250    
1251     /*************************************************
1252 nigel 49 * Main program *
1253     *************************************************/
1254    
1255 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1256    
1257 nigel 49 int
1258     main(int argc, char **argv)
1259     {
1260 nigel 53 int i, j;
1261 nigel 49 int rc = 1;
1262 nigel 87 int pcre_options = 0;
1263     int cmd_pattern_count = 0;
1264 nigel 49 int errptr;
1265 nigel 87 BOOL only_one_at_top;
1266     char *patterns[MAX_PATTERN_COUNT];
1267     const char *locale_from = "--locale";
1268 nigel 49 const char *error;
1269    
1270 nigel 91 /* Set the default line ending value from the default in the PCRE library. */
1271    
1272     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1273     switch(i)
1274     {
1275     default: newline = (char *)"lf"; break;
1276     case '\r': newline = (char *)"cr"; break;
1277     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1278     }
1279    
1280 nigel 49 /* Process the options */
1281    
1282     for (i = 1; i < argc; i++)
1283     {
1284 nigel 77 option_item *op = NULL;
1285     char *option_data = (char *)""; /* default to keep compiler happy */
1286     BOOL longop;
1287     BOOL longopwasequals = FALSE;
1288    
1289 nigel 49 if (argv[i][0] != '-') break;
1290 nigel 53
1291 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1292 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1293 nigel 63
1294 nigel 77 if (argv[i][1] == 0)
1295     {
1296 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1297 nigel 77 else exit(usage(2));
1298     }
1299 nigel 63
1300 nigel 77 /* Handle a long name option, or -- to terminate the options */
1301 nigel 53
1302     if (argv[i][1] == '-')
1303 nigel 49 {
1304 nigel 77 char *arg = argv[i] + 2;
1305     char *argequals = strchr(arg, '=');
1306 nigel 53
1307 nigel 77 if (*arg == 0) /* -- terminates options */
1308 nigel 49 {
1309 nigel 77 i++;
1310     break; /* out of the options-handling loop */
1311 nigel 53 }
1312 nigel 49
1313 nigel 77 longop = TRUE;
1314    
1315     /* Some long options have data that follows after =, for example file=name.
1316     Some options have variations in the long name spelling: specifically, we
1317     allow "regexp" because GNU grep allows it, though I personally go along
1318 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1319     These options are entered in the table as "regex(p)". No option is in both
1320     these categories, fortunately. */
1321 nigel 77
1322 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1323     {
1324 nigel 77 char *opbra = strchr(op->long_name, '(');
1325     char *equals = strchr(op->long_name, '=');
1326     if (opbra == NULL) /* Not a (p) case */
1327 nigel 53 {
1328 nigel 77 if (equals == NULL) /* Not thing=data case */
1329     {
1330     if (strcmp(arg, op->long_name) == 0) break;
1331     }
1332     else /* Special case xxx=data */
1333     {
1334     int oplen = equals - op->long_name;
1335     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1336     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1337     {
1338     option_data = arg + arglen;
1339     if (*option_data == '=')
1340     {
1341     option_data++;
1342     longopwasequals = TRUE;
1343     }
1344     break;
1345     }
1346     }
1347 nigel 53 }
1348 nigel 77 else /* Special case xxxx(p) */
1349     {
1350     char buff1[24];
1351     char buff2[24];
1352     int baselen = opbra - op->long_name;
1353     sprintf(buff1, "%.*s", baselen, op->long_name);
1354     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1355     opbra + 1);
1356     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1357     break;
1358     }
1359 nigel 53 }
1360 nigel 77
1361 nigel 53 if (op->one_char == 0)
1362     {
1363     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1364     exit(usage(2));
1365     }
1366     }
1367 nigel 49
1368 nigel 89
1369     /* Jeffrey Friedl's debugging harness uses these additional options which
1370     are not in the right form for putting in the option table because they use
1371     only one hyphen, yet are more than one character long. By putting them
1372     separately here, they will not get displayed as part of the help() output,
1373     but I don't think Jeffrey will care about that. */
1374    
1375     #ifdef JFRIEDL_DEBUG
1376     else if (strcmp(argv[i], "-pre") == 0) {
1377     jfriedl_prefix = argv[++i];
1378     continue;
1379     } else if (strcmp(argv[i], "-post") == 0) {
1380     jfriedl_postfix = argv[++i];
1381     continue;
1382     } else if (strcmp(argv[i], "-XT") == 0) {
1383     sscanf(argv[++i], "%d", &jfriedl_XT);
1384     continue;
1385     } else if (strcmp(argv[i], "-XR") == 0) {
1386     sscanf(argv[++i], "%d", &jfriedl_XR);
1387     continue;
1388     }
1389     #endif
1390    
1391    
1392 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1393     continue till we hit the last one or one that needs data. */
1394 nigel 53
1395     else
1396     {
1397     char *s = argv[i] + 1;
1398 nigel 77 longop = FALSE;
1399 nigel 53 while (*s != 0)
1400     {
1401 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1402     { if (*s == op->one_char) break; }
1403     if (op->one_char == 0)
1404 nigel 53 {
1405 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1406     *s, argv[i]);
1407     exit(usage(2));
1408     }
1409     if (op->type != OP_NODATA || s[1] == 0)
1410     {
1411     option_data = s+1;
1412 nigel 53 break;
1413     }
1414 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1415 nigel 49 }
1416     }
1417 nigel 77
1418 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1419     is NO_DATA, it means that there is no data, and the option might set
1420     something in the PCRE options. */
1421 nigel 77
1422     if (op->type == OP_NODATA)
1423     {
1424 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1425     continue;
1426     }
1427    
1428     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1429     either has a value or defaults to something. It cannot have data in a
1430     separate item. At the moment, the only such options are "colo(u)r" and
1431 nigel 89 Jeffrey Friedl's special -S debugging option. */
1432 nigel 87
1433     if (*option_data == 0 &&
1434     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1435     {
1436     switch (op->one_char)
1437 nigel 77 {
1438 nigel 87 case N_COLOUR:
1439     colour_option = (char *)"auto";
1440     break;
1441     #ifdef JFRIEDL_DEBUG
1442     case 'S':
1443     S_arg = 0;
1444     break;
1445     #endif
1446 nigel 77 }
1447 nigel 87 continue;
1448     }
1449 nigel 77
1450 nigel 87 /* Otherwise, find the data string for the option. */
1451    
1452     if (*option_data == 0)
1453     {
1454     if (i >= argc - 1 || longopwasequals)
1455 nigel 77 {
1456 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1457     exit(usage(2));
1458     }
1459     option_data = argv[++i];
1460     }
1461    
1462     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1463     multiple times to create a list of patterns. */
1464    
1465     if (op->type == OP_PATLIST)
1466     {
1467     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1468     {
1469     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1470     MAX_PATTERN_COUNT);
1471     return 2;
1472     }
1473     patterns[cmd_pattern_count++] = option_data;
1474     }
1475    
1476     /* Otherwise, deal with single string or numeric data values. */
1477    
1478     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1479     {
1480     *((char **)op->dataptr) = option_data;
1481     }
1482     else
1483     {
1484     char *endptr;
1485     int n = strtoul(option_data, &endptr, 10);
1486     if (*endptr != 0)
1487     {
1488     if (longop)
1489 nigel 77 {
1490 nigel 87 char *equals = strchr(op->long_name, '=');
1491     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1492     equals - op->long_name;
1493     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1494     option_data, nlen, op->long_name);
1495 nigel 77 }
1496 nigel 87 else
1497     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1498     option_data, op->one_char);
1499     exit(usage(2));
1500 nigel 77 }
1501 nigel 87 *((int *)op->dataptr) = n;
1502 nigel 77 }
1503 nigel 49 }
1504    
1505 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1506     for -A and -B. */
1507    
1508     if (both_context > 0)
1509     {
1510     if (after_context == 0) after_context = both_context;
1511     if (before_context == 0) before_context = both_context;
1512     }
1513    
1514 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1515     LC_ALL environment variable is set, and if so, use it. */
1516 nigel 49
1517 nigel 87 if (locale == NULL)
1518 nigel 53 {
1519 nigel 87 locale = getenv("LC_ALL");
1520     locale_from = "LCC_ALL";
1521 nigel 53 }
1522 nigel 49
1523 nigel 87 if (locale == NULL)
1524     {
1525     locale = getenv("LC_CTYPE");
1526     locale_from = "LC_CTYPE";
1527     }
1528 nigel 49
1529 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1530     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1531    
1532     if (locale != NULL)
1533 nigel 49 {
1534 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1535 nigel 53 {
1536 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1537     locale, locale_from);
1538 nigel 53 return 2;
1539     }
1540 nigel 87 pcretables = pcre_maketables();
1541     }
1542 nigel 77
1543 nigel 87 /* Sort out colouring */
1544    
1545     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1546     {
1547     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1548     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1549     else
1550 nigel 53 {
1551 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1552     colour_option);
1553     return 2;
1554 nigel 77 }
1555 nigel 87 if (do_colour)
1556 nigel 77 {
1557 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1558     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1559     if (cs != NULL) colour_string = cs;
1560 nigel 77 }
1561 nigel 87 }
1562 nigel 77
1563 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1564    
1565     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1566     {
1567     pcre_options |= PCRE_NEWLINE_CR;
1568     endlinebyte = '\r';
1569     }
1570     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1571     {
1572     pcre_options |= PCRE_NEWLINE_LF;
1573     }
1574     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1575     {
1576     pcre_options |= PCRE_NEWLINE_CRLF;
1577     endlineextra = 1;
1578     }
1579     else
1580     {
1581     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1582     return 2;
1583     }
1584    
1585 nigel 87 /* Interpret the text values for -d and -D */
1586    
1587     if (dee_option != NULL)
1588     {
1589     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1590     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1591     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1592     else
1593 nigel 77 {
1594 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1595     return 2;
1596 nigel 53 }
1597 nigel 49 }
1598    
1599 nigel 87 if (DEE_option != NULL)
1600     {
1601     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1602     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1603     else
1604     {
1605     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1606     return 2;
1607     }
1608     }
1609 nigel 49
1610 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1611 nigel 87
1612     #ifdef JFRIEDL_DEBUG
1613     if (S_arg > 9)
1614 nigel 49 {
1615 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1616     return 2;
1617     }
1618 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1619     {
1620     if (jfriedl_XT == 0) jfriedl_XT = 1;
1621     if (jfriedl_XR == 0) jfriedl_XR = 1;
1622     }
1623 nigel 87 #endif
1624 nigel 77
1625 nigel 87 /* Get memory to store the pattern and hints lists. */
1626    
1627     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1628     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1629    
1630     if (pattern_list == NULL || hints_list == NULL)
1631     {
1632     fprintf(stderr, "pcregrep: malloc failed\n");
1633     return 2;
1634     }
1635    
1636     /* If no patterns were provided by -e, and there is no file provided by -f,
1637     the first argument is the one and only pattern, and it must exist. */
1638    
1639     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1640     {
1641 nigel 63 if (i >= argc) return usage(2);
1642 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1643     }
1644 nigel 77
1645 nigel 87 /* Compile the patterns that were provided on the command line, either by
1646     multiple uses of -e or as a single unkeyed pattern. */
1647    
1648     for (j = 0; j < cmd_pattern_count; j++)
1649     {
1650     if (!compile_pattern(patterns[j], pcre_options, NULL,
1651     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1652     return 2;
1653     }
1654    
1655     /* Compile the regular expressions that are provided in a file. */
1656    
1657     if (pattern_filename != NULL)
1658     {
1659     int linenumber = 0;
1660     FILE *f;
1661     char *filename;
1662     char buffer[MBUFTHIRD];
1663    
1664     if (strcmp(pattern_filename, "-") == 0)
1665 nigel 77 {
1666 nigel 87 f = stdin;
1667     filename = stdin_name;
1668 nigel 77 }
1669 nigel 87 else
1670 nigel 77 {
1671 nigel 87 f = fopen(pattern_filename, "r");
1672     if (f == NULL)
1673     {
1674     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1675     strerror(errno));
1676     return 2;
1677     }
1678     filename = pattern_filename;
1679 nigel 77 }
1680    
1681 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1682 nigel 53 {
1683 nigel 87 char *s = buffer + (int)strlen(buffer);
1684     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1685     *s = 0;
1686     linenumber++;
1687     if (buffer[0] == 0) continue; /* Skip blank lines */
1688     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1689     return 2;
1690 nigel 53 }
1691 nigel 87
1692     if (f != stdin) fclose(f);
1693 nigel 49 }
1694    
1695 nigel 77 /* Study the regular expressions, as we will be running them many times */
1696 nigel 53
1697     for (j = 0; j < pattern_count; j++)
1698     {
1699     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1700     if (error != NULL)
1701     {
1702     char s[16];
1703     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1704     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1705     return 2;
1706     }
1707     }
1708    
1709 nigel 77 /* If there are include or exclude patterns, compile them. */
1710    
1711     if (exclude_pattern != NULL)
1712     {
1713 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1714     pcretables);
1715 nigel 77 if (exclude_compiled == NULL)
1716     {
1717     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1718     errptr, error);
1719     return 2;
1720     }
1721     }
1722    
1723     if (include_pattern != NULL)
1724     {
1725 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1726     pcretables);
1727 nigel 77 if (include_compiled == NULL)
1728     {
1729     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1730     errptr, error);
1731     return 2;
1732     }
1733     }
1734    
1735 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
1736 nigel 49
1737 nigel 87 if (i >= argc)
1738     return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1739 nigel 49
1740 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
1741     Pass in the fact that there is only one argument at top level - this suppresses
1742 nigel 87 the file name if the argument is not a directory and filenames are not
1743     otherwise forced. */
1744 nigel 49
1745 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1746 nigel 49
1747     for (; i < argc; i++)
1748     {
1749 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1750     only_one_at_top);
1751 nigel 77 if (frc > 1) rc = frc;
1752     else if (frc == 0 && rc == 1) rc = 0;
1753 nigel 49 }
1754    
1755     return rc;
1756     }
1757    
1758 nigel 77 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12