/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (hide annotations) (download)
Wed Dec 5 20:56:03 2007 UTC (6 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 61563 byte(s)
Add --line-offsets and --file-offsets to pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 236 #include "pcre.h"
59 nigel 49
/* Local boolean constants and type. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): presumably the maximum number of patterns that is accepted;
the code that enforces it is not in this part of the file - confirm. */

#define MAX_PATTERN_COUNT 100

/* One third of the size of the file buffer. pcregrep() declares its buffer as
3*MBUFTHIRD bytes. The value is BUFSIZ when that exceeds 8192, else 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options. The lower-case dee_ values belong to
dee_action (-d, directories) and the upper-case DEE_ values to DEE_action
(-D, devices). */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). These are the 1, 2, and
4 bits of process_options, which selects an entry in the prefix[] and suffix[]
tables. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types; the current one is held in endlinetype and drives the
switches in end_of_line() and previous_line(). */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
/* Variables used only by the JFRIEDL_DEBUG timing code. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* The line ending convention in force: one of the EL_ values. */

static int endlinetype;

/* String-valued settings. Most are targets of entries in optionlist[] and stay
NULL unless the corresponding option is given. */

static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;
static char *dee_option = NULL;
static char *DEE_option = NULL;
static char *newline = NULL;
static char *pattern_filename = NULL;
static char *stdin_name = (char *)"(standard input)";
static char *locale = NULL;

/* NOTE(review): presumably PCRE character tables for a non-default locale;
the code that sets this is not in this part of the file - confirm. */

static const unsigned char *pcretables = NULL;

/* The compiled patterns and their study data. pcregrep() passes
pattern_list[i] and hints_list[i] to pcre_exec() for i < pattern_count. */

static int pattern_count = 0;
static pcre **pattern_list = NULL;
static pcre_extra **hints_list = NULL;

/* Source text of the --include and --exclude patterns. */

static char *include_pattern = NULL;
static char *exclude_pattern = NULL;

/* NOTE(review): presumably the compiled forms of the two patterns above;
the compilation is not in this part of the file - confirm. */

static pcre *include_compiled = NULL;
static pcre *exclude_compiled = NULL;

/* Numeric settings and counters. */

static int after_context = 0;     /* -A: lines of context after a match */
static int before_context = 0;    /* -B: lines of context before a match */
static int both_context = 0;      /* -C: context both before and after */
static int dee_action = dee_READ;
static int DEE_action = DEE_READ;
static int error_count = 0;       /* pcre_exec() errors seen so far */
static int filenames = FN_DEFAULT;
static int process_options = 0;   /* PO_ flag bits */

/* Boolean switches. */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE; /* set by do_after_lines() after printing */
static BOOL invert = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;        /* prefix output lines with line numbers */
static BOOL only_matching = FALSE;
static BOOL quiet = FALSE;
static BOOL silent = FALSE;
static BOOL utf8 = FALSE;
155 nigel 49
156 nigel 53 /* Structure for options and list of them */
157 nigel 49
/* Kinds of option. In optionlist[] the OP_NODATA entries have a NULL data
pointer, and the string and number kinds point at the variable that receives
the value. NOTE(review): the OP_OP_ forms are presumably options whose data is
optional - confirm in the option decoding code. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

/* One entry in the table of command line options. */

typedef struct option_item {
  int type;                /* one of the OP_ values above */
  int one_char;            /* single-letter form, or a negative N_ value */
  void *dataptr;           /* variable that receives the data, or NULL */
  const char *long_name;   /* long option name, including any "=data" text */
  const char *help_text;   /* description of the option */
} option_item;
168 nigel 49
169 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
170     used to identify them. */
171    
/* Identifiers for the options that have only a long name. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_HELP (-3)
#define N_INCLUDE (-4)
#define N_LABEL (-5)
#define N_LOCALE (-6)
#define N_NULL (-7)
#define N_LOFFSETS (-8)
#define N_FOFFSETS (-9)

/* The table of options. Each entry gives the kind of option, its one-character
form (or a negative N_ value), the variable that receives its data (or NULL),
its long name, and its help text. The list ends with an entry whose one_char is
zero and whose long_name is NULL. */

static option_item optionlist[] = {
  { OP_NODATA, N_NULL, NULL, "", " terminate options" },
  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
  { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
  { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
  { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
  { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
  { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
  { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
  { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
#endif
  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
  { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA, 0, NULL, NULL, NULL }
};
224    
225 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
226     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
227     that the combination of -w and -x has the same effect as -x on its own, so we
228     can treat them as the same. */
229 nigel 53
/* Text placed in front of a pattern, one entry for each of the eight
combinations of the PO_ flag bits. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

/* Text placed after a pattern, in the same order as prefix[]. */

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };

/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf",
by the character decoding in end_of_line() and previous_line(). */

/* Masks for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Number of additional bytes in a character, indexed by the bottom 6 bits of
a first byte that is 0xc0 or greater. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
245    
246    
247    
248 nigel 53 /*************************************************
249 nigel 87 * OS-specific functions *
250 nigel 53 *************************************************/
251    
252     /* These functions are defined so that they can be made system specific,
253 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
254 nigel 53
255    
256     /************* Directory scanning in Unix ***********/
257    
258 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259 nigel 53 #include <sys/types.h>
260     #include <sys/stat.h>
261     #include <dirent.h>
262    
263     typedef DIR directory_type;
264    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and the path is a directory;
            0 otherwise, including when stat() fails (in the expectation
            that opening the path as a file will then fail as well)
*/

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
273    
274 nigel 67 static directory_type *
275 nigel 53 opendirectory(char *filename)
276     {
277     return opendir(filename);
278     }
279    
280 nigel 67 static char *
281 nigel 53 readdirectory(directory_type *dir)
282     {
283     for (;;)
284     {
285     struct dirent *dent = readdir(dir);
286     if (dent == NULL) return NULL;
287     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288     return dent->d_name;
289     }
290 ph10 151 /* Control never reaches here */
291 nigel 53 }
292    
293 nigel 67 static void
294 nigel 53 closedirectory(directory_type *dir)
295     {
296     closedir(dir);
297     }
298    
299    
300 nigel 87 /************* Test for regular file in Unix **********/
301    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() fails (in the expectation that opening the path as a
            file will then fail as well) or if the path is a regular file;
            0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
310    
311    
312     /************* Test stdout for being a terminal in Unix **********/
313    
314     static BOOL
315     is_stdout_tty(void)
316     {
317     return isatty(fileno(stdout));
318     }
319    
320    
321 nigel 63 /************* Directory scanning in Win32 ***********/
322 nigel 53
323 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
324 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325     when it did not exist. */
326 nigel 53
327 nigel 63
328 ph10 97 #elif HAVE_WINDOWS_H
329 nigel 63
330     #ifndef STRICT
331     # define STRICT
332     #endif
333     #ifndef WIN32_LEAN_AND_MEAN
334     # define WIN32_LEAN_AND_MEAN
335     #endif
336 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
337     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
338     #endif
339    
340 nigel 63 #include <windows.h>
341    
342     typedef struct directory_type
343     {
344     HANDLE handle;
345     BOOL first;
346     WIN32_FIND_DATA data;
347     } directory_type;
348    
349     int
350     isdirectory(char *filename)
351     {
352     DWORD attr = GetFileAttributes(filename);
353     if (attr == INVALID_FILE_ATTRIBUTES)
354     return 0;
355     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
356     }
357    
358     directory_type *
359     opendirectory(char *filename)
360     {
361     size_t len;
362     char *pattern;
363     directory_type *dir;
364     DWORD err;
365     len = strlen(filename);
366     pattern = (char *) malloc(len + 3);
367     dir = (directory_type *) malloc(sizeof(*dir));
368     if ((pattern == NULL) || (dir == NULL))
369     {
370     fprintf(stderr, "pcregrep: malloc failed\n");
371     exit(2);
372     }
373     memcpy(pattern, filename, len);
374     memcpy(&(pattern[len]), "\\*", 3);
375     dir->handle = FindFirstFile(pattern, &(dir->data));
376     if (dir->handle != INVALID_HANDLE_VALUE)
377     {
378     free(pattern);
379     dir->first = TRUE;
380     return dir;
381     }
382     err = GetLastError();
383     free(pattern);
384     free(dir);
385     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
386     return NULL;
387     }
388    
389     char *
390     readdirectory(directory_type *dir)
391     {
392     for (;;)
393     {
394     if (!dir->first)
395     {
396     if (!FindNextFile(dir->handle, &(dir->data)))
397     return NULL;
398     }
399     else
400     {
401     dir->first = FALSE;
402     }
403     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
404     return dir->data.cFileName;
405     }
406     #ifndef _MSC_VER
407     return NULL; /* Keep compiler happy; never executed */
408     #endif
409     }
410    
411     void
412     closedirectory(directory_type *dir)
413     {
414     FindClose(dir->handle);
415     free(dir);
416     }
417    
418    
419 nigel 87 /************* Test for regular file in Win32 **********/
420    
421     /* I don't know how to do this, or if it can be done; assume all paths are
422     regular if they are not directories. */
423    
/* Test for a regular file (Win32 version). Anything that is not a directory
is treated as a regular file.

Argument:   filename   the path to test
Returns:    non-zero if isdirectory() returns zero for the path; 0 otherwise
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
428    
429    
430     /************* Test stdout for being a terminal in Win32 **********/
431    
432     /* I don't know how to do this; assume never */
433    
434     static BOOL
435     is_stdout_tty(void)
436     {
437     FALSE;
438     }
439    
440    
441 nigel 53 /************* Directory scanning when we can't do it ***********/
442    
443     /* The type is void, and apart from isdirectory(), the functions do nothing. */
444    
445 nigel 63 #else
446    
typedef void directory_type;

/* Stub: nothing is ever reported to be a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Stub: a directory can never be opened. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* Stub: there are never any entries. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Stub: there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
453    
454 nigel 87
455     /************* Test for regular when we can't do it **********/
456    
457     /* Assume all files are regular. */
458    
/* Stub: every path is reported to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
460    
461    
462     /************* Test stdout for being a terminal when we can't do it **********/
463    
464     static BOOL
465     is_stdout_tty(void)
466     {
467     return FALSE;
468     }
469    
470    
471 nigel 53 #endif
472    
473    
474    
475 ph10 137 #ifndef HAVE_STRERROR
476 nigel 49 /*************************************************
477     * Provide strerror() for non-ANSI libraries *
478     *************************************************/
479    
480     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
481     in their libraries, but can provide the same facility by this simple
482     alternative function. */
483    
/* The system's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:   n   an error number
Returns:    the text for n from sys_errlist[], or a fixed string if n is
            outside the table
*/

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
493     #endif /* HAVE_STRERROR */
494    
495    
496    
497     /*************************************************
498 nigel 93 * Find end of line *
499     *************************************************/
500    
501     /* The length of the endline sequence that is found is set via lenptr. This may
502     be zero at the very end of the file if there is no line-ending sequence there.
503    
504     Arguments:
505     p current position in line
506     endptr end of available data
507     lenptr where to put the length of the eol sequence
508    
509     Returns: pointer to the byte after the line-ending sequence (the start of the next line), or endptr if no line ending was found
510     */
511    
512     static char *
513     end_of_line(char *p, char *endptr, int *lenptr)
514     {
515     switch(endlinetype)
516     {
517     default: /* Just in case */
518     case EL_LF:
519     while (p < endptr && *p != '\n') p++;
520     if (p < endptr)
521     {
522     *lenptr = 1;
523     return p + 1;
524     }
525     *lenptr = 0;
526     return endptr;
527    
528     case EL_CR:
529     while (p < endptr && *p != '\r') p++;
530     if (p < endptr)
531     {
532     *lenptr = 1;
533     return p + 1;
534     }
535     *lenptr = 0;
536     return endptr;
537    
538     case EL_CRLF:
539     for (;;)
540     {
541     while (p < endptr && *p != '\r') p++;
542     if (++p >= endptr)
543     {
544     *lenptr = 0;
545     return endptr;
546     }
547     if (*p == '\n')
548     {
549     *lenptr = 2;
550     return p + 1;
551     }
552     }
553     break;
554    
555 ph10 149 case EL_ANYCRLF:
556     while (p < endptr)
557     {
558     int extra = 0;
559     register int c = *((unsigned char *)p);
560    
561     if (utf8 && c >= 0xc0)
562     {
563     int gcii, gcss;
564     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
565     gcss = 6*extra;
566     c = (c & utf8_table3[extra]) << gcss;
567     for (gcii = 1; gcii <= extra; gcii++)
568     {
569     gcss -= 6;
570     c |= (p[gcii] & 0x3f) << gcss;
571     }
572     }
573    
574     p += 1 + extra;
575    
576     switch (c)
577     {
578     case 0x0a: /* LF */
579     *lenptr = 1;
580     return p;
581    
582     case 0x0d: /* CR */
583     if (p < endptr && *p == 0x0a)
584     {
585     *lenptr = 2;
586     p++;
587     }
588     else *lenptr = 1;
589     return p;
590 ph10 150
591 ph10 149 default:
592     break;
593     }
594     } /* End of loop for ANYCRLF case */
595 ph10 150
596 ph10 149 *lenptr = 0; /* Must have hit the end */
597     return endptr;
598    
599 nigel 93 case EL_ANY:
600     while (p < endptr)
601     {
602     int extra = 0;
603     register int c = *((unsigned char *)p);
604    
605     if (utf8 && c >= 0xc0)
606     {
607     int gcii, gcss;
608     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
609     gcss = 6*extra;
610     c = (c & utf8_table3[extra]) << gcss;
611     for (gcii = 1; gcii <= extra; gcii++)
612     {
613     gcss -= 6;
614     c |= (p[gcii] & 0x3f) << gcss;
615     }
616     }
617    
618     p += 1 + extra;
619    
620     switch (c)
621     {
622     case 0x0a: /* LF */
623     case 0x0b: /* VT */
624     case 0x0c: /* FF */
625     *lenptr = 1;
626     return p;
627    
628     case 0x0d: /* CR */
629     if (p < endptr && *p == 0x0a)
630     {
631     *lenptr = 2;
632     p++;
633     }
634     else *lenptr = 1;
635     return p;
636    
637     case 0x85: /* NEL */
638     *lenptr = utf8? 2 : 1;
639     return p;
640    
641     case 0x2028: /* LS */
642     case 0x2029: /* PS */
643     *lenptr = 3;
644     return p;
645    
646     default:
647     break;
648     }
649     } /* End of loop for ANY case */
650    
651     *lenptr = 0; /* Must have hit the end */
652     return endptr;
653     } /* End of overall switch */
654     }
655    
656    
657    
658     /*************************************************
659     * Find start of previous line *
660     *************************************************/
661    
662     /* This is called when looking back for before lines to print.
663    
664     Arguments:
665     p start of the subsequent line
666     startptr start of available data
667    
668     Returns: pointer to the start of the previous line
669     */
670    
671     static char *
672     previous_line(char *p, char *startptr)
673     {
674     switch(endlinetype)
675     {
676     default: /* Just in case */
677     case EL_LF:
678     p--;
679     while (p > startptr && p[-1] != '\n') p--;
680     return p;
681    
682     case EL_CR:
683     p--;
684     while (p > startptr && p[-1] != '\n') p--;
685     return p;
686    
687     case EL_CRLF:
688     for (;;)
689     {
690     p -= 2;
691     while (p > startptr && p[-1] != '\n') p--;
692     if (p <= startptr + 1 || p[-2] == '\r') return p;
693     }
694     return p; /* But control should never get here */
695    
696     case EL_ANY:
697 ph10 150 case EL_ANYCRLF:
698 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
699     if (utf8) while ((*p & 0xc0) == 0x80) p--;
700    
701     while (p > startptr)
702     {
703     register int c;
704     char *pp = p - 1;
705    
706     if (utf8)
707     {
708     int extra = 0;
709     while ((*pp & 0xc0) == 0x80) pp--;
710     c = *((unsigned char *)pp);
711     if (c >= 0xc0)
712     {
713     int gcii, gcss;
714     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
715     gcss = 6*extra;
716     c = (c & utf8_table3[extra]) << gcss;
717     for (gcii = 1; gcii <= extra; gcii++)
718     {
719     gcss -= 6;
720     c |= (pp[gcii] & 0x3f) << gcss;
721     }
722     }
723     }
724     else c = *((unsigned char *)pp);
725    
726 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
727 nigel 93 {
728     case 0x0a: /* LF */
729 ph10 149 case 0x0d: /* CR */
730     return p;
731 ph10 150
732 ph10 149 default:
733     break;
734 ph10 150 }
735 ph10 149
736     else switch (c)
737     {
738     case 0x0a: /* LF */
739 nigel 93 case 0x0b: /* VT */
740     case 0x0c: /* FF */
741     case 0x0d: /* CR */
742     case 0x85: /* NEL */
743     case 0x2028: /* LS */
744     case 0x2029: /* PS */
745     return p;
746    
747     default:
748     break;
749     }
750    
751     p = pp; /* Back one character */
752     } /* End of loop for ANY case */
753    
754     return startptr; /* Hit start of data */
755     } /* End of overall switch */
756     }
757    
758    
759    
760    
761    
762     /*************************************************
763 nigel 77 * Print the previous "after" lines *
764 nigel 49 *************************************************/
765    
766 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
767 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
768     that a binary zero does not terminate it.
769 nigel 77
770     Arguments:
771     lastmatchnumber the number of the last matching line, plus one
772     lastmatchrestart where we restarted after the last match
773     endptr end of available data
774     printname filename for printing
775    
776     Returns: nothing
777     */
778    
779     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
780     char *endptr, char *printname)
781     {
782     if (after_context > 0 && lastmatchnumber > 0)
783     {
784     int count = 0;
785     while (lastmatchrestart < endptr && count++ < after_context)
786     {
787 nigel 93 int ellength;
788 nigel 77 char *pp = lastmatchrestart;
789     if (printname != NULL) fprintf(stdout, "%s-", printname);
790     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
791 nigel 93 pp = end_of_line(pp, endptr, &ellength);
792     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
793     lastmatchrestart = pp;
794 nigel 77 }
795     hyphenpending = TRUE;
796     }
797     }
798    
799    
800    
801     /*************************************************
802     * Grep an individual file *
803     *************************************************/
804    
805     /* This is called from grep_or_recurse() below. It uses a buffer that is three
806     times the value of MBUFTHIRD. The matching point is never allowed to stray into
807     the top third of the buffer, thus keeping more of the file available for
808     context printing or for multiline scanning. For large files, the pointer will
809     be in the middle third most of the time, so the bottom third is available for
810     "before" context printing.
811    
812     Arguments:
813     in the fopened FILE stream
814     printname the file name if it is to be printed for each match
815     or NULL if the file name is not to be printed
816     it cannot be NULL if filenames[_nomatch]_only is set
817    
818     Returns: 0 if there was at least one match
819     1 otherwise (no matches)
820     */
821    
822 nigel 49 static int
823 nigel 77 pcregrep(FILE *in, char *printname)
824 nigel 49 {
825     int rc = 1;
826 nigel 77 int linenumber = 1;
827     int lastmatchnumber = 0;
828 nigel 49 int count = 0;
829 ph10 280 int filepos = 0;
830 nigel 49 int offsets[99];
831 nigel 77 char *lastmatchrestart = NULL;
832     char buffer[3*MBUFTHIRD];
833     char *ptr = buffer;
834     char *endptr;
835     size_t bufflength;
836     BOOL endhyphenpending = FALSE;
837 nigel 49
838 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
839     end of what we have. */
840    
841     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
842     endptr = buffer + bufflength;
843    
844     /* Loop while the current pointer is not at the end of the file. For large
845     files, endptr will be at the end of the buffer when we are in the middle of the
846     file, but ptr will never get there, because as soon as it gets over 2/3 of the
847     way, the buffer is shifted left and re-filled. */
848    
849     while (ptr < endptr)
850 nigel 49 {
851 nigel 93 int i, endlinelength;
852 nigel 87 int mrc = 0;
853 nigel 53 BOOL match = FALSE;
854 ph10 279 char *matchptr = ptr;
855 nigel 77 char *t = ptr;
856     size_t length, linelength;
857 nigel 49
858 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
859     of the subject string to pass to pcre_exec(). In multiline mode, it is the
860     length remainder of the data in the buffer. Otherwise, it is the length of
861     the next line. After matching, we always advance by the length of the next
862     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
863     that any match is constrained to be in the first line. */
864    
865 nigel 93 t = end_of_line(t, endptr, &endlinelength);
866     linelength = t - ptr - endlinelength;
867 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
868 nigel 77
869 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
870    
871     #ifdef JFRIEDL_DEBUG
872     if (jfriedl_XT || jfriedl_XR)
873     {
874     #include <sys/time.h>
875     #include <time.h>
876     struct timeval start_time, end_time;
877     struct timezone dummy;
878    
879     if (jfriedl_XT)
880     {
881     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
882     const char *orig = ptr;
883     ptr = malloc(newlen + 1);
884     if (!ptr) {
885     printf("out of memory");
886     exit(2);
887     }
888     endptr = ptr;
889     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
890     for (i = 0; i < jfriedl_XT; i++) {
891     strncpy(endptr, orig, length);
892     endptr += length;
893     }
894     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
895     length = newlen;
896     }
897    
898     if (gettimeofday(&start_time, &dummy) != 0)
899     perror("bad gettimeofday");
900    
901    
902     for (i = 0; i < jfriedl_XR; i++)
903     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
904    
905     if (gettimeofday(&end_time, &dummy) != 0)
906     perror("bad gettimeofday");
907    
908     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
909     -
910     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
911    
912     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
913     return 0;
914     }
915     #endif
916    
917 ph10 279 /* We come back here after a match when the -o option (only_matching) is set,
918     in order to find any further matches in the same line. */
919    
920     ONLY_MATCHING_RESTART:
921 nigel 89
922 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
923     the final newline in the subject string. */
924    
925 nigel 87 for (i = 0; i < pattern_count; i++)
926 nigel 53 {
927 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928 nigel 87 offsets, 99);
929     if (mrc >= 0) { match = TRUE; break; }
930     if (mrc != PCRE_ERROR_NOMATCH)
931     {
932     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934     fprintf(stderr, "this line:\n");
935 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
936 nigel 87 fprintf(stderr, "\n");
937     if (error_count == 0 &&
938     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
939     {
940     fprintf(stderr, "pcregrep: error %d means that a resource limit "
941     "was exceeded\n", mrc);
942     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
943     }
944     if (error_count++ > 20)
945     {
946     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
947     exit(2);
948     }
949     match = invert; /* No more matching; don't show the line again */
950     break;
951     }
952 nigel 53 }
953 nigel 49
954 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
955 nigel 77
956 nigel 49 if (match != invert)
957     {
958 nigel 77 BOOL hyphenprinted = FALSE;
959    
960 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
961 nigel 77
962 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
963    
964     /* Just count if just counting is wanted. */
965    
966 nigel 49 if (count_only) count++;
967    
968 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
969     in the file. */
970    
971     else if (filenames == FN_ONLY)
972 nigel 49 {
973 nigel 77 fprintf(stdout, "%s\n", printname);
974 nigel 49 return 0;
975     }
976    
977 nigel 87 /* Likewise, if all we want is a yes/no answer. */
978    
979 nigel 77 else if (quiet) return 0;
980 nigel 49
981 nigel 87 /* The --only-matching option prints just the substring that matched, and
982 ph10 280 the --file-offsets and --line-offsets options output offsets for the
983     matching substring (they both force --only-matching). None of these options
984     prints any context. Afterwards, adjust the start and length, and then jump
985     back to look for further matches in the same line. If we are in invert
986     mode, however, nothing is printed - this could be still useful because the
987     return code is set. */
988 nigel 87
989     else if (only_matching)
990     {
991 ph10 279 if (!invert)
992     {
993     if (printname != NULL) fprintf(stdout, "%s:", printname);
994     if (number) fprintf(stdout, "%d:", linenumber);
995 ph10 280 if (line_offsets)
996     fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997     offsets[1] - offsets[0]);
998     else if (file_offsets)
999     fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000     offsets[1] - offsets[0]);
1001     else
1002     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003 ph10 279 fprintf(stdout, "\n");
1004     matchptr += offsets[1];
1005     length -= offsets[1];
1006     match = FALSE;
1007     goto ONLY_MATCHING_RESTART;
1008     }
1009 nigel 87 }
1010    
1011     /* This is the default case when none of the above options is set. We print
1012     the matching line(s), possibly preceded and/or followed by other lines of
1013     context. */
1014    
1015 nigel 49 else
1016     {
1017 nigel 77 /* See if there is a requirement to print some "after" lines from a
1018     previous match. We never print any overlaps. */
1019    
1020     if (after_context > 0 && lastmatchnumber > 0)
1021     {
1022 nigel 93 int ellength;
1023 nigel 77 int linecount = 0;
1024     char *p = lastmatchrestart;
1025    
1026     while (p < ptr && linecount < after_context)
1027     {
1028 nigel 93 p = end_of_line(p, ptr, &ellength);
1029 nigel 77 linecount++;
1030     }
1031    
1032     /* It is important to advance lastmatchrestart during this printing so
1033 nigel 87 that it interacts correctly with any "before" printing below. Print
1034     each line's data using fwrite() in case there are binary zeroes. */
1035 nigel 77
1036     while (lastmatchrestart < p)
1037     {
1038     char *pp = lastmatchrestart;
1039     if (printname != NULL) fprintf(stdout, "%s-", printname);
1040     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1041 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1042     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1043     lastmatchrestart = pp;
1044 nigel 77 }
1045     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1046     }
1047    
1048     /* If there were non-contiguous lines printed above, insert hyphens. */
1049    
1050     if (hyphenpending)
1051     {
1052     fprintf(stdout, "--\n");
1053     hyphenpending = FALSE;
1054     hyphenprinted = TRUE;
1055     }
1056    
1057     /* See if there is a requirement to print some "before" lines for this
1058     match. Again, don't print overlaps. */
1059    
1060     if (before_context > 0)
1061     {
1062     int linecount = 0;
1063     char *p = ptr;
1064    
1065     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1066 nigel 87 linecount < before_context)
1067 nigel 77 {
1068 nigel 87 linecount++;
1069 nigel 93 p = previous_line(p, buffer);
1070 nigel 77 }
1071    
1072     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1073     fprintf(stdout, "--\n");
1074    
1075     while (p < ptr)
1076     {
1077 nigel 93 int ellength;
1078 nigel 77 char *pp = p;
1079     if (printname != NULL) fprintf(stdout, "%s-", printname);
1080     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1081 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1082     fwrite(p, 1, pp - p, stdout);
1083     p = pp;
1084 nigel 77 }
1085     }
1086    
1087     /* Now print the matching line(s); ensure we set hyphenpending at the end
1088 nigel 85 of the file if any context lines are being output. */
1089 nigel 77
1090 nigel 85 if (after_context > 0 || before_context > 0)
1091     endhyphenpending = TRUE;
1092    
1093 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1094 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1095 nigel 77
1096     /* In multiline mode, we want to print to the end of the line in which
1097     the end of the matched string is found, so we adjust linelength and the
1098 ph10 222 line number appropriately, but only when there actually was a match
1099     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100     the match will always be before the first newline sequence. */
1101 nigel 77
1102     if (multiline)
1103     {
1104 nigel 93 int ellength;
1105 ph10 222 char *endmatch = ptr;
1106     if (!invert)
1107 nigel 93 {
1108 ph10 222 endmatch += offsets[1];
1109     t = ptr;
1110     while (t < endmatch)
1111     {
1112     t = end_of_line(t, endptr, &ellength);
1113     if (t <= endmatch) linenumber++; else break;
1114     }
1115 nigel 93 }
1116     endmatch = end_of_line(endmatch, endptr, &ellength);
1117     linelength = endmatch - ptr - ellength;
1118 nigel 77 }
1119    
1120 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1121     zeroes are treated as just another data character. */
1122    
1123     /* This extra option, for Jeffrey Friedl's debugging requirements,
1124     replaces the matched string, or a specific captured string if it exists,
1125     with X. When this happens, colouring is ignored. */
1126    
1127     #ifdef JFRIEDL_DEBUG
1128     if (S_arg >= 0 && S_arg < mrc)
1129     {
1130     int first = S_arg * 2;
1131     int last = first + 1;
1132     fwrite(ptr, 1, offsets[first], stdout);
1133     fprintf(stdout, "X");
1134     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1135     }
1136     else
1137     #endif
1138    
1139     /* We have to split the line(s) up if colouring. */
1140    
1141     if (do_colour)
1142     {
1143     fwrite(ptr, 1, offsets[0], stdout);
1144     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146     fprintf(stdout, "%c[00m", 0x1b);
1147 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148 ph10 239 stdout);
1149 nigel 87 }
1150 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1151 nigel 49 }
1152    
1153 nigel 87 /* End of doing what has to be done for a match */
1154    
1155 nigel 77 rc = 0; /* Had some success */
1156    
1157     /* Remember where the last match happened for after_context. We remember
1158     where we are about to restart, and that line's number. */
1159    
1160 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1161 nigel 77 lastmatchnumber = linenumber + 1;
1162 nigel 49 }
1163 nigel 77
1164 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1165     anything to be printed), we have to move on to the end of the match before
1166     proceeding. */
1167    
1168     if (multiline && invert && match)
1169     {
1170     int ellength;
1171     char *endmatch = ptr + offsets[1];
1172     t = ptr;
1173     while (t < endmatch)
1174     {
1175     t = end_of_line(t, endptr, &ellength);
1176     if (t <= endmatch) linenumber++; else break;
1177     }
1178     endmatch = end_of_line(endmatch, endptr, &ellength);
1179     linelength = endmatch - ptr - ellength;
1180     }
1181    
1182 ph10 280 /* Advance to after the newline and increment the line number. The file
1183     offset to the current line is maintained in filepos. */
1184 nigel 77
1185 nigel 93 ptr += linelength + endlinelength;
1186 ph10 280 filepos += linelength + endlinelength;
1187 nigel 77 linenumber++;
1188    
1189     /* If we haven't yet reached the end of the file (the buffer is full), and
1190     the current point is in the top 1/3 of the buffer, slide the buffer down by
1191     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1192     about to be lost, print them. */
1193    
1194     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1195     {
1196     if (after_context > 0 &&
1197     lastmatchnumber > 0 &&
1198     lastmatchrestart < buffer + MBUFTHIRD)
1199     {
1200     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1201     lastmatchnumber = 0;
1202     }
1203    
1204     /* Now do the shuffle */
1205    
1206     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1207     ptr -= MBUFTHIRD;
1208     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1209     endptr = buffer + bufflength;
1210    
1211     /* Adjust any last match point */
1212    
1213     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1214     }
1215     } /* Loop through the whole file */
1216    
1217     /* End of file; print final "after" lines if wanted; do_after_lines sets
1218     hyphenpending if it prints something. */
1219    
1220 nigel 87 if (!only_matching && !count_only)
1221     {
1222     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1223     hyphenpending |= endhyphenpending;
1224     }
1225 nigel 77
1226     /* Print the file name if we are looking for those without matches and there
1227     were none. If we found a match, we won't have got this far. */
1228    
1229 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1230 nigel 77 {
1231     fprintf(stdout, "%s\n", printname);
1232     return 0;
1233 nigel 49 }
1234    
1235 nigel 77 /* Print the match count if wanted */
1236    
1237 nigel 49 if (count_only)
1238     {
1239 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1240 nigel 49 fprintf(stdout, "%d\n", count);
1241     }
1242    
1243     return rc;
1244     }
1245    
1246    
1247    
1248     /*************************************************
1249 nigel 53 * Grep a file or recurse into a directory *
1250     *************************************************/
1251    
1252 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1253     recursing; if it's a file, grep it.
1254    
1255     Arguments:
1256     pathname the path to investigate
1257 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1258 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1259    
1260     Returns: 0 if there was at least one match
1261     1 if there were no matches
1262     2 there was some kind of error
1263    
1264     However, file opening failures are suppressed if "silent" is set.
1265     */
1266    
1267 nigel 53 static int
1268 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1269 nigel 53 {
1270     int rc = 1;
1271     int sep;
1272     FILE *in;
1273    
1274 nigel 77 /* If the file name is "-" we scan stdin */
1275 nigel 53
1276 nigel 77 if (strcmp(pathname, "-") == 0)
1277 nigel 53 {
1278 nigel 77 return pcregrep(stdin,
1279 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1280 nigel 77 stdin_name : NULL);
1281     }
1282    
1283    
1284 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1285     each file within it, subject to any include or exclude patterns that were set.
1286     The scanning code is localized so it can be made system-specific. */
1287    
1288     if ((sep = isdirectory(pathname)) != 0)
1289 nigel 77 {
1290 nigel 87 if (dee_action == dee_SKIP) return 1;
1291     if (dee_action == dee_RECURSE)
1292 nigel 53 {
1293 nigel 87 char buffer[1024];
1294     char *nextfile;
1295     directory_type *dir = opendirectory(pathname);
1296 nigel 53
1297 nigel 87 if (dir == NULL)
1298     {
1299     if (!silent)
1300     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1301     strerror(errno));
1302     return 2;
1303     }
1304 nigel 77
1305 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1306     {
1307     int frc, blen;
1308     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1309     blen = strlen(buffer);
1310 nigel 77
1311 nigel 87 if (exclude_compiled != NULL &&
1312     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1313     continue;
1314 nigel 77
1315 nigel 87 if (include_compiled != NULL &&
1316     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1317     continue;
1318    
1319     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1320     if (frc > 1) rc = frc;
1321     else if (frc == 0 && rc == 1) rc = 0;
1322     }
1323    
1324     closedirectory(dir);
1325     return rc;
1326 nigel 53 }
1327     }
1328    
1329 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1330     been requested. */
1331 nigel 53
1332 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1333    
1334     /* Control reaches here if we have a regular file, or if we have a directory
1335     and recursion or skipping was not requested, or if we have anything else and
1336     skipping was not requested. The scan proceeds. If this is the first and only
1337     argument at top level, we don't show the file name, unless we are only showing
1338     the file name, or the filename was forced (-H). */
1339    
1340 nigel 77 in = fopen(pathname, "r");
1341 nigel 53 if (in == NULL)
1342     {
1343 nigel 77 if (!silent)
1344     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1345     strerror(errno));
1346 nigel 53 return 2;
1347     }
1348    
1349 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1350     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1351 nigel 77
1352 nigel 53 fclose(in);
1353     return rc;
1354     }
1355    
1356    
1357    
1358    
1359     /*************************************************
1360 nigel 49 * Usage function *
1361     *************************************************/
1362    
1363     static int
1364     usage(int rc)
1365     {
1366 nigel 87 option_item *op;
1367     fprintf(stderr, "Usage: pcregrep [-");
1368     for (op = optionlist; op->one_char != 0; op++)
1369     {
1370     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371     }
1372     fprintf(stderr, "] [long options] [pattern] [files]\n");
1373 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374     "options.\n");
1375 nigel 49 return rc;
1376     }
1377    
1378    
1379    
1380    
1381     /*************************************************
1382 nigel 53 * Help function *
1383     *************************************************/
1384    
1385     static void
1386     help(void)
1387     {
1388     option_item *op;
1389    
1390 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1391 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1392 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1393     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1394 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1395    
1396     printf("Options:\n");
1397    
1398     for (op = optionlist; op->one_char != 0; op++)
1399     {
1400     int n;
1401     char s[4];
1402     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1403     printf(" %s --%s%n", s, op->long_name, &n);
1404     n = 30 - n;
1405     if (n < 1) n = 1;
1406     printf("%.*s%s\n", n, " ", op->help_text);
1407     }
1408    
1409 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1410     printf("trailing white space is removed and blank lines are ignored.\n");
1411     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1412 nigel 53
1413 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1414 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1415     }
1416    
1417    
1418    
1419    
1420     /*************************************************
1421 nigel 77 * Handle a single-letter, no data option *
1422 nigel 53 *************************************************/
1423    
1424     static int
1425     handle_option(int letter, int options)
1426     {
1427     switch(letter)
1428     {
1429 ph10 280 case N_FOFFSETS: file_offsets = TRUE; break;
1430 nigel 87 case N_HELP: help(); exit(0);
1431 ph10 280 case N_LOFFSETS: line_offsets = number = TRUE; break;
1432 nigel 53 case 'c': count_only = TRUE; break;
1433 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1434     case 'H': filenames = FN_FORCE; break;
1435     case 'h': filenames = FN_NONE; break;
1436 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1437 nigel 87 case 'l': filenames = FN_ONLY; break;
1438     case 'L': filenames = FN_NOMATCH_ONLY; break;
1439 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1440 nigel 53 case 'n': number = TRUE; break;
1441 nigel 87 case 'o': only_matching = TRUE; break;
1442 nigel 77 case 'q': quiet = TRUE; break;
1443 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1444 nigel 53 case 's': silent = TRUE; break;
1445 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1446 nigel 53 case 'v': invert = TRUE; break;
1447 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1448     case 'x': process_options |= PO_LINE_MATCH; break;
1449 nigel 53
1450     case 'V':
1451 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1452 nigel 53 exit(0);
1453     break;
1454    
1455     default:
1456     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1457     exit(usage(2));
1458     }
1459    
1460     return options;
1461     }
1462    
1463    
1464    
1465    
1466     /*************************************************
1467 nigel 87 * Construct printed ordinal *
1468     *************************************************/
1469    
1470     /* This turns a number into "1st", "3rd", etc. */
1471    
/* Format n as an English ordinal ("1st", "2nd", "3rd", "4th", "11th", ...).

Argument:   n   the number to format
Returns:    pointer to a static buffer, overwritten by the next call

Numbers whose last two digits are 11, 12 or 13 take "th" even though their
last digit is 1, 2 or 3. Negative numbers always get "th", because the
remainder is then negative and matches none of the cases. */

static char *
ordin(int n)
{
/* Large enough for any int, the two-letter ending and the terminating zero;
the previous 8 bytes overflowed from six-digit numbers onwards. */
static char buffer[32];
const char *ending = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1488    
1489    
1490    
1491     /*************************************************
1492     * Compile a single pattern *
1493     *************************************************/
1494    
1495     /* When the -F option has been used, this is called for each substring.
1496     Otherwise it's called for each supplied pattern.
1497    
1498     Arguments:
1499     pattern the pattern string
1500     options the PCRE options
1501     filename the file name, or NULL for a command-line pattern
1502     count 0 if this is the only command line pattern, or
1503     number of the command line pattern, or
1504     linenumber for a pattern from a file
1505    
1506     Returns: TRUE on success, FALSE after an error
1507     */
1508    
1509     static BOOL
1510     compile_single_pattern(char *pattern, int options, char *filename, int count)
1511     {
1512     char buffer[MBUFTHIRD + 16];
1513     const char *error;
1514     int errptr;
1515    
1516     if (pattern_count >= MAX_PATTERN_COUNT)
1517     {
1518     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1519     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1520     return FALSE;
1521     }
1522    
1523     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1524     suffix[process_options]);
1525     pattern_list[pattern_count] =
1526     pcre_compile(buffer, options, &error, &errptr, pcretables);
1527 ph10 142 if (pattern_list[pattern_count] != NULL)
1528 ph10 141 {
1529 ph10 142 pattern_count++;
1530 ph10 141 return TRUE;
1531 ph10 142 }
1532 nigel 87
1533     /* Handle compile errors */
1534    
1535     errptr -= (int)strlen(prefix[process_options]);
1536     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1537    
1538     if (filename == NULL)
1539     {
1540     if (count == 0)
1541     fprintf(stderr, "pcregrep: Error in command-line regex "
1542     "at offset %d: %s\n", errptr, error);
1543     else
1544     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1545     "at offset %d: %s\n", ordin(count), errptr, error);
1546     }
1547     else
1548     {
1549     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1550     "at offset %d: %s\n", count, filename, errptr, error);
1551     }
1552    
1553     return FALSE;
1554     }
1555    
1556    
1557    
1558     /*************************************************
1559     * Compile one supplied pattern *
1560     *************************************************/
1561    
1562     /* When the -F option has been used, each string may be a list of strings,
1563 nigel 91 separated by line breaks. They will be matched literally.
1564 nigel 87
1565     Arguments:
1566     pattern the pattern string
1567     options the PCRE options
1568     filename the file name, or NULL for a command-line pattern
1569     count 0 if this is the only command line pattern, or
1570     number of the command line pattern, or
1571     linenumber for a pattern from a file
1572    
1573     Returns: TRUE on success, FALSE after an error
1574     */
1575    
1576     static BOOL
1577     compile_pattern(char *pattern, int options, char *filename, int count)
1578     {
1579     if ((process_options & PO_FIXED_STRINGS) != 0)
1580     {
1581 nigel 93 char *eop = pattern + strlen(pattern);
1582 nigel 87 char buffer[MBUFTHIRD];
1583     for(;;)
1584     {
1585 nigel 93 int ellength;
1586     char *p = end_of_line(pattern, eop, &ellength);
1587     if (ellength == 0)
1588 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1589 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1590 nigel 93 pattern = p;
1591 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1592     return FALSE;
1593     }
1594     }
1595     else return compile_single_pattern(pattern, options, filename, count);
1596     }
1597    
1598    
1599    
1600     /*************************************************
1601 nigel 49 * Main program *
1602     *************************************************/
1603    
1604 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1605    
1606 nigel 49 int
1607     main(int argc, char **argv)
1608     {
1609 nigel 53 int i, j;
1610 nigel 49 int rc = 1;
1611 nigel 87 int pcre_options = 0;
1612     int cmd_pattern_count = 0;
1613 ph10 141 int hint_count = 0;
1614 nigel 49 int errptr;
1615 nigel 87 BOOL only_one_at_top;
1616     char *patterns[MAX_PATTERN_COUNT];
1617     const char *locale_from = "--locale";
1618 nigel 49 const char *error;
1619    
1620 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1621     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1622     */
1623 nigel 91
1624     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1625     switch(i)
1626     {
1627     default: newline = (char *)"lf"; break;
1628     case '\r': newline = (char *)"cr"; break;
1629     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1630 nigel 93 case -1: newline = (char *)"any"; break;
1631 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1632 nigel 91 }
1633    
1634 nigel 49 /* Process the options */
1635    
1636     for (i = 1; i < argc; i++)
1637     {
1638 nigel 77 option_item *op = NULL;
1639     char *option_data = (char *)""; /* default to keep compiler happy */
1640     BOOL longop;
1641     BOOL longopwasequals = FALSE;
1642    
1643 nigel 49 if (argv[i][0] != '-') break;
1644 nigel 53
1645 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1646 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1647 nigel 63
1648 nigel 77 if (argv[i][1] == 0)
1649     {
1650 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1651 nigel 77 else exit(usage(2));
1652     }
1653 nigel 63
1654 nigel 77 /* Handle a long name option, or -- to terminate the options */
1655 nigel 53
1656     if (argv[i][1] == '-')
1657 nigel 49 {
1658 nigel 77 char *arg = argv[i] + 2;
1659     char *argequals = strchr(arg, '=');
1660 nigel 53
1661 nigel 77 if (*arg == 0) /* -- terminates options */
1662 nigel 49 {
1663 nigel 77 i++;
1664     break; /* out of the options-handling loop */
1665 nigel 53 }
1666 nigel 49
1667 nigel 77 longop = TRUE;
1668    
1669     /* Some long options have data that follows after =, for example file=name.
1670     Some options have variations in the long name spelling: specifically, we
1671     allow "regexp" because GNU grep allows it, though I personally go along
1672 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1673     These options are entered in the table as "regex(p)". No option is in both
1674     these categories, fortunately. */
1675 nigel 77
1676 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1677     {
1678 nigel 77 char *opbra = strchr(op->long_name, '(');
1679     char *equals = strchr(op->long_name, '=');
1680     if (opbra == NULL) /* Not a (p) case */
1681 nigel 53 {
1682 nigel 77 if (equals == NULL) /* Not thing=data case */
1683     {
1684     if (strcmp(arg, op->long_name) == 0) break;
1685     }
1686     else /* Special case xxx=data */
1687     {
1688     int oplen = equals - op->long_name;
1689 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691     {
1692     option_data = arg + arglen;
1693     if (*option_data == '=')
1694     {
1695     option_data++;
1696     longopwasequals = TRUE;
1697     }
1698     break;
1699     }
1700     }
1701 nigel 53 }
1702 nigel 77 else /* Special case xxxx(p) */
1703     {
1704     char buff1[24];
1705     char buff2[24];
1706     int baselen = opbra - op->long_name;
1707     sprintf(buff1, "%.*s", baselen, op->long_name);
1708 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1709 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711     break;
1712     }
1713 nigel 53 }
1714 nigel 77
1715 nigel 53 if (op->one_char == 0)
1716     {
1717     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1718     exit(usage(2));
1719     }
1720     }
1721 nigel 49
1722 nigel 89
1723     /* Jeffrey Friedl's debugging harness uses these additional options which
1724     are not in the right form for putting in the option table because they use
1725     only one hyphen, yet are more than one character long. By putting them
1726     separately here, they will not get displayed as part of the help() output,
1727     but I don't think Jeffrey will care about that. */
1728    
1729     #ifdef JFRIEDL_DEBUG
1730     else if (strcmp(argv[i], "-pre") == 0) {
1731     jfriedl_prefix = argv[++i];
1732     continue;
1733     } else if (strcmp(argv[i], "-post") == 0) {
1734     jfriedl_postfix = argv[++i];
1735     continue;
1736     } else if (strcmp(argv[i], "-XT") == 0) {
1737     sscanf(argv[++i], "%d", &jfriedl_XT);
1738     continue;
1739     } else if (strcmp(argv[i], "-XR") == 0) {
1740     sscanf(argv[++i], "%d", &jfriedl_XR);
1741     continue;
1742     }
1743     #endif
1744    
1745    
1746 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1747     continue till we hit the last one or one that needs data. */
1748 nigel 53
1749     else
1750     {
1751     char *s = argv[i] + 1;
1752 nigel 77 longop = FALSE;
1753 nigel 53 while (*s != 0)
1754     {
1755 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1756     { if (*s == op->one_char) break; }
1757     if (op->one_char == 0)
1758 nigel 53 {
1759 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1760     *s, argv[i]);
1761     exit(usage(2));
1762     }
1763     if (op->type != OP_NODATA || s[1] == 0)
1764     {
1765     option_data = s+1;
1766 nigel 53 break;
1767     }
1768 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1769 nigel 49 }
1770     }
1771 nigel 77
1772 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1773     is OP_NODATA, it means that there is no data, and the option might set
1774     something in the PCRE options. */
1775 nigel 77
1776     if (op->type == OP_NODATA)
1777     {
1778 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1779     continue;
1780     }
1781    
1782     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1783     either has a value or defaults to something. It cannot have data in a
1784     separate item. At the moment, the only such options are "colo(u)r" and
1785 nigel 89 Jeffrey Friedl's special -S debugging option. */
1786 nigel 87
1787     if (*option_data == 0 &&
1788     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1789     {
1790     switch (op->one_char)
1791 nigel 77 {
1792 nigel 87 case N_COLOUR:
1793     colour_option = (char *)"auto";
1794     break;
1795     #ifdef JFRIEDL_DEBUG
1796     case 'S':
1797     S_arg = 0;
1798     break;
1799     #endif
1800 nigel 77 }
1801 nigel 87 continue;
1802     }
1803 nigel 77
1804 nigel 87 /* Otherwise, find the data string for the option. */
1805    
1806     if (*option_data == 0)
1807     {
1808     if (i >= argc - 1 || longopwasequals)
1809 nigel 77 {
1810 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1811     exit(usage(2));
1812     }
1813     option_data = argv[++i];
1814     }
1815    
1816     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1817     multiple times to create a list of patterns. */
1818    
1819     if (op->type == OP_PATLIST)
1820     {
1821     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1822     {
1823     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1824     MAX_PATTERN_COUNT);
1825     return 2;
1826     }
1827     patterns[cmd_pattern_count++] = option_data;
1828     }
1829    
1830     /* Otherwise, deal with single string or numeric data values. */
1831    
1832     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1833     {
1834     *((char **)op->dataptr) = option_data;
1835     }
1836     else
1837     {
1838     char *endptr;
1839     int n = strtoul(option_data, &endptr, 10);
1840     if (*endptr != 0)
1841     {
1842     if (longop)
1843 nigel 77 {
1844 nigel 87 char *equals = strchr(op->long_name, '=');
1845     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1846     equals - op->long_name;
1847     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1848     option_data, nlen, op->long_name);
1849 nigel 77 }
1850 nigel 87 else
1851     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1852     option_data, op->one_char);
1853     exit(usage(2));
1854 nigel 77 }
1855 nigel 87 *((int *)op->dataptr) = n;
1856 nigel 77 }
1857 nigel 49 }
1858    
1859 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1860     for -A and -B. */
1861    
1862     if (both_context > 0)
1863     {
1864     if (after_context == 0) after_context = both_context;
1865     if (before_context == 0) before_context = both_context;
1866     }
1867 ph10 280
1868     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869     However, the latter two set the only_matching flag. */
1870 nigel 77
1871 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
1872     (file_offsets && line_offsets))
1873     {
1874     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875     "and/or --line-offsets\n");
1876     exit(usage(2));
1877     }
1878    
1879     if (file_offsets || line_offsets) only_matching = TRUE;
1880    
1881 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882     LC_ALL environment variable is set, and if so, use it. */
1883 nigel 49
1884 nigel 87 if (locale == NULL)
1885 nigel 53 {
1886 nigel 87 locale = getenv("LC_ALL");
1887     locale_from = "LCC_ALL";
1888 nigel 53 }
1889 nigel 49
1890 nigel 87 if (locale == NULL)
1891     {
1892     locale = getenv("LC_CTYPE");
1893     locale_from = "LC_CTYPE";
1894     }
1895 nigel 49
1896 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1897     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1898    
1899     if (locale != NULL)
1900 nigel 49 {
1901 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1902 nigel 53 {
1903 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1904     locale, locale_from);
1905 nigel 53 return 2;
1906     }
1907 nigel 87 pcretables = pcre_maketables();
1908     }
1909 nigel 77
1910 nigel 87 /* Sort out colouring */
1911    
1912     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1913     {
1914     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1915     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1916     else
1917 nigel 53 {
1918 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1919     colour_option);
1920     return 2;
1921 nigel 77 }
1922 nigel 87 if (do_colour)
1923 nigel 77 {
1924 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1925     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1926     if (cs != NULL) colour_string = cs;
1927 nigel 77 }
1928 nigel 87 }
1929 nigel 77
1930 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1931    
1932     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1933     {
1934     pcre_options |= PCRE_NEWLINE_CR;
1935 nigel 93 endlinetype = EL_CR;
1936 nigel 91 }
1937     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1938     {
1939     pcre_options |= PCRE_NEWLINE_LF;
1940 nigel 93 endlinetype = EL_LF;
1941 nigel 91 }
1942     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1943     {
1944     pcre_options |= PCRE_NEWLINE_CRLF;
1945 nigel 93 endlinetype = EL_CRLF;
1946 nigel 91 }
1947 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1948     {
1949     pcre_options |= PCRE_NEWLINE_ANY;
1950     endlinetype = EL_ANY;
1951     }
1952 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1953     {
1954     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1955     endlinetype = EL_ANYCRLF;
1956     }
1957 nigel 91 else
1958     {
1959     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1960     return 2;
1961     }
1962    
1963 nigel 87 /* Interpret the text values for -d and -D */
1964    
1965     if (dee_option != NULL)
1966     {
1967     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1968     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1969     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1970     else
1971 nigel 77 {
1972 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1973     return 2;
1974 nigel 53 }
1975 nigel 49 }
1976    
1977 nigel 87 if (DEE_option != NULL)
1978     {
1979     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1980     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1981     else
1982     {
1983     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1984     return 2;
1985     }
1986     }
1987 nigel 49
1988 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1989 nigel 87
1990     #ifdef JFRIEDL_DEBUG
1991     if (S_arg > 9)
1992 nigel 49 {
1993 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1994     return 2;
1995     }
1996 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1997     {
1998     if (jfriedl_XT == 0) jfriedl_XT = 1;
1999     if (jfriedl_XR == 0) jfriedl_XR = 1;
2000     }
2001 nigel 87 #endif
2002 nigel 77
2003 nigel 87 /* Get memory to store the pattern and hints lists. */
2004    
2005     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2006     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2007    
2008     if (pattern_list == NULL || hints_list == NULL)
2009     {
2010     fprintf(stderr, "pcregrep: malloc failed\n");
2011 ph10 123 goto EXIT2;
2012 nigel 87 }
2013    
2014     /* If no patterns were provided by -e, and there is no file provided by -f,
2015     the first argument is the one and only pattern, and it must exist. */
2016    
2017     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2018     {
2019 nigel 63 if (i >= argc) return usage(2);
2020 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2021     }
2022 nigel 77
2023 nigel 87 /* Compile the patterns that were provided on the command line, either by
2024     multiple uses of -e or as a single unkeyed pattern. */
2025    
2026     for (j = 0; j < cmd_pattern_count; j++)
2027     {
2028     if (!compile_pattern(patterns[j], pcre_options, NULL,
2029     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2030 ph10 123 goto EXIT2;
2031 nigel 87 }
2032    
2033     /* Compile the regular expressions that are provided in a file. */
2034    
2035     if (pattern_filename != NULL)
2036     {
2037     int linenumber = 0;
2038     FILE *f;
2039     char *filename;
2040     char buffer[MBUFTHIRD];
2041    
2042     if (strcmp(pattern_filename, "-") == 0)
2043 nigel 77 {
2044 nigel 87 f = stdin;
2045     filename = stdin_name;
2046 nigel 77 }
2047 nigel 87 else
2048 nigel 77 {
2049 nigel 87 f = fopen(pattern_filename, "r");
2050     if (f == NULL)
2051     {
2052     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2053     strerror(errno));
2054 ph10 123 goto EXIT2;
2055 nigel 87 }
2056     filename = pattern_filename;
2057 nigel 77 }
2058    
2059 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2060 nigel 53 {
2061 nigel 87 char *s = buffer + (int)strlen(buffer);
2062     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2063     *s = 0;
2064     linenumber++;
2065     if (buffer[0] == 0) continue; /* Skip blank lines */
2066     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2067 ph10 121 goto EXIT2;
2068 nigel 53 }
2069 nigel 87
2070     if (f != stdin) fclose(f);
2071 nigel 49 }
2072    
2073 nigel 77 /* Study the regular expressions, as we will be running them many times */
2074 nigel 53
2075     for (j = 0; j < pattern_count; j++)
2076     {
2077     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2078     if (error != NULL)
2079     {
2080     char s[16];
2081     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2082     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2083 ph10 121 goto EXIT2;
2084 nigel 53 }
2085 ph10 142 hint_count++;
2086 nigel 53 }
2087    
2088 nigel 77 /* If there are include or exclude patterns, compile them. */
2089    
2090     if (exclude_pattern != NULL)
2091     {
2092 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2093     pcretables);
2094 nigel 77 if (exclude_compiled == NULL)
2095     {
2096     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2097     errptr, error);
2098 ph10 121 goto EXIT2;
2099 nigel 77 }
2100     }
2101    
2102     if (include_pattern != NULL)
2103     {
2104 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2105     pcretables);
2106 nigel 77 if (include_compiled == NULL)
2107     {
2108     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2109     errptr, error);
2110 ph10 121 goto EXIT2;
2111 nigel 77 }
2112     }
2113    
2114 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2115 nigel 49
2116 nigel 87 if (i >= argc)
2117 ph10 121 {
2118     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2119     goto EXIT;
2120 ph10 123 }
2121 nigel 49
2122 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2123     Pass in the fact that there is only one argument at top level - this suppresses
2124 nigel 87 the file name if the argument is not a directory and filenames are not
2125     otherwise forced. */
2126 nigel 49
2127 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2128 nigel 49
2129     for (; i < argc; i++)
2130     {
2131 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2132     only_one_at_top);
2133 nigel 77 if (frc > 1) rc = frc;
2134     else if (frc == 0 && rc == 1) rc = 0;
2135 nigel 49 }
2136    
2137 ph10 121 EXIT:
2138     if (pattern_list != NULL)
2139     {
2140 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2141 ph10 121 free(pattern_list);
2142 ph10 123 }
2143 ph10 121 if (hints_list != NULL)
2144     {
2145 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2146 ph10 121 free(hints_list);
2147 ph10 123 }
2148 nigel 49 return rc;
2149 ph10 121
2150     EXIT2:
2151     rc = 2;
2152     goto EXIT;
2153 nigel 49 }
2154    
2155 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12