/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 324 - (hide annotations) (download)
Fri Mar 7 19:48:32 2008 UTC (6 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 65711 byte(s)
Fix bugs with --include and --exclude in pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 305 Copyright (c) 1997-2008 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 nigel 49
75 nigel 77 #if BUFSIZ > 8192
76     #define MBUFTHIRD BUFSIZ
77     #else
78     #define MBUFTHIRD 8192
79     #endif
80 nigel 49
81 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
82     output. The order is important; it is assumed that a file name is wanted for
83     all values greater than FN_DEFAULT. */
84 nigel 77
85 nigel 87 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87 ph10 286 /* File reading styles */
88    
89     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91 nigel 87 /* Actions for the -d and -D options */
92    
93     enum { dee_READ, dee_SKIP, dee_RECURSE };
94     enum { DEE_READ, DEE_SKIP };
95    
96     /* Actions for special processing options (flag bits) */
97    
98     #define PO_WORD_MATCH 0x0001
99     #define PO_LINE_MATCH 0x0002
100     #define PO_FIXED_STRINGS 0x0004
101    
102 nigel 93 /* Line ending types */
103 nigel 87
104 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105 nigel 87
106 nigel 93
107    
108 nigel 49 /*************************************************
109     * Global variables *
110     *************************************************/
111    
112 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
113     regular code. */
114    
115     #ifdef JFRIEDL_DEBUG
116     static int S_arg = -1;
117 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119     static const char *jfriedl_prefix = "";
120     static const char *jfriedl_postfix = "";
121 nigel 87 #endif
122    
123 nigel 93 static int endlinetype;
124 nigel 91
125 nigel 87 static char *colour_string = (char *)"1;31";
126     static char *colour_option = NULL;
127     static char *dee_option = NULL;
128     static char *DEE_option = NULL;
129 nigel 91 static char *newline = NULL;
130 nigel 53 static char *pattern_filename = NULL;
131 nigel 77 static char *stdin_name = (char *)"(standard input)";
132 nigel 87 static char *locale = NULL;
133    
134     static const unsigned char *pcretables = NULL;
135    
136 nigel 53 static int pattern_count = 0;
137 ph10 121 static pcre **pattern_list = NULL;
138     static pcre_extra **hints_list = NULL;
139 nigel 49
140 nigel 77 static char *include_pattern = NULL;
141     static char *exclude_pattern = NULL;
142    
143     static pcre *include_compiled = NULL;
144     static pcre *exclude_compiled = NULL;
145    
146     static int after_context = 0;
147     static int before_context = 0;
148     static int both_context = 0;
149 nigel 87 static int dee_action = dee_READ;
150     static int DEE_action = DEE_READ;
151     static int error_count = 0;
152     static int filenames = FN_DEFAULT;
153     static int process_options = 0;
154 nigel 77
155 nigel 49 static BOOL count_only = FALSE;
156 nigel 87 static BOOL do_colour = FALSE;
157 ph10 280 static BOOL file_offsets = FALSE;
158 nigel 77 static BOOL hyphenpending = FALSE;
159 nigel 49 static BOOL invert = FALSE;
160 ph10 280 static BOOL line_offsets = FALSE;
161 nigel 77 static BOOL multiline = FALSE;
162 nigel 49 static BOOL number = FALSE;
163 nigel 87 static BOOL only_matching = FALSE;
164 nigel 77 static BOOL quiet = FALSE;
165 nigel 49 static BOOL silent = FALSE;
166 nigel 93 static BOOL utf8 = FALSE;
167 nigel 49
168 nigel 53 /* Structure for options and list of them */
169 nigel 49
170 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
171     OP_PATLIST };
172 nigel 77
/* One entry in the command-line option table (optionlist). */
173 nigel 53 typedef struct option_item {
174 nigel 77 int type; /* One of the OP_xxx values: what kind of data the option takes */
175 nigel 53 int one_char; /* Single-letter form, or a negative N_xxx value if there is none */
176 nigel 77 void *dataptr; /* Variable associated with the option in the table, or NULL */
177 nigel 67 const char *long_name; /* Long option name, as shown in the help output */
178     const char *help_text; /* One-line description for the help output */
179 nigel 53 } option_item;
180 nigel 49
181 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
182     used to identify them. */
183    
184     #define N_COLOUR (-1)
185     #define N_EXCLUDE (-2)
186     #define N_HELP (-3)
187     #define N_INCLUDE (-4)
188     #define N_LABEL (-5)
189     #define N_LOCALE (-6)
190     #define N_NULL (-7)
191 ph10 280 #define N_LOFFSETS (-8)
192     #define N_FOFFSETS (-9)
193 nigel 87
194 nigel 53 static option_item optionlist[] = {
195 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
196     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
197     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
198     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
199     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
200     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
201     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
202     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
203     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
204     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
205     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
206     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
207     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
208 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
209 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
210     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
211     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
212     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
213     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
214     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
215 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
216 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
217     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
218 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
219 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
220     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
221     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
222     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
223     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
224     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
225     #ifdef JFRIEDL_DEBUG
226     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
227     #endif
228     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
229     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
230     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
231     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
232     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
233     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
234     { OP_NODATA, 0, NULL, NULL, NULL }
235 nigel 53 };
236    
237 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
238     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
239     that the combination of -w and -x has the same effect as -x on its own, so we
240     can treat them as the same. */
241 nigel 53
242 nigel 87 static const char *prefix[] = {
243     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
244    
245     static const char *suffix[] = {
246     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
247    
248 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
249 nigel 87
250 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
251 nigel 87
252 nigel 93 const char utf8_table4[] = {
253     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
254     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
255     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
256     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
257    
258    
259    
260 nigel 53 /*************************************************
261 nigel 87 * OS-specific functions *
262 nigel 53 *************************************************/
263    
264     /* These functions are defined so that they can be made system specific,
265 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
266 nigel 53
267    
268     /************* Directory scanning in Unix ***********/
269    
270 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
271 nigel 53 #include <sys/types.h>
272     #include <sys/stat.h>
273     #include <dirent.h>
274    
275     typedef DIR directory_type;
276    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to examine
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A path that cannot be examined is reported as "not a directory", in the
expectation that the later attempt to open it as a file will fail. */

if (stat(filename, &info) < 0) return 0;

if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
285    
286 nigel 67 static directory_type *
287 nigel 53 opendirectory(char *filename)
288     {
289     return opendir(filename);
290     }
291    
292 nigel 67 static char *
293 nigel 53 readdirectory(directory_type *dir)
294     {
295     for (;;)
296     {
297     struct dirent *dent = readdir(dir);
298     if (dent == NULL) return NULL;
299     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
300     return dent->d_name;
301     }
302 ph10 151 /* Control never reaches here */
303 nigel 53 }
304    
305 nigel 67 static void
306 nigel 53 closedirectory(directory_type *dir)
307     {
308     closedir(dir);
309     }
310    
311    
312 nigel 87 /************* Test for regular file in Unix **********/
313    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to examine
Returns:    1 if stat() reports a regular file, 0 for any other file type;
            also 1 when stat() fails
*/

static int
isregfile(char *filename)
{
struct stat info;

/* If the path cannot be examined, claim it is regular, in the expectation
that the later attempt to open it as a file will fail. */

if (stat(filename, &info) < 0) return 1;

return S_ISREG(info.st_mode)? 1 : 0;
}
322    
323    
324     /************* Test stdout for being a terminal in Unix **********/
325    
326     static BOOL
327     is_stdout_tty(void)
328     {
329     return isatty(fileno(stdout));
330     }
331    
332    
333 nigel 63 /************* Directory scanning in Win32 ***********/
334 nigel 53
335 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
336 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
338     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339 ph10 283 */
340 nigel 53
341 ph10 97 #elif HAVE_WINDOWS_H
342 nigel 63
343     #ifndef STRICT
344     # define STRICT
345     #endif
346     #ifndef WIN32_LEAN_AND_MEAN
347     # define WIN32_LEAN_AND_MEAN
348     #endif
349 ph10 283
350     #include <windows.h>
351    
352 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
353     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
354     #endif
355    
/* State for scanning one directory with the Win32 FindFirstFile() and
FindNextFile() functions. */
356 nigel 63 typedef struct directory_type
357     {
358     HANDLE handle; /* Search handle returned by FindFirstFile() */
359     BOOL first; /* TRUE until the entry found by FindFirstFile() has been read */
360     WIN32_FIND_DATA data; /* Details of the most recently found entry */
361     } directory_type;
362    
363     int
364     isdirectory(char *filename)
365     {
366     DWORD attr = GetFileAttributes(filename);
367     if (attr == INVALID_FILE_ATTRIBUTES)
368     return 0;
369     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
370     }
371    
372     directory_type *
373     opendirectory(char *filename)
374     {
375     size_t len;
376     char *pattern;
377     directory_type *dir;
378     DWORD err;
379     len = strlen(filename);
380     pattern = (char *) malloc(len + 3);
381     dir = (directory_type *) malloc(sizeof(*dir));
382     if ((pattern == NULL) || (dir == NULL))
383     {
384     fprintf(stderr, "pcregrep: malloc failed\n");
385     exit(2);
386     }
387     memcpy(pattern, filename, len);
388     memcpy(&(pattern[len]), "\\*", 3);
389     dir->handle = FindFirstFile(pattern, &(dir->data));
390     if (dir->handle != INVALID_HANDLE_VALUE)
391     {
392     free(pattern);
393     dir->first = TRUE;
394     return dir;
395     }
396     err = GetLastError();
397     free(pattern);
398     free(dir);
399     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
400     return NULL;
401     }
402    
403     char *
404     readdirectory(directory_type *dir)
405     {
406     for (;;)
407     {
408     if (!dir->first)
409     {
410     if (!FindNextFile(dir->handle, &(dir->data)))
411     return NULL;
412     }
413     else
414     {
415     dir->first = FALSE;
416     }
417     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
418     return dir->data.cFileName;
419     }
420     #ifndef _MSC_VER
421     return NULL; /* Keep compiler happy; never executed */
422     #endif
423     }
424    
425     void
426     closedirectory(directory_type *dir)
427     {
428     FindClose(dir->handle);
429     free(dir);
430     }
431    
432    
433 nigel 87 /************* Test for regular file in Win32 **********/
434    
435     /* I don't know how to do this, or if it can be done; assume all paths are
436     regular if they are not directories. */
437    
/* Win32 version: anything that isdirectory() does not report as a directory
is treated as a regular file. Returns 1 for "regular", 0 otherwise. */

int isregfile(char *filename)
{
return (isdirectory(filename) == 0)? 1 : 0;
}
442    
443    
444     /************* Test stdout for being a terminal in Win32 **********/
445    
446     /* I don't know how to do this; assume never */
447    
448     static BOOL
449     is_stdout_tty(void)
450     {
451 ph10 283 return FALSE;
452 nigel 87 }
453    
454    
455 nigel 53 /************* Directory scanning when we can't do it ***********/
456    
457     /* The type is void, and apart from isdirectory(), the functions do nothing. */
458    
459 nigel 63 #else
460    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always fails. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
467    
468 nigel 87
469     /************* Test for regular when we can't do it **********/
470    
471     /* Assume all files are regular. */
472    
/* With no way of testing, every path is claimed to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
474    
475    
476     /************* Test stdout for being a terminal when we can't do it **********/
477    
478     static BOOL
479     is_stdout_tty(void)
480     {
481     return FALSE;
482     }
483    
484    
485 nigel 53 #endif
486    
487    
488    
489 ph10 137 #ifndef HAVE_STRERROR
490 nigel 49 /*************************************************
491     * Provide strerror() for non-ANSI libraries *
492     *************************************************/
493    
494     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
495     in their libraries, but can provide the same facility by this simple
496     alternative function. */
497    
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in the system's error
table, giving a fixed text for any number outside the table. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
507     #endif /* HAVE_STRERROR */
508    
509    
510    
511     /*************************************************
512 nigel 93 * Find end of line *
513     *************************************************/
514    
515     /* The length of the endline sequence that is found is set via lenptr. This may
516     be zero at the very end of the file if there is no line-ending sequence there.
517    
518     Arguments:
519     p current position in line
520     endptr end of available data
521     lenptr where to put the length of the eol sequence
522    
523     Returns: pointer to the byte after the end of the line, including its line-ending sequence (if any)
524     */
525    
526     static char *
527     end_of_line(char *p, char *endptr, int *lenptr)
528     {
529     switch(endlinetype)
530     {
531     default: /* Just in case */
532     case EL_LF:
533     while (p < endptr && *p != '\n') p++;
534     if (p < endptr)
535     {
536     *lenptr = 1;
537     return p + 1;
538     }
539     *lenptr = 0;
540     return endptr;
541    
542     case EL_CR:
543     while (p < endptr && *p != '\r') p++;
544     if (p < endptr)
545     {
546     *lenptr = 1;
547     return p + 1;
548     }
549     *lenptr = 0;
550     return endptr;
551    
552     case EL_CRLF:
553     for (;;)
554     {
555     while (p < endptr && *p != '\r') p++;
556     if (++p >= endptr)
557     {
558     *lenptr = 0;
559     return endptr;
560     }
561     if (*p == '\n')
562     {
563     *lenptr = 2;
564     return p + 1;
565     }
566     }
567     break;
568    
569 ph10 149 case EL_ANYCRLF:
570     while (p < endptr)
571     {
572     int extra = 0;
573     register int c = *((unsigned char *)p);
574    
575     if (utf8 && c >= 0xc0)
576     {
577     int gcii, gcss;
578     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
579     gcss = 6*extra;
580     c = (c & utf8_table3[extra]) << gcss;
581     for (gcii = 1; gcii <= extra; gcii++)
582     {
583     gcss -= 6;
584     c |= (p[gcii] & 0x3f) << gcss;
585     }
586     }
587    
588     p += 1 + extra;
589    
590     switch (c)
591     {
592     case 0x0a: /* LF */
593     *lenptr = 1;
594     return p;
595    
596     case 0x0d: /* CR */
597     if (p < endptr && *p == 0x0a)
598     {
599     *lenptr = 2;
600     p++;
601     }
602     else *lenptr = 1;
603     return p;
604 ph10 150
605 ph10 149 default:
606     break;
607     }
608     } /* End of loop for ANYCRLF case */
609 ph10 150
610 ph10 149 *lenptr = 0; /* Must have hit the end */
611     return endptr;
612    
613 nigel 93 case EL_ANY:
614     while (p < endptr)
615     {
616     int extra = 0;
617     register int c = *((unsigned char *)p);
618    
619     if (utf8 && c >= 0xc0)
620     {
621     int gcii, gcss;
622     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
623     gcss = 6*extra;
624     c = (c & utf8_table3[extra]) << gcss;
625     for (gcii = 1; gcii <= extra; gcii++)
626     {
627     gcss -= 6;
628     c |= (p[gcii] & 0x3f) << gcss;
629     }
630     }
631    
632     p += 1 + extra;
633    
634     switch (c)
635     {
636     case 0x0a: /* LF */
637     case 0x0b: /* VT */
638     case 0x0c: /* FF */
639     *lenptr = 1;
640     return p;
641    
642     case 0x0d: /* CR */
643     if (p < endptr && *p == 0x0a)
644     {
645     *lenptr = 2;
646     p++;
647     }
648     else *lenptr = 1;
649     return p;
650    
651     case 0x85: /* NEL */
652     *lenptr = utf8? 2 : 1;
653     return p;
654    
655     case 0x2028: /* LS */
656     case 0x2029: /* PS */
657     *lenptr = 3;
658     return p;
659    
660     default:
661     break;
662     }
663     } /* End of loop for ANY case */
664    
665     *lenptr = 0; /* Must have hit the end */
666     return endptr;
667     } /* End of overall switch */
668     }
669    
670    
671    
672     /*************************************************
673     * Find start of previous line *
674     *************************************************/
675    
676     /* This is called when looking back for before lines to print.
677    
678     Arguments:
679     p start of the subsequent line
680     startptr start of available data
681    
682     Returns: pointer to the start of the previous line
683     */
684    
685     static char *
686     previous_line(char *p, char *startptr)
687     {
688     switch(endlinetype)
689     {
690     default: /* Just in case */
691     case EL_LF:
692     p--;
693     while (p > startptr && p[-1] != '\n') p--;
694     return p;
695    
696     case EL_CR:
697     p--;
698     while (p > startptr && p[-1] != '\n') p--;
699     return p;
700    
701     case EL_CRLF:
702     for (;;)
703     {
704     p -= 2;
705     while (p > startptr && p[-1] != '\n') p--;
706     if (p <= startptr + 1 || p[-2] == '\r') return p;
707     }
708     return p; /* But control should never get here */
709    
710     case EL_ANY:
711 ph10 150 case EL_ANYCRLF:
712 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
713     if (utf8) while ((*p & 0xc0) == 0x80) p--;
714    
715     while (p > startptr)
716     {
717     register int c;
718     char *pp = p - 1;
719    
720     if (utf8)
721     {
722     int extra = 0;
723     while ((*pp & 0xc0) == 0x80) pp--;
724     c = *((unsigned char *)pp);
725     if (c >= 0xc0)
726     {
727     int gcii, gcss;
728     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729     gcss = 6*extra;
730     c = (c & utf8_table3[extra]) << gcss;
731     for (gcii = 1; gcii <= extra; gcii++)
732     {
733     gcss -= 6;
734     c |= (pp[gcii] & 0x3f) << gcss;
735     }
736     }
737     }
738     else c = *((unsigned char *)pp);
739    
740 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
741 nigel 93 {
742     case 0x0a: /* LF */
743 ph10 149 case 0x0d: /* CR */
744     return p;
745 ph10 150
746 ph10 149 default:
747     break;
748 ph10 150 }
749 ph10 149
750     else switch (c)
751     {
752     case 0x0a: /* LF */
753 nigel 93 case 0x0b: /* VT */
754     case 0x0c: /* FF */
755     case 0x0d: /* CR */
756     case 0x85: /* NEL */
757     case 0x2028: /* LS */
758     case 0x2029: /* PS */
759     return p;
760    
761     default:
762     break;
763     }
764    
765     p = pp; /* Back one character */
766     } /* End of loop for ANY case */
767    
768     return startptr; /* Hit start of data */
769     } /* End of overall switch */
770     }
771    
772    
773    
774    
775    
776     /*************************************************
777 nigel 77 * Print the previous "after" lines *
778 nigel 49 *************************************************/
779    
780 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
781 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
782     that a binary zero does not terminate it.
783 nigel 77
784     Arguments:
785     lastmatchnumber the number of the last matching line, plus one
786     lastmatchrestart where we restarted after the last match
787     endptr end of available data
788     printname filename for printing
789    
790     Returns: nothing
791     */
792    
793     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
794     char *endptr, char *printname)
795     {
796     if (after_context > 0 && lastmatchnumber > 0)
797     {
798     int count = 0;
799     while (lastmatchrestart < endptr && count++ < after_context)
800     {
801 nigel 93 int ellength;
802 nigel 77 char *pp = lastmatchrestart;
803     if (printname != NULL) fprintf(stdout, "%s-", printname);
804     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
805 nigel 93 pp = end_of_line(pp, endptr, &ellength);
806     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
807     lastmatchrestart = pp;
808 nigel 77 }
809     hyphenpending = TRUE;
810     }
811     }
812    
813    
814    
815     /*************************************************
816     * Grep an individual file *
817     *************************************************/
818    
819     /* This is called from grep_or_recurse() below. It uses a buffer that is three
820     times the value of MBUFTHIRD. The matching point is never allowed to stray into
821     the top third of the buffer, thus keeping more of the file available for
822     context printing or for multiline scanning. For large files, the pointer will
823     be in the middle third most of the time, so the bottom third is available for
824     "before" context printing.
825    
826     Arguments:
827 ph10 286 handle the fopened FILE stream for a normal file
828     the gzFile pointer when reading is via libz
829     the BZFILE pointer when reading is via libbz2
830     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831 nigel 77 printname the file name if it is to be printed for each match
832     or NULL if the file name is not to be printed
833     it cannot be NULL if filenames[_nomatch]_only is set
834    
835     Returns: 0 if there was at least one match
836     1 otherwise (no matches)
837 ph10 286 2 if there is a read error on a .bz2 file
838 nigel 77 */
839    
840 nigel 49 static int
841 ph10 286 pcregrep(void *handle, int frtype, char *printname)
842 nigel 49 {
843     int rc = 1;
844 nigel 77 int linenumber = 1;
845     int lastmatchnumber = 0;
846 nigel 49 int count = 0;
847 ph10 280 int filepos = 0;
848 nigel 49 int offsets[99];
849 nigel 77 char *lastmatchrestart = NULL;
850     char buffer[3*MBUFTHIRD];
851     char *ptr = buffer;
852     char *endptr;
853     size_t bufflength;
854     BOOL endhyphenpending = FALSE;
855 ph10 286 FILE *in = NULL; /* Ensure initialized */
856 nigel 49
857 ph10 286 #ifdef SUPPORT_LIBZ
858     gzFile ingz = NULL;
859     #endif
860 nigel 77
861 ph10 286 #ifdef SUPPORT_LIBBZ2
862     BZFILE *inbz2 = NULL;
863     #endif
864    
865    
866     /* Do the first read into the start of the buffer and set up the pointer to end
867     of what we have. In the case of libz, a non-zipped .gz file will be read as a
868     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869     fail. */
870    
871     #ifdef SUPPORT_LIBZ
872     if (frtype == FR_LIBZ)
873     {
874     ingz = (gzFile)handle;
875     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876     }
877     else
878     #endif
879    
880     #ifdef SUPPORT_LIBBZ2
881     if (frtype == FR_LIBBZ2)
882     {
883     inbz2 = (BZFILE *)handle;
884     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
886     } /* without the cast it is unsigned. */
887     else
888     #endif
889    
890     {
891     in = (FILE *)handle;
892     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893     }
894    
895 nigel 77 endptr = buffer + bufflength;
896    
897     /* Loop while the current pointer is not at the end of the file. For large
898     files, endptr will be at the end of the buffer when we are in the middle of the
899     file, but ptr will never get there, because as soon as it gets over 2/3 of the
900     way, the buffer is shifted left and re-filled. */
901    
902     while (ptr < endptr)
903 nigel 49 {
904 nigel 93 int i, endlinelength;
905 nigel 87 int mrc = 0;
906 nigel 53 BOOL match = FALSE;
907 ph10 286 char *matchptr = ptr;
908 nigel 77 char *t = ptr;
909     size_t length, linelength;
910 nigel 49
911 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
912     of the subject string to pass to pcre_exec(). In multiline mode, it is the
913     length remainder of the data in the buffer. Otherwise, it is the length of
914     the next line. After matching, we always advance by the length of the next
915     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
916     that any match is constrained to be in the first line. */
917    
918 nigel 93 t = end_of_line(t, endptr, &endlinelength);
919     linelength = t - ptr - endlinelength;
920 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
921 nigel 77
922 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
923    
924     #ifdef JFRIEDL_DEBUG
925     if (jfriedl_XT || jfriedl_XR)
926     {
927     #include <sys/time.h>
928     #include <time.h>
929     struct timeval start_time, end_time;
930     struct timezone dummy;
931    
932     if (jfriedl_XT)
933     {
934     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
935     const char *orig = ptr;
936     ptr = malloc(newlen + 1);
937     if (!ptr) {
938     printf("out of memory");
939     exit(2);
940     }
941     endptr = ptr;
942     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
943     for (i = 0; i < jfriedl_XT; i++) {
944     strncpy(endptr, orig, length);
945     endptr += length;
946     }
947     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
948     length = newlen;
949     }
950    
951     if (gettimeofday(&start_time, &dummy) != 0)
952     perror("bad gettimeofday");
953    
954    
955     for (i = 0; i < jfriedl_XR; i++)
956     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
957    
958     if (gettimeofday(&end_time, &dummy) != 0)
959     perror("bad gettimeofday");
960    
961     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
962     -
963     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
964    
965     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
966     return 0;
967     }
968     #endif
969    
970 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
971 ph10 279 in order to find any further matches in the same line. */
972 nigel 89
973 ph10 286 ONLY_MATCHING_RESTART:
974    
975 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
976     the final newline in the subject string. */
977    
978 nigel 87 for (i = 0; i < pattern_count; i++)
979 nigel 53 {
980 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
981 nigel 87 offsets, 99);
982     if (mrc >= 0) { match = TRUE; break; }
983     if (mrc != PCRE_ERROR_NOMATCH)
984     {
985     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
986     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
987     fprintf(stderr, "this line:\n");
988 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
989 nigel 87 fprintf(stderr, "\n");
990     if (error_count == 0 &&
991     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
992     {
993     fprintf(stderr, "pcregrep: error %d means that a resource limit "
994     "was exceeded\n", mrc);
995     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
996     }
997     if (error_count++ > 20)
998     {
999     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1000     exit(2);
1001     }
1002     match = invert; /* No more matching; don't show the line again */
1003     break;
1004     }
1005 nigel 53 }
1006 nigel 49
1007 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1008 nigel 77
1009 nigel 49 if (match != invert)
1010     {
1011 nigel 77 BOOL hyphenprinted = FALSE;
1012    
1013 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1014 nigel 77
1015 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1016    
1017     /* Just count if just counting is wanted. */
1018    
1019 nigel 49 if (count_only) count++;
1020    
1021 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1022     in the file. */
1023    
1024     else if (filenames == FN_ONLY)
1025 nigel 49 {
1026 nigel 77 fprintf(stdout, "%s\n", printname);
1027 nigel 49 return 0;
1028     }
1029    
1030 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1031    
1032 nigel 77 else if (quiet) return 0;
1033 nigel 49
1034 nigel 87 /* The --only-matching option prints just the substring that matched, and
1035 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1036 ph10 280 matching substring (they both force --only-matching). None of these options
1037     prints any context. Afterwards, adjust the start and length, and then jump
1038     back to look for further matches in the same line. If we are in invert
1039     mode, however, nothing is printed - this could be still useful because the
1040     return code is set. */
1041 nigel 87
1042     else if (only_matching)
1043     {
1044 ph10 279 if (!invert)
1045 ph10 286 {
1046 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1047     if (number) fprintf(stdout, "%d:", linenumber);
1048 ph10 280 if (line_offsets)
1049     fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050 ph10 286 offsets[1] - offsets[0]);
1051 ph10 280 else if (file_offsets)
1052     fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053 ph10 286 offsets[1] - offsets[0]);
1054     else
1055 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056 ph10 279 fprintf(stdout, "\n");
1057     matchptr += offsets[1];
1058     length -= offsets[1];
1059 ph10 286 match = FALSE;
1060     goto ONLY_MATCHING_RESTART;
1061     }
1062 nigel 87 }
1063    
1064     /* This is the default case when none of the above options is set. We print
1065     the matching lines(s), possibly preceded and/or followed by other lines of
1066     context. */
1067    
1068 nigel 49 else
1069     {
1070 nigel 77 /* See if there is a requirement to print some "after" lines from a
1071     previous match. We never print any overlaps. */
1072    
1073     if (after_context > 0 && lastmatchnumber > 0)
1074     {
1075 nigel 93 int ellength;
1076 nigel 77 int linecount = 0;
1077     char *p = lastmatchrestart;
1078    
1079     while (p < ptr && linecount < after_context)
1080     {
1081 nigel 93 p = end_of_line(p, ptr, &ellength);
1082 nigel 77 linecount++;
1083     }
1084    
1085     /* It is important to advance lastmatchrestart during this printing so
1086 nigel 87 that it interacts correctly with any "before" printing below. Print
1087     each line's data using fwrite() in case there are binary zeroes. */
1088 nigel 77
1089     while (lastmatchrestart < p)
1090     {
1091     char *pp = lastmatchrestart;
1092     if (printname != NULL) fprintf(stdout, "%s-", printname);
1093     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1094 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1095     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1096     lastmatchrestart = pp;
1097 nigel 77 }
1098     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1099     }
1100    
1101     /* If there were non-contiguous lines printed above, insert hyphens. */
1102    
1103     if (hyphenpending)
1104     {
1105     fprintf(stdout, "--\n");
1106     hyphenpending = FALSE;
1107     hyphenprinted = TRUE;
1108     }
1109    
1110     /* See if there is a requirement to print some "before" lines for this
1111     match. Again, don't print overlaps. */
1112    
1113     if (before_context > 0)
1114     {
1115     int linecount = 0;
1116     char *p = ptr;
1117    
1118     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1119 nigel 87 linecount < before_context)
1120 nigel 77 {
1121 nigel 87 linecount++;
1122 nigel 93 p = previous_line(p, buffer);
1123 nigel 77 }
1124    
1125     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1126     fprintf(stdout, "--\n");
1127    
1128     while (p < ptr)
1129     {
1130 nigel 93 int ellength;
1131 nigel 77 char *pp = p;
1132     if (printname != NULL) fprintf(stdout, "%s-", printname);
1133     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1134 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1135     fwrite(p, 1, pp - p, stdout);
1136     p = pp;
1137 nigel 77 }
1138     }
1139    
1140     /* Now print the matching line(s); ensure we set hyphenpending at the end
1141 nigel 85 of the file if any context lines are being output. */
1142 nigel 77
1143 nigel 85 if (after_context > 0 || before_context > 0)
1144     endhyphenpending = TRUE;
1145    
1146 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1147 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1148 nigel 77
1149     /* In multiline mode, we want to print to the end of the line in which
1150     the end of the matched string is found, so we adjust linelength and the
1151 ph10 222 line number appropriately, but only when there actually was a match
1152     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1153     the match will always be before the first newline sequence. */
1154 nigel 77
1155     if (multiline)
1156     {
1157 nigel 93 int ellength;
1158 ph10 222 char *endmatch = ptr;
1159     if (!invert)
1160 nigel 93 {
1161 ph10 222 endmatch += offsets[1];
1162     t = ptr;
1163     while (t < endmatch)
1164     {
1165     t = end_of_line(t, endptr, &ellength);
1166     if (t <= endmatch) linenumber++; else break;
1167     }
1168 nigel 93 }
1169     endmatch = end_of_line(endmatch, endptr, &ellength);
1170     linelength = endmatch - ptr - ellength;
1171 nigel 77 }
1172    
1173 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1174     zeroes are treated as just another data character. */
1175    
1176     /* This extra option, for Jeffrey Friedl's debugging requirements,
1177     replaces the matched string, or a specific captured string if it exists,
1178     with X. When this happens, colouring is ignored. */
1179    
1180     #ifdef JFRIEDL_DEBUG
1181     if (S_arg >= 0 && S_arg < mrc)
1182     {
1183     int first = S_arg * 2;
1184     int last = first + 1;
1185     fwrite(ptr, 1, offsets[first], stdout);
1186     fprintf(stdout, "X");
1187     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1188     }
1189     else
1190     #endif
1191    
1192     /* We have to split the line(s) up if colouring. */
1193    
1194     if (do_colour)
1195     {
1196     fwrite(ptr, 1, offsets[0], stdout);
1197     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1198     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1199     fprintf(stdout, "%c[00m", 0x1b);
1200 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1201 ph10 239 stdout);
1202 nigel 87 }
1203 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1204 nigel 49 }
1205    
1206 nigel 87 /* End of doing what has to be done for a match */
1207    
1208 nigel 77 rc = 0; /* Had some success */
1209    
1210     /* Remember where the last match happened for after_context. We remember
1211     where we are about to restart, and that line's number. */
1212    
1213 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1214 nigel 77 lastmatchnumber = linenumber + 1;
1215 nigel 49 }
1216 nigel 77
1217 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1218     anything to be printed), we have to move on to the end of the match before
1219     proceeding. */
1220    
1221     if (multiline && invert && match)
1222     {
1223     int ellength;
1224     char *endmatch = ptr + offsets[1];
1225     t = ptr;
1226     while (t < endmatch)
1227     {
1228     t = end_of_line(t, endptr, &ellength);
1229     if (t <= endmatch) linenumber++; else break;
1230     }
1231     endmatch = end_of_line(endmatch, endptr, &ellength);
1232     linelength = endmatch - ptr - ellength;
1233     }
1234    
1235 ph10 286 /* Advance to after the newline and increment the line number. The file
1236 ph10 280 offset to the current line is maintained in filepos. */
1237 nigel 77
1238 nigel 93 ptr += linelength + endlinelength;
1239 ph10 280 filepos += linelength + endlinelength;
1240 nigel 77 linenumber++;
1241    
1242     /* If we haven't yet reached the end of the file (the buffer is full), and
1243     the current point is in the top 1/3 of the buffer, slide the buffer down by
1244     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1245     about to be lost, print them. */
1246    
1247     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1248     {
1249     if (after_context > 0 &&
1250     lastmatchnumber > 0 &&
1251     lastmatchrestart < buffer + MBUFTHIRD)
1252     {
1253     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1254     lastmatchnumber = 0;
1255     }
1256    
1257     /* Now do the shuffle */
1258    
1259     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260     ptr -= MBUFTHIRD;
1261 ph10 286
1262     #ifdef SUPPORT_LIBZ
1263     if (frtype == FR_LIBZ)
1264     bufflength = 2*MBUFTHIRD +
1265     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266     else
1267     #endif
1268    
1269     #ifdef SUPPORT_LIBBZ2
1270     if (frtype == FR_LIBBZ2)
1271     bufflength = 2*MBUFTHIRD +
1272     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273     else
1274     #endif
1275    
1276 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277 ph10 286
1278 nigel 77 endptr = buffer + bufflength;
1279    
1280     /* Adjust any last match point */
1281    
1282     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1283     }
1284     } /* Loop through the whole file */
1285    
1286     /* End of file; print final "after" lines if wanted; do_after_lines sets
1287     hyphenpending if it prints something. */
1288    
1289 nigel 87 if (!only_matching && !count_only)
1290     {
1291     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1292     hyphenpending |= endhyphenpending;
1293     }
1294 nigel 77
1295     /* Print the file name if we are looking for those without matches and there
1296     were none. If we found a match, we won't have got this far. */
1297    
1298 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1299 nigel 77 {
1300     fprintf(stdout, "%s\n", printname);
1301     return 0;
1302 nigel 49 }
1303    
1304 nigel 77 /* Print the match count if wanted */
1305    
1306 nigel 49 if (count_only)
1307     {
1308 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1309 nigel 49 fprintf(stdout, "%d\n", count);
1310     }
1311    
1312     return rc;
1313     }
1314    
1315    
1316    
1317     /*************************************************
1318 nigel 53 * Grep a file or recurse into a directory *
1319     *************************************************/
1320    
1321 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1322     recursing; if it's a file, grep it.
1323    
1324     Arguments:
1325     pathname the path to investigate
1326 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1327 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1328    
1329     Returns: 0 if there was at least one match
1330     1 if there were no matches
1331     2 there was some kind of error
1332    
1333     However, file opening failures are suppressed if "silent" is set.
1334     */
1335    
1336 nigel 53 static int
1337 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1338 nigel 53 {
1339     int rc = 1;
1340     int sep;
1341 ph10 286 int frtype;
1342     int pathlen;
1343     void *handle;
1344     FILE *in = NULL; /* Ensure initialized */
1345 nigel 53
1346 ph10 286 #ifdef SUPPORT_LIBZ
1347     gzFile ingz = NULL;
1348     #endif
1349    
1350     #ifdef SUPPORT_LIBBZ2
1351     BZFILE *inbz2 = NULL;
1352     #endif
1353    
1354 nigel 77 /* If the file name is "-" we scan stdin */
1355 nigel 53
1356 nigel 77 if (strcmp(pathname, "-") == 0)
1357 nigel 53 {
1358 ph10 286 return pcregrep(stdin, FR_PLAIN,
1359 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1360 nigel 77 stdin_name : NULL);
1361     }
1362    
1363 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1364     each file within it, subject to any include or exclude patterns that were set.
1365     The scanning code is localized so it can be made system-specific. */
1366    
1367     if ((sep = isdirectory(pathname)) != 0)
1368 nigel 77 {
1369 nigel 87 if (dee_action == dee_SKIP) return 1;
1370     if (dee_action == dee_RECURSE)
1371 nigel 53 {
1372 nigel 87 char buffer[1024];
1373     char *nextfile;
1374     directory_type *dir = opendirectory(pathname);
1375 nigel 53
1376 nigel 87 if (dir == NULL)
1377     {
1378     if (!silent)
1379     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1380     strerror(errno));
1381     return 2;
1382     }
1383 nigel 77
1384 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1385     {
1386 ph10 324 int frc, nflen;
1387 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1388 ph10 324 nflen = strlen(nextfile);
1389    
1390     if (!isdirectory(buffer))
1391     {
1392     if (exclude_compiled != NULL &&
1393     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1394     continue;
1395    
1396     if (include_compiled != NULL &&
1397     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1398     continue;
1399     }
1400 nigel 77
1401 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1402     if (frc > 1) rc = frc;
1403     else if (frc == 0 && rc == 1) rc = 0;
1404     }
1405    
1406     closedirectory(dir);
1407     return rc;
1408 nigel 53 }
1409     }
1410    
1411 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1412     been requested. */
1413 nigel 53
1414 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1415    
1416     /* Control reaches here if we have a regular file, or if we have a directory
1417     and recursion or skipping was not requested, or if we have anything else and
1418     skipping was not requested. The scan proceeds. If this is the first and only
1419     argument at top level, we don't show the file name, unless we are only showing
1420     the file name, or the filename was forced (-H). */
1421    
1422 ph10 286 pathlen = strlen(pathname);
1423    
1424     /* Open using zlib if it is supported and the file name ends with .gz. */
1425    
1426     #ifdef SUPPORT_LIBZ
1427     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1428 nigel 53 {
1429 ph10 286 ingz = gzopen(pathname, "rb");
1430     if (ingz == NULL)
1431     {
1432     if (!silent)
1433     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1434     strerror(errno));
1435     return 2;
1436     }
1437     handle = (void *)ingz;
1438     frtype = FR_LIBZ;
1439     }
1440     else
1441     #endif
1442    
1443     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1444    
1445     #ifdef SUPPORT_LIBBZ2
1446     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1447     {
1448     inbz2 = BZ2_bzopen(pathname, "rb");
1449     handle = (void *)inbz2;
1450     frtype = FR_LIBBZ2;
1451     }
1452     else
1453     #endif
1454    
1455     /* Otherwise use plain fopen(). The label is so that we can come back here if
1456     an attempt to read a .bz2 file indicates that it really is a plain file. */
1457    
1458     #ifdef SUPPORT_LIBBZ2
1459     PLAIN_FILE:
1460     #endif
1461     {
1462     in = fopen(pathname, "r");
1463     handle = (void *)in;
1464     frtype = FR_PLAIN;
1465     }
1466    
1467     /* All the opening methods return errno when they fail. */
1468    
1469     if (handle == NULL)
1470     {
1471 nigel 77 if (!silent)
1472     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1473     strerror(errno));
1474 nigel 53 return 2;
1475     }
1476    
1477 ph10 286 /* Now grep the file */
1478    
1479     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1480 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1481 nigel 77
1482 ph10 286 /* Close in an appropriate manner. */
1483    
1484     #ifdef SUPPORT_LIBZ
1485     if (frtype == FR_LIBZ)
1486     gzclose(ingz);
1487     else
1488     #endif
1489    
1490     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1491     read failed. If the error indicates that the file isn't in fact bzipped, try
1492     again as a normal file. */
1493    
1494     #ifdef SUPPORT_LIBBZ2
1495     if (frtype == FR_LIBBZ2)
1496     {
1497     if (rc == 2)
1498     {
1499     int errnum;
1500     const char *err = BZ2_bzerror(inbz2, &errnum);
1501     if (errnum == BZ_DATA_ERROR_MAGIC)
1502     {
1503     BZ2_bzclose(inbz2);
1504     goto PLAIN_FILE;
1505     }
1506     else if (!silent)
1507     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1508     pathname, err);
1509     }
1510     BZ2_bzclose(inbz2);
1511     }
1512     else
1513     #endif
1514    
1515     /* Normal file close */
1516    
1517 nigel 53 fclose(in);
1518 ph10 286
1519     /* Pass back the yield from pcregrep(). */
1520    
1521 nigel 53 return rc;
1522     }
1523    
1524    
1525    
1526    
1527     /*************************************************
1528 nigel 49 * Usage function *
1529     *************************************************/
1530    
1531     static int
1532     usage(int rc)
1533     {
1534 nigel 87 option_item *op;
1535     fprintf(stderr, "Usage: pcregrep [-");
1536     for (op = optionlist; op->one_char != 0; op++)
1537     {
1538     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1539     }
1540     fprintf(stderr, "] [long options] [pattern] [files]\n");
1541 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1542     "options.\n");
1543 nigel 49 return rc;
1544     }
1545    
1546    
1547    
1548    
1549     /*************************************************
1550 nigel 53 * Help function *
1551     *************************************************/
1552    
1553     static void
1554     help(void)
1555     {
1556     option_item *op;
1557    
1558 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1559 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1560 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1561 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1562    
1563     #ifdef SUPPORT_LIBZ
1564     printf("Files whose names end in .gz are read using zlib.\n");
1565     #endif
1566    
1567     #ifdef SUPPORT_LIBBZ2
1568     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1569     #endif
1570    
1571     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1572     printf("Other files and the standard input are read as plain files.\n\n");
1573     #else
1574     printf("All files are read as plain files, without any interpretation.\n\n");
1575     #endif
1576    
1577 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1578     printf("Options:\n");
1579    
1580     for (op = optionlist; op->one_char != 0; op++)
1581     {
1582     int n;
1583     char s[4];
1584     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1585 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1586 nigel 53 if (n < 1) n = 1;
1587     printf("%.*s%s\n", n, " ", op->help_text);
1588     }
1589    
1590 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1591     printf("trailing white space is removed and blank lines are ignored.\n");
1592     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1593 nigel 53
1594 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1595 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1596     }
1597    
1598    
1599    
1600    
1601     /*************************************************
1602 nigel 77 * Handle a single-letter, no data option *
1603 nigel 53 *************************************************/
1604    
1605     static int
1606     handle_option(int letter, int options)
1607     {
1608     switch(letter)
1609     {
1610 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1611 nigel 87 case N_HELP: help(); exit(0);
1612 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1613 nigel 53 case 'c': count_only = TRUE; break;
1614 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1615     case 'H': filenames = FN_FORCE; break;
1616     case 'h': filenames = FN_NONE; break;
1617 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1618 nigel 87 case 'l': filenames = FN_ONLY; break;
1619     case 'L': filenames = FN_NOMATCH_ONLY; break;
1620 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1621 nigel 53 case 'n': number = TRUE; break;
1622 nigel 87 case 'o': only_matching = TRUE; break;
1623 nigel 77 case 'q': quiet = TRUE; break;
1624 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1625 nigel 53 case 's': silent = TRUE; break;
1626 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1627 nigel 53 case 'v': invert = TRUE; break;
1628 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1629     case 'x': process_options |= PO_LINE_MATCH; break;
1630 nigel 53
1631     case 'V':
1632 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1633 nigel 53 exit(0);
1634     break;
1635    
1636     default:
1637     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1638     exit(usage(2));
1639     }
1640    
1641     return options;
1642     }
1643    
1644    
1645    
1646    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The suffix depends on the final
digit, except that numbers ending in 11, 12, or 13 take "th" ("11th", "112th").
A value that is zero or negative also gets "th".

Argument:  n    the number
Returns:   pointer to a static buffer that is overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* Room for any int, a 2-character suffix, and 0 */
const char *tail = "th";
int lasttwo = n % 100;

/* The final digit selects the suffix only outside the range 11 to 13. */

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: tail = "st"; break;
  case 2: tail = "nd"; break;
  case 3: tail = "rd"; break;
  default: break;
  }

sprintf(buffer, "%d%s", n, tail);
return buffer;
}
1669    
1670    
1671    
1672     /*************************************************
1673     * Compile a single pattern *
1674     *************************************************/
1675    
1676     /* When the -F option has been used, this is called for each substring.
1677     Otherwise it's called for each supplied pattern.
1678    
1679     Arguments:
1680     pattern the pattern string
1681     options the PCRE options
1682     filename the file name, or NULL for a command-line pattern
1683     count 0 if this is the only command line pattern, or
1684     number of the command line pattern, or
1685     linenumber for a pattern from a file
1686    
1687     Returns: TRUE on success, FALSE after an error
1688     */
1689    
1690     static BOOL
1691     compile_single_pattern(char *pattern, int options, char *filename, int count)
1692     {
1693     char buffer[MBUFTHIRD + 16];
1694     const char *error;
1695     int errptr;
1696    
1697     if (pattern_count >= MAX_PATTERN_COUNT)
1698     {
1699     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1700     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1701     return FALSE;
1702     }
1703    
1704     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1705     suffix[process_options]);
1706     pattern_list[pattern_count] =
1707     pcre_compile(buffer, options, &error, &errptr, pcretables);
1708 ph10 142 if (pattern_list[pattern_count] != NULL)
1709 ph10 141 {
1710 ph10 142 pattern_count++;
1711 ph10 141 return TRUE;
1712 ph10 142 }
1713 nigel 87
1714     /* Handle compile errors */
1715    
1716     errptr -= (int)strlen(prefix[process_options]);
1717     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1718    
1719     if (filename == NULL)
1720     {
1721     if (count == 0)
1722     fprintf(stderr, "pcregrep: Error in command-line regex "
1723     "at offset %d: %s\n", errptr, error);
1724     else
1725     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1726     "at offset %d: %s\n", ordin(count), errptr, error);
1727     }
1728     else
1729     {
1730     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1731     "at offset %d: %s\n", count, filename, errptr, error);
1732     }
1733    
1734     return FALSE;
1735     }
1736    
1737    
1738    
1739     /*************************************************
1740     * Compile one supplied pattern *
1741     *************************************************/
1742    
1743     /* When the -F option has been used, each string may be a list of strings,
1744 nigel 91 separated by line breaks. They will be matched literally.
1745 nigel 87
1746     Arguments:
1747     pattern the pattern string
1748     options the PCRE options
1749     filename the file name, or NULL for a command-line pattern
1750     count 0 if this is the only command line pattern, or
1751     number of the command line pattern, or
1752     linenumber for a pattern from a file
1753    
1754     Returns: TRUE on success, FALSE after an error
1755     */
1756    
1757     static BOOL
1758     compile_pattern(char *pattern, int options, char *filename, int count)
1759     {
1760     if ((process_options & PO_FIXED_STRINGS) != 0)
1761     {
1762 nigel 93 char *eop = pattern + strlen(pattern);
1763 nigel 87 char buffer[MBUFTHIRD];
1764     for(;;)
1765     {
1766 nigel 93 int ellength;
1767     char *p = end_of_line(pattern, eop, &ellength);
1768     if (ellength == 0)
1769 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1770 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1771 nigel 93 pattern = p;
1772 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1773     return FALSE;
1774     }
1775     }
1776     else return compile_single_pattern(pattern, options, filename, count);
1777     }
1778    
1779    
1780    
1781     /*************************************************
1782 nigel 49 * Main program *
1783     *************************************************/
1784    
1785 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1786    
1787 nigel 49 int
1788     main(int argc, char **argv)
1789     {
1790 nigel 53 int i, j;
1791 nigel 49 int rc = 1;
1792 nigel 87 int pcre_options = 0;
1793     int cmd_pattern_count = 0;
1794 ph10 141 int hint_count = 0;
1795 nigel 49 int errptr;
1796 nigel 87 BOOL only_one_at_top;
1797     char *patterns[MAX_PATTERN_COUNT];
1798     const char *locale_from = "--locale";
1799 nigel 49 const char *error;
1800    
1801 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1802     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1803     */
1804 nigel 91
1805     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1806     switch(i)
1807     {
1808     default: newline = (char *)"lf"; break;
1809     case '\r': newline = (char *)"cr"; break;
1810     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1811 nigel 93 case -1: newline = (char *)"any"; break;
1812 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1813 nigel 91 }
1814    
1815 nigel 49 /* Process the options */
1816    
1817     for (i = 1; i < argc; i++)
1818     {
1819 nigel 77 option_item *op = NULL;
1820     char *option_data = (char *)""; /* default to keep compiler happy */
1821     BOOL longop;
1822     BOOL longopwasequals = FALSE;
1823    
1824 nigel 49 if (argv[i][0] != '-') break;
1825 nigel 53
1826 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1827 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1828 nigel 63
1829 nigel 77 if (argv[i][1] == 0)
1830     {
1831 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1832 nigel 77 else exit(usage(2));
1833     }
1834 nigel 63
1835 nigel 77 /* Handle a long name option, or -- to terminate the options */
1836 nigel 53
1837     if (argv[i][1] == '-')
1838 nigel 49 {
1839 nigel 77 char *arg = argv[i] + 2;
1840     char *argequals = strchr(arg, '=');
1841 nigel 53
1842 nigel 77 if (*arg == 0) /* -- terminates options */
1843 nigel 49 {
1844 nigel 77 i++;
1845     break; /* out of the options-handling loop */
1846 nigel 53 }
1847 nigel 49
1848 nigel 77 longop = TRUE;
1849    
1850     /* Some long options have data that follows after =, for example file=name.
1851     Some options have variations in the long name spelling: specifically, we
1852     allow "regexp" because GNU grep allows it, though I personally go along
1853 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1854     These options are entered in the table as "regex(p)". No option is in both
1855     these categories, fortunately. */
1856 nigel 77
1857 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1858     {
1859 nigel 77 char *opbra = strchr(op->long_name, '(');
1860     char *equals = strchr(op->long_name, '=');
1861     if (opbra == NULL) /* Not a (p) case */
1862 nigel 53 {
1863 nigel 77 if (equals == NULL) /* Not thing=data case */
1864     {
1865     if (strcmp(arg, op->long_name) == 0) break;
1866     }
1867     else /* Special case xxx=data */
1868     {
1869     int oplen = equals - op->long_name;
1870 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1871 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1872     {
1873     option_data = arg + arglen;
1874     if (*option_data == '=')
1875     {
1876     option_data++;
1877     longopwasequals = TRUE;
1878     }
1879     break;
1880     }
1881     }
1882 nigel 53 }
1883 nigel 77 else /* Special case xxxx(p) */
1884     {
1885     char buff1[24];
1886     char buff2[24];
1887     int baselen = opbra - op->long_name;
1888     sprintf(buff1, "%.*s", baselen, op->long_name);
1889 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1890 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1891 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1892     break;
1893     }
1894 nigel 53 }
1895 nigel 77
1896 nigel 53 if (op->one_char == 0)
1897     {
1898     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1899     exit(usage(2));
1900     }
1901     }
1902 nigel 49
1903 nigel 89
1904     /* Jeffrey Friedl's debugging harness uses these additional options which
1905     are not in the right form for putting in the option table because they use
1906     only one hyphen, yet are more than one character long. By putting them
1907     separately here, they will not get displayed as part of the help() output,
1908     but I don't think Jeffrey will care about that. */
1909    
1910     #ifdef JFRIEDL_DEBUG
1911     else if (strcmp(argv[i], "-pre") == 0) {
1912     jfriedl_prefix = argv[++i];
1913     continue;
1914     } else if (strcmp(argv[i], "-post") == 0) {
1915     jfriedl_postfix = argv[++i];
1916     continue;
1917     } else if (strcmp(argv[i], "-XT") == 0) {
1918     sscanf(argv[++i], "%d", &jfriedl_XT);
1919     continue;
1920     } else if (strcmp(argv[i], "-XR") == 0) {
1921     sscanf(argv[++i], "%d", &jfriedl_XR);
1922     continue;
1923     }
1924     #endif
1925    
1926    
1927 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1928     continue till we hit the last one or one that needs data. */
1929 nigel 53
1930     else
1931     {
1932     char *s = argv[i] + 1;
1933 nigel 77 longop = FALSE;
1934 nigel 53 while (*s != 0)
1935     {
1936 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1937     { if (*s == op->one_char) break; }
1938     if (op->one_char == 0)
1939 nigel 53 {
1940 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1941     *s, argv[i]);
1942     exit(usage(2));
1943     }
1944     if (op->type != OP_NODATA || s[1] == 0)
1945     {
1946     option_data = s+1;
1947 nigel 53 break;
1948     }
1949 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1950 nigel 49 }
1951     }
1952 nigel 77
1953 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1954     is NO_DATA, it means that there is no data, and the option might set
1955     something in the PCRE options. */
1956 nigel 77
1957     if (op->type == OP_NODATA)
1958     {
1959 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1960     continue;
1961     }
1962    
1963     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1964     either has a value or defaults to something. It cannot have data in a
1965     separate item. At the moment, the only such options are "colo(u)r" and
1966 nigel 89 Jeffrey Friedl's special -S debugging option. */
1967 nigel 87
1968     if (*option_data == 0 &&
1969     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1970     {
1971     switch (op->one_char)
1972 nigel 77 {
1973 nigel 87 case N_COLOUR:
1974     colour_option = (char *)"auto";
1975     break;
1976     #ifdef JFRIEDL_DEBUG
1977     case 'S':
1978     S_arg = 0;
1979     break;
1980     #endif
1981 nigel 77 }
1982 nigel 87 continue;
1983     }
1984 nigel 77
1985 nigel 87 /* Otherwise, find the data string for the option. */
1986    
1987     if (*option_data == 0)
1988     {
1989     if (i >= argc - 1 || longopwasequals)
1990 nigel 77 {
1991 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1992     exit(usage(2));
1993     }
1994     option_data = argv[++i];
1995     }
1996    
1997     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1998     multiple times to create a list of patterns. */
1999    
2000     if (op->type == OP_PATLIST)
2001     {
2002     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2003     {
2004     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2005     MAX_PATTERN_COUNT);
2006     return 2;
2007     }
2008     patterns[cmd_pattern_count++] = option_data;
2009     }
2010    
2011     /* Otherwise, deal with single string or numeric data values. */
2012    
2013     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2014     {
2015     *((char **)op->dataptr) = option_data;
2016     }
2017     else
2018     {
2019     char *endptr;
2020     int n = strtoul(option_data, &endptr, 10);
2021     if (*endptr != 0)
2022     {
2023     if (longop)
2024 nigel 77 {
2025 nigel 87 char *equals = strchr(op->long_name, '=');
2026     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2027     equals - op->long_name;
2028     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2029     option_data, nlen, op->long_name);
2030 nigel 77 }
2031 nigel 87 else
2032     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2033     option_data, op->one_char);
2034     exit(usage(2));
2035 nigel 77 }
2036 nigel 87 *((int *)op->dataptr) = n;
2037 nigel 77 }
2038 nigel 49 }
2039    
2040 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2041     for -A and -B. */
2042    
2043     if (both_context > 0)
2044     {
2045     if (after_context == 0) after_context = both_context;
2046     if (before_context == 0) before_context = both_context;
2047     }
2048 ph10 286
2049     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2050 ph10 280 However, the latter two set the only_matching flag. */
2051 nigel 77
2052 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2053 ph10 286 (file_offsets && line_offsets))
2054 ph10 280 {
2055     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2056     "and/or --line-offsets\n");
2057     exit(usage(2));
2058     }
2059    
2060 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2061    
2062 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2063     LC_ALL environment variable is set, and if so, use it. */
2064 nigel 49
2065 nigel 87 if (locale == NULL)
2066 nigel 53 {
2067 nigel 87 locale = getenv("LC_ALL");
2068     locale_from = "LCC_ALL";
2069 nigel 53 }
2070 nigel 49
2071 nigel 87 if (locale == NULL)
2072     {
2073     locale = getenv("LC_CTYPE");
2074     locale_from = "LC_CTYPE";
2075     }
2076 nigel 49
2077 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2078     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2079    
2080     if (locale != NULL)
2081 nigel 49 {
2082 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2083 nigel 53 {
2084 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2085     locale, locale_from);
2086 nigel 53 return 2;
2087     }
2088 nigel 87 pcretables = pcre_maketables();
2089     }
2090 nigel 77
2091 nigel 87 /* Sort out colouring */
2092    
2093     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2094     {
2095     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2096     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2097     else
2098 nigel 53 {
2099 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2100     colour_option);
2101     return 2;
2102 nigel 77 }
2103 nigel 87 if (do_colour)
2104 nigel 77 {
2105 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2106     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2107     if (cs != NULL) colour_string = cs;
2108 nigel 77 }
2109 nigel 87 }
2110 nigel 77
2111 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2112    
2113     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2114     {
2115     pcre_options |= PCRE_NEWLINE_CR;
2116 nigel 93 endlinetype = EL_CR;
2117 nigel 91 }
2118     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2119     {
2120     pcre_options |= PCRE_NEWLINE_LF;
2121 nigel 93 endlinetype = EL_LF;
2122 nigel 91 }
2123     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2124     {
2125     pcre_options |= PCRE_NEWLINE_CRLF;
2126 nigel 93 endlinetype = EL_CRLF;
2127 nigel 91 }
2128 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2129     {
2130     pcre_options |= PCRE_NEWLINE_ANY;
2131     endlinetype = EL_ANY;
2132     }
2133 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2134     {
2135     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2136     endlinetype = EL_ANYCRLF;
2137     }
2138 nigel 91 else
2139     {
2140     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2141     return 2;
2142     }
2143    
2144 nigel 87 /* Interpret the text values for -d and -D */
2145    
2146     if (dee_option != NULL)
2147     {
2148     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2149     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2150     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2151     else
2152 nigel 77 {
2153 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2154     return 2;
2155 nigel 53 }
2156 nigel 49 }
2157    
2158 nigel 87 if (DEE_option != NULL)
2159     {
2160     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2161     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2162     else
2163     {
2164     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2165     return 2;
2166     }
2167     }
2168 nigel 49
2169 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2170 nigel 87
2171     #ifdef JFRIEDL_DEBUG
2172     if (S_arg > 9)
2173 nigel 49 {
2174 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2175     return 2;
2176     }
2177 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2178     {
2179     if (jfriedl_XT == 0) jfriedl_XT = 1;
2180     if (jfriedl_XR == 0) jfriedl_XR = 1;
2181     }
2182 nigel 87 #endif
2183 nigel 77
2184 nigel 87 /* Get memory to store the pattern and hints lists. */
2185    
2186     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2187     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2188    
2189     if (pattern_list == NULL || hints_list == NULL)
2190     {
2191     fprintf(stderr, "pcregrep: malloc failed\n");
2192 ph10 123 goto EXIT2;
2193 nigel 87 }
2194    
2195     /* If no patterns were provided by -e, and there is no file provided by -f,
2196     the first argument is the one and only pattern, and it must exist. */
2197    
2198     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2199     {
2200 nigel 63 if (i >= argc) return usage(2);
2201 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2202     }
2203 nigel 77
2204 nigel 87 /* Compile the patterns that were provided on the command line, either by
2205     multiple uses of -e or as a single unkeyed pattern. */
2206    
2207     for (j = 0; j < cmd_pattern_count; j++)
2208     {
2209     if (!compile_pattern(patterns[j], pcre_options, NULL,
2210     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2211 ph10 123 goto EXIT2;
2212 nigel 87 }
2213    
2214     /* Compile the regular expressions that are provided in a file. */
2215    
2216     if (pattern_filename != NULL)
2217     {
2218     int linenumber = 0;
2219     FILE *f;
2220     char *filename;
2221     char buffer[MBUFTHIRD];
2222    
2223     if (strcmp(pattern_filename, "-") == 0)
2224 nigel 77 {
2225 nigel 87 f = stdin;
2226     filename = stdin_name;
2227 nigel 77 }
2228 nigel 87 else
2229 nigel 77 {
2230 nigel 87 f = fopen(pattern_filename, "r");
2231     if (f == NULL)
2232     {
2233     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2234     strerror(errno));
2235 ph10 123 goto EXIT2;
2236 nigel 87 }
2237     filename = pattern_filename;
2238 nigel 77 }
2239    
2240 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2241 nigel 53 {
2242 nigel 87 char *s = buffer + (int)strlen(buffer);
2243     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2244     *s = 0;
2245     linenumber++;
2246     if (buffer[0] == 0) continue; /* Skip blank lines */
2247     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2248 ph10 121 goto EXIT2;
2249 nigel 53 }
2250 nigel 87
2251     if (f != stdin) fclose(f);
2252 nigel 49 }
2253    
2254 nigel 77 /* Study the regular expressions, as we will be running them many times */
2255 nigel 53
2256     for (j = 0; j < pattern_count; j++)
2257     {
2258     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2259     if (error != NULL)
2260     {
2261     char s[16];
2262     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2263     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2264 ph10 121 goto EXIT2;
2265 nigel 53 }
2266 ph10 142 hint_count++;
2267 nigel 53 }
2268    
2269 nigel 77 /* If there are include or exclude patterns, compile them. */
2270    
2271     if (exclude_pattern != NULL)
2272     {
2273 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2274     pcretables);
2275 nigel 77 if (exclude_compiled == NULL)
2276     {
2277     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2278     errptr, error);
2279 ph10 121 goto EXIT2;
2280 nigel 77 }
2281     }
2282    
2283     if (include_pattern != NULL)
2284     {
2285 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2286     pcretables);
2287 nigel 77 if (include_compiled == NULL)
2288     {
2289     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2290     errptr, error);
2291 ph10 121 goto EXIT2;
2292 nigel 77 }
2293     }
2294    
2295 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2296 nigel 49
2297 nigel 87 if (i >= argc)
2298 ph10 121 {
2299 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2300 ph10 121 goto EXIT;
2301 ph10 123 }
2302 nigel 49
2303 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2304     Pass in the fact that there is only one argument at top level - this suppresses
2305 nigel 87 the file name if the argument is not a directory and filenames are not
2306     otherwise forced. */
2307 nigel 49
2308 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2309 nigel 49
2310     for (; i < argc; i++)
2311     {
2312 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2313     only_one_at_top);
2314 nigel 77 if (frc > 1) rc = frc;
2315     else if (frc == 0 && rc == 1) rc = 0;
2316 nigel 49 }
2317    
2318 ph10 121 EXIT:
2319     if (pattern_list != NULL)
2320     {
2321 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2322 ph10 121 free(pattern_list);
2323 ph10 123 }
2324 ph10 121 if (hints_list != NULL)
2325     {
2326 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2327 ph10 121 free(hints_list);
2328 ph10 123 }
2329 nigel 49 return rc;
2330 ph10 121
2331     EXIT2:
2332     rc = 2;
2333     goto EXIT;
2334 nigel 49 }
2335    
2336 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12