/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 561 - (hide annotations) (download)
Sat Oct 30 18:37:47 2010 UTC (2 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 74526 byte(s)
Added --match-limit and --recursion-limit to pcregrep; tidied some error 
messages.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 515 Copyright (c) 1997-2010 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77     #define MBUFTHIRD BUFSIZ
78     #else
79     #define MBUFTHIRD 8192
80     #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
107 ph10 535 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108     environments), a warning is issued if the value of fwrite() is ignored.
109     Unfortunately, casting to (void) does not suppress the warning. To get round
110     this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 ph10 515 apply to fprintf(). */
112 nigel 93
/* The do { } while (0) wrapper makes "FWRITE(...);" a single statement, so it
can be used as the unbraced body of an if that is followed by an else. The
fwrite() result is still consumed by the inner if, which is what suppresses
the unused-result warning. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166     static int process_options = 0;
167 nigel 77
168 ph10 561 static unsigned long int match_limit = 0;
169     static unsigned long int match_limit_recursion = 0;
170    
171 nigel 49 static BOOL count_only = FALSE;
172 nigel 87 static BOOL do_colour = FALSE;
173 ph10 280 static BOOL file_offsets = FALSE;
174 nigel 77 static BOOL hyphenpending = FALSE;
175 nigel 49 static BOOL invert = FALSE;
176 ph10 519 static BOOL line_buffered = FALSE;
177 ph10 280 static BOOL line_offsets = FALSE;
178 nigel 77 static BOOL multiline = FALSE;
179 nigel 49 static BOOL number = FALSE;
180 ph10 420 static BOOL omit_zero_count = FALSE;
181 nigel 87 static BOOL only_matching = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
187 nigel 53 /* Structure for options and list of them */
188 nigel 49
189 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190     OP_PATLIST };
191 nigel 77
192 nigel 53 typedef struct option_item {
193 nigel 77 int type;
194 nigel 53 int one_char;
195 nigel 77 void *dataptr;
196 nigel 67 const char *long_name;
197     const char *help_text;
198 nigel 53 } option_item;
199 nigel 49
200 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
201     used to identify them. */
202    
203 ph10 325 #define N_COLOUR (-1)
204     #define N_EXCLUDE (-2)
205     #define N_EXCLUDE_DIR (-3)
206     #define N_HELP (-4)
207     #define N_INCLUDE (-5)
208     #define N_INCLUDE_DIR (-6)
209     #define N_LABEL (-7)
210     #define N_LOCALE (-8)
211     #define N_NULL (-9)
212     #define N_LOFFSETS (-10)
213     #define N_FOFFSETS (-11)
214 ph10 519 #define N_LBUFFER (-12)
215 ph10 561 #define N_M_LIMIT (-13)
216     #define N_M_LIMIT_REC (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 ph10 561 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 nigel 87 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 ph10 422 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 ph10 421 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 nigel 87 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 ph10 519 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 ph10 561 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 nigel 87 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
248     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254 nigel 87 #ifdef JFRIEDL_DEBUG
255     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
256     #endif
257     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
258     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
259     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
260     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
261     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
262     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
263     { OP_NODATA, 0, NULL, NULL, NULL }
264 nigel 53 };
265    
266 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
267     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
268     that the combination of -w and -x has the same effect as -x on its own, so we
269     can treat them as the same. */
270 nigel 53
271 nigel 87 static const char *prefix[] = {
272     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
273    
274     static const char *suffix[] = {
275     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
276    
277 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
278 nigel 87
279 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
280 nigel 87
281 nigel 93 const char utf8_table4[] = {
282     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
283     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
284     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
285     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286    
287    
288    
289 nigel 53 /*************************************************
290 nigel 87 * OS-specific functions *
291 nigel 53 *************************************************/
292    
293     /* These functions are defined so that they can be made system specific,
294 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
295 nigel 53
296    
297     /************* Directory scanning in Unix ***********/
298    
299 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300 nigel 53 #include <sys/types.h>
301     #include <sys/stat.h>
302     #include <dirent.h>
303    
304     typedef DIR directory_type;
305    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0

S_ISDIR() is the POSIX file-type test; unlike the S_IFMT/S_IFDIR masks it is
available without XSI feature-test macros. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
314    
315 nigel 67 static directory_type *
316 nigel 53 opendirectory(char *filename)
317     {
318     return opendir(filename);
319     }
320    
321 nigel 67 static char *
322 nigel 53 readdirectory(directory_type *dir)
323     {
324     for (;;)
325     {
326     struct dirent *dent = readdir(dir);
327     if (dent == NULL) return NULL;
328     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329     return dent->d_name;
330     }
331 ph10 151 /* Control never reaches here */
332 nigel 53 }
333    
334 nigel 67 static void
335 nigel 53 closedirectory(directory_type *dir)
336     {
337     closedir(dir);
338     }
339    
340    
341 nigel 87 /************* Test for regular file in Unix **********/
342    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, 0 if it reports anything else;
            also 1 if stat() itself fails, so that the caller goes on to try
            to open the path and gets the error from that

S_ISREG() is the POSIX file-type test; unlike the S_IFMT/S_IFREG masks it is
available without XSI feature-test macros. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) != 0;
}
351    
352    
353 ph10 519 /************* Test for a terminal in Unix **********/
354 nigel 87
355     static BOOL
356     is_stdout_tty(void)
357     {
358     return isatty(fileno(stdout));
359     }
360    
361 ph10 519 static BOOL
362     is_file_tty(FILE *f)
363     {
364     return isatty(fileno(f));
365     }
366 nigel 87
367 ph10 519
368 nigel 63 /************* Directory scanning in Win32 ***********/
369 nigel 53
370 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
371 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
373     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375     undefined when it is indeed undefined. */
376 nigel 53
377 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378 nigel 63
379     #ifndef STRICT
380     # define STRICT
381     #endif
382     #ifndef WIN32_LEAN_AND_MEAN
383     # define WIN32_LEAN_AND_MEAN
384     #endif
385 ph10 283
386     #include <windows.h>
387    
388 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
389     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390     #endif
391    
392 nigel 63 typedef struct directory_type
393     {
394     HANDLE handle;
395     BOOL first;
396     WIN32_FIND_DATA data;
397     } directory_type;
398    
399     int
400     isdirectory(char *filename)
401     {
402     DWORD attr = GetFileAttributes(filename);
403     if (attr == INVALID_FILE_ATTRIBUTES)
404     return 0;
405     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406     }
407    
408     directory_type *
409     opendirectory(char *filename)
410     {
411     size_t len;
412     char *pattern;
413     directory_type *dir;
414     DWORD err;
415     len = strlen(filename);
416     pattern = (char *) malloc(len + 3);
417     dir = (directory_type *) malloc(sizeof(*dir));
418     if ((pattern == NULL) || (dir == NULL))
419     {
420     fprintf(stderr, "pcregrep: malloc failed\n");
421 ph10 561 pcregrep_exit(2);
422 nigel 63 }
423     memcpy(pattern, filename, len);
424     memcpy(&(pattern[len]), "\\*", 3);
425     dir->handle = FindFirstFile(pattern, &(dir->data));
426     if (dir->handle != INVALID_HANDLE_VALUE)
427     {
428     free(pattern);
429     dir->first = TRUE;
430     return dir;
431     }
432     err = GetLastError();
433     free(pattern);
434     free(dir);
435     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
436     return NULL;
437     }
438    
439     char *
440     readdirectory(directory_type *dir)
441     {
442     for (;;)
443     {
444     if (!dir->first)
445     {
446     if (!FindNextFile(dir->handle, &(dir->data)))
447     return NULL;
448     }
449     else
450     {
451     dir->first = FALSE;
452     }
453     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454     return dir->data.cFileName;
455     }
456     #ifndef _MSC_VER
457     return NULL; /* Keep compiler happy; never executed */
458     #endif
459     }
460    
461     void
462     closedirectory(directory_type *dir)
463     {
464     FindClose(dir->handle);
465     free(dir);
466     }
467    
468    
469 nigel 87 /************* Test for regular file in Win32 **********/
470    
471     /* I don't know how to do this, or if it can be done; assume all paths are
472     regular if they are not directories. */
473    
/* Returns 1 for any path that isdirectory() does not report as a directory,
otherwise 0. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
478    
479    
480 ph10 519 /************* Test for a terminal in Win32 **********/
481 nigel 87
482     /* I don't know how to do this; assume never */
483    
484     static BOOL
485     is_stdout_tty(void)
486     {
487 ph10 283 return FALSE;
488 nigel 87 }
489    
490 ph10 519 static BOOL
491     is_file_tty(FILE *f)
492     {
493     return FALSE;
494     }
495 nigel 87
496 ph10 519
497 nigel 53 /************* Directory scanning when we can't do it ***********/
498    
499     /* The type is void, and apart from isdirectory(), the functions do nothing. */
500    
501 nigel 63 #else
502    
503 nigel 53 typedef void directory_type;
504    
505 nigel 87 int isdirectory(char *filename) { return 0; }
506 ph10 97 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
507     char *readdirectory(directory_type *dir) { return (char*)0;}
508 nigel 53 void closedirectory(directory_type *dir) {}
509    
510 nigel 87
511     /************* Test for regular when we can't do it **********/
512    
513     /* Assume all files are regular. */
514    
int
isregfile(char *filename)
{
(void)filename;      /* every path is taken to be a regular file */
return 1;
}
516    
517    
518 ph10 519 /************* Test for a terminal when we can't do it **********/
519 nigel 87
520     static BOOL
521     is_stdout_tty(void)
522     {
523     return FALSE;
524     }
525    
526 ph10 519 static BOOL
527     is_file_tty(FILE *f)
528     {
529     return FALSE;
530     }
531 nigel 87
532 nigel 53 #endif
533    
534    
535    
536 ph10 137 #ifndef HAVE_STRERROR
537 nigel 49 /*************************************************
538     * Provide strerror() for non-ANSI libraries *
539     *************************************************/
540    
541     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
542     in their libraries, but can provide the same facility by this simple
543     alternative function. */
544    
extern int   sys_nerr;
extern char *sys_errlist[];

/* Map an error number to its message by indexing sys_errlist[]. Numbers that
are negative or not below sys_nerr yield a fixed "unknown" message. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
554     #endif /* HAVE_STRERROR */
555    
556    
557    
558     /*************************************************
559 ph10 561 * Exit from the program *
560     *************************************************/
561    
562     /* If there has been a resource error, give a suitable message.
563    
564     Argument: the return code
565     Returns: does not return
566     */
567    
568     static void
569     pcregrep_exit(int rc)
570     {
571     if (resource_error)
572     {
573     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576     }
577    
578     exit(rc);
579     }
580    
581    
582    
583     /*************************************************
584 ph10 519 * Read one line of input *
585     *************************************************/
586    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when length characters have been
stored, or at end of file, whichever comes first. No terminating zero is added.
If length is zero or negative, nothing is read and nothing is stored.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)      /* test the bound before touching the buffer */
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
614    
615    
616    
617     /*************************************************
618 nigel 93 * Find end of line *
619     *************************************************/
620    
621     /* The length of the endline sequence that is found is set via lenptr. This may
622     be zero at the very end of the file if there is no line-ending sequence there.
623    
624     Arguments:
625     p current position in line
626     endptr end of available data
627     lenptr where to put the length of the eol sequence
628    
629     Returns: pointer to the last byte of the line
630     */
631    
632     static char *
633     end_of_line(char *p, char *endptr, int *lenptr)
634     {
635     switch(endlinetype)
636     {
637     default: /* Just in case */
638     case EL_LF:
639     while (p < endptr && *p != '\n') p++;
640     if (p < endptr)
641     {
642     *lenptr = 1;
643     return p + 1;
644     }
645     *lenptr = 0;
646     return endptr;
647    
648     case EL_CR:
649     while (p < endptr && *p != '\r') p++;
650     if (p < endptr)
651     {
652     *lenptr = 1;
653     return p + 1;
654     }
655     *lenptr = 0;
656     return endptr;
657    
658     case EL_CRLF:
659     for (;;)
660     {
661     while (p < endptr && *p != '\r') p++;
662     if (++p >= endptr)
663     {
664     *lenptr = 0;
665     return endptr;
666     }
667     if (*p == '\n')
668     {
669     *lenptr = 2;
670     return p + 1;
671     }
672     }
673     break;
674    
675 ph10 149 case EL_ANYCRLF:
676     while (p < endptr)
677     {
678     int extra = 0;
679     register int c = *((unsigned char *)p);
680    
681     if (utf8 && c >= 0xc0)
682     {
683     int gcii, gcss;
684     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
685     gcss = 6*extra;
686     c = (c & utf8_table3[extra]) << gcss;
687     for (gcii = 1; gcii <= extra; gcii++)
688     {
689     gcss -= 6;
690     c |= (p[gcii] & 0x3f) << gcss;
691     }
692     }
693    
694     p += 1 + extra;
695    
696     switch (c)
697     {
698     case 0x0a: /* LF */
699     *lenptr = 1;
700     return p;
701    
702     case 0x0d: /* CR */
703     if (p < endptr && *p == 0x0a)
704     {
705     *lenptr = 2;
706     p++;
707     }
708     else *lenptr = 1;
709     return p;
710 ph10 150
711 ph10 149 default:
712     break;
713     }
714     } /* End of loop for ANYCRLF case */
715 ph10 150
716 ph10 149 *lenptr = 0; /* Must have hit the end */
717     return endptr;
718    
719 nigel 93 case EL_ANY:
720     while (p < endptr)
721     {
722     int extra = 0;
723     register int c = *((unsigned char *)p);
724    
725     if (utf8 && c >= 0xc0)
726     {
727     int gcii, gcss;
728     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729     gcss = 6*extra;
730     c = (c & utf8_table3[extra]) << gcss;
731     for (gcii = 1; gcii <= extra; gcii++)
732     {
733     gcss -= 6;
734     c |= (p[gcii] & 0x3f) << gcss;
735     }
736     }
737    
738     p += 1 + extra;
739    
740     switch (c)
741     {
742     case 0x0a: /* LF */
743     case 0x0b: /* VT */
744     case 0x0c: /* FF */
745     *lenptr = 1;
746     return p;
747    
748     case 0x0d: /* CR */
749     if (p < endptr && *p == 0x0a)
750     {
751     *lenptr = 2;
752     p++;
753     }
754     else *lenptr = 1;
755     return p;
756    
757     case 0x85: /* NEL */
758     *lenptr = utf8? 2 : 1;
759     return p;
760    
761     case 0x2028: /* LS */
762     case 0x2029: /* PS */
763     *lenptr = 3;
764     return p;
765    
766     default:
767     break;
768     }
769     } /* End of loop for ANY case */
770    
771     *lenptr = 0; /* Must have hit the end */
772     return endptr;
773     } /* End of overall switch */
774     }
775    
776    
777    
778     /*************************************************
779     * Find start of previous line *
780     *************************************************/
781    
782     /* This is called when looking back for before lines to print.
783    
784     Arguments:
785     p start of the subsequent line
786     startptr start of available data
787    
788     Returns: pointer to the start of the previous line
789     */
790    
791     static char *
792     previous_line(char *p, char *startptr)
793     {
794     switch(endlinetype)
795     {
796     default: /* Just in case */
797     case EL_LF:
798     p--;
799     while (p > startptr && p[-1] != '\n') p--;
800     return p;
801    
802     case EL_CR:
803     p--;
804     while (p > startptr && p[-1] != '\n') p--;
805     return p;
806    
807     case EL_CRLF:
808     for (;;)
809     {
810     p -= 2;
811     while (p > startptr && p[-1] != '\n') p--;
812     if (p <= startptr + 1 || p[-2] == '\r') return p;
813     }
814     return p; /* But control should never get here */
815    
816     case EL_ANY:
817 ph10 150 case EL_ANYCRLF:
818 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819     if (utf8) while ((*p & 0xc0) == 0x80) p--;
820    
821     while (p > startptr)
822     {
823     register int c;
824     char *pp = p - 1;
825    
826     if (utf8)
827     {
828     int extra = 0;
829     while ((*pp & 0xc0) == 0x80) pp--;
830     c = *((unsigned char *)pp);
831     if (c >= 0xc0)
832     {
833     int gcii, gcss;
834     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
835     gcss = 6*extra;
836     c = (c & utf8_table3[extra]) << gcss;
837     for (gcii = 1; gcii <= extra; gcii++)
838     {
839     gcss -= 6;
840     c |= (pp[gcii] & 0x3f) << gcss;
841     }
842     }
843     }
844     else c = *((unsigned char *)pp);
845    
846 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
847 nigel 93 {
848     case 0x0a: /* LF */
849 ph10 149 case 0x0d: /* CR */
850     return p;
851 ph10 150
852 ph10 149 default:
853     break;
854 ph10 150 }
855 ph10 149
856     else switch (c)
857     {
858     case 0x0a: /* LF */
859 nigel 93 case 0x0b: /* VT */
860     case 0x0c: /* FF */
861     case 0x0d: /* CR */
862     case 0x85: /* NEL */
863     case 0x2028: /* LS */
864     case 0x2029: /* PS */
865     return p;
866    
867     default:
868     break;
869     }
870    
871     p = pp; /* Back one character */
872     } /* End of loop for ANY case */
873    
874     return startptr; /* Hit start of data */
875     } /* End of overall switch */
876     }
877    
878    
879    
880    
881    
882     /*************************************************
883 nigel 77 * Print the previous "after" lines *
884 nigel 49 *************************************************/
885    
886 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
887 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
888     that a binary zero does not terminate it.
889 nigel 77
890     Arguments:
891     lastmatchnumber the number of the last matching line, plus one
892     lastmatchrestart where we restarted after the last match
893     endptr end of available data
894     printname filename for printing
895    
896     Returns: nothing
897     */
898    
899     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900     char *endptr, char *printname)
901     {
902     if (after_context > 0 && lastmatchnumber > 0)
903     {
904     int count = 0;
905     while (lastmatchrestart < endptr && count++ < after_context)
906     {
907 nigel 93 int ellength;
908 nigel 77 char *pp = lastmatchrestart;
909     if (printname != NULL) fprintf(stdout, "%s-", printname);
910     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911 nigel 93 pp = end_of_line(pp, endptr, &ellength);
912 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913 nigel 93 lastmatchrestart = pp;
914 nigel 77 }
915     hyphenpending = TRUE;
916     }
917     }
918    
919    
920    
921     /*************************************************
922 ph10 378 * Apply patterns to subject till one matches *
923     *************************************************/
924    
925 ph10 392 /* This function is called to run through all patterns, looking for a match. It
926     is used multiple times for the same subject when colouring is enabled, in order
927 ph10 378 to find all possible matches.
928    
929     Arguments:
930     matchptr the start of the subject
931     length the length of the subject to match
932     offsets the offets vector to fill in
933     mrc address of where to put the result of pcre_exec()
934 ph10 392
935     Returns: TRUE if there was a match
936 ph10 378 FALSE if there was no match
937     invert if there was a non-fatal error
938 ph10 392 */
939 ph10 378
940     static BOOL
941     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942     {
943     int i;
944 ph10 561 size_t slen = length;
945     const char *msg = "this text:\n\n";
946     if (slen > 200)
947     {
948     slen = 200;
949     msg = "text that starts:\n\n";
950     }
951 ph10 378 for (i = 0; i < pattern_count; i++)
952     {
953 ph10 530 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955 ph10 378 if (*mrc >= 0) return TRUE;
956     if (*mrc == PCRE_ERROR_NOMATCH) continue;
957 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959 ph10 561 fprintf(stderr, "%s", msg);
960     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
961     fprintf(stderr, "\n\n");
962     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963     resource_error = TRUE;
964 ph10 378 if (error_count++ > 20)
965     {
966 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967     pcregrep_exit(2);
968 ph10 378 }
969     return invert; /* No more matching; don't show the line again */
970     }
971    
972     return FALSE; /* No match, no errors */
973     }
974    
975    
976    
977     /*************************************************
978 nigel 77 * Grep an individual file *
979     *************************************************/
980    
981     /* This is called from grep_or_recurse() below. It uses a buffer that is three
982     times the value of MBUFTHIRD. The matching point is never allowed to stray into
983     the top third of the buffer, thus keeping more of the file available for
984     context printing or for multiline scanning. For large files, the pointer will
985     be in the middle third most of the time, so the bottom third is available for
986     "before" context printing.
987    
988     Arguments:
989 ph10 286 handle the fopened FILE stream for a normal file
990     the gzFile pointer when reading is via libz
991     the BZFILE pointer when reading is via libbz2
992     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993 nigel 77 printname the file name if it is to be printed for each match
994     or NULL if the file name is not to be printed
995     it cannot be NULL if filenames[_nomatch]_only is set
996    
997     Returns: 0 if there was at least one match
998     1 otherwise (no matches)
999 ph10 286 2 if there is a read error on a .bz2 file
1000 nigel 77 */
1001    
1002 nigel 49 static int
1003 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1004 nigel 49 {
1005     int rc = 1;
1006 nigel 77 int linenumber = 1;
1007     int lastmatchnumber = 0;
1008 nigel 49 int count = 0;
1009 ph10 280 int filepos = 0;
1010 ph10 378 int offsets[OFFSET_SIZE];
1011 nigel 77 char *lastmatchrestart = NULL;
1012     char buffer[3*MBUFTHIRD];
1013     char *ptr = buffer;
1014     char *endptr;
1015     size_t bufflength;
1016     BOOL endhyphenpending = FALSE;
1017 ph10 519 BOOL input_line_buffered = line_buffered;
1018 ph10 286 FILE *in = NULL; /* Ensure initialized */
1019 nigel 49
1020 ph10 286 #ifdef SUPPORT_LIBZ
1021     gzFile ingz = NULL;
1022     #endif
1023 nigel 77
1024 ph10 286 #ifdef SUPPORT_LIBBZ2
1025     BZFILE *inbz2 = NULL;
1026     #endif
1027    
1028    
1029     /* Do the first read into the start of the buffer and set up the pointer to end
1030     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032     fail. */
1033    
1034     #ifdef SUPPORT_LIBZ
1035     if (frtype == FR_LIBZ)
1036     {
1037     ingz = (gzFile)handle;
1038     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039     }
1040     else
1041     #endif
1042    
1043     #ifdef SUPPORT_LIBBZ2
1044     if (frtype == FR_LIBBZ2)
1045     {
1046     inbz2 = (BZFILE *)handle;
1047     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1049     } /* without the cast it is unsigned. */
1050     else
1051     #endif
1052    
1053     {
1054     in = (FILE *)handle;
1055 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1056 ph10 535 bufflength = input_line_buffered?
1057 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1058     fread(buffer, 1, 3*MBUFTHIRD, in);
1059 ph10 286 }
1060 ph10 535
1061 nigel 77 endptr = buffer + bufflength;
1062    
1063     /* Loop while the current pointer is not at the end of the file. For large
1064     files, endptr will be at the end of the buffer when we are in the middle of the
1065     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066     way, the buffer is shifted left and re-filled. */
1067    
1068     while (ptr < endptr)
1069 nigel 49 {
1070 ph10 378 int endlinelength;
1071 nigel 87 int mrc = 0;
1072 ph10 378 BOOL match;
1073 ph10 286 char *matchptr = ptr;
1074 nigel 77 char *t = ptr;
1075     size_t length, linelength;
1076 nigel 49
1077 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1078     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079     length remainder of the data in the buffer. Otherwise, it is the length of
1080 ph10 378 the next line, excluding the terminating newline. After matching, we always
1081     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082     option is used for compiling, so that any match is constrained to be in the
1083     first line. */
1084 nigel 77
1085 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1086     linelength = t - ptr - endlinelength;
1087 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1088 nigel 77
1089 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1090    
1091     #ifdef JFRIEDL_DEBUG
1092     if (jfriedl_XT || jfriedl_XR)
1093     {
1094     #include <sys/time.h>
1095     #include <time.h>
1096     struct timeval start_time, end_time;
1097     struct timezone dummy;
1098 ph10 392 int i;
1099 nigel 89
1100     if (jfriedl_XT)
1101     {
1102     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103     const char *orig = ptr;
1104     ptr = malloc(newlen + 1);
1105     if (!ptr) {
1106     printf("out of memory");
1107 ph10 561 pcregrep_exit(2);
1108 nigel 89 }
1109     endptr = ptr;
1110     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111     for (i = 0; i < jfriedl_XT; i++) {
1112     strncpy(endptr, orig, length);
1113     endptr += length;
1114     }
1115     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116     length = newlen;
1117     }
1118    
1119     if (gettimeofday(&start_time, &dummy) != 0)
1120     perror("bad gettimeofday");
1121    
1122    
1123     for (i = 0; i < jfriedl_XR; i++)
1124 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126 nigel 89
1127     if (gettimeofday(&end_time, &dummy) != 0)
1128     perror("bad gettimeofday");
1129    
1130     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131     -
1132     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133    
1134     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135     return 0;
1136     }
1137     #endif
1138    
1139 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1140 ph10 279 in order to find any further matches in the same line. */
1141 nigel 89
1142 ph10 286 ONLY_MATCHING_RESTART:
1143    
1144 ph10 392 /* Run through all the patterns until one matches or there is an error other
1145 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1146     finding subsequent matches when colouring matched lines. */
1147 ph10 392
1148 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1149 nigel 77
1150 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1151 nigel 77
1152 nigel 49 if (match != invert)
1153     {
1154 nigel 77 BOOL hyphenprinted = FALSE;
1155    
1156 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1157 nigel 77
1158 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1159    
1160     /* Just count if just counting is wanted. */
1161    
1162 nigel 49 if (count_only) count++;
1163    
1164 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1165     in the file. */
1166    
1167 ph10 420 else if (filenames == FN_MATCH_ONLY)
1168 nigel 49 {
1169 nigel 77 fprintf(stdout, "%s\n", printname);
1170 nigel 49 return 0;
1171     }
1172    
1173 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1174    
1175 nigel 77 else if (quiet) return 0;
1176 nigel 49
1177 nigel 87 /* The --only-matching option prints just the substring that matched, and
1178 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1179 ph10 280 matching substring (they both force --only-matching). None of these options
1180     prints any context. Afterwards, adjust the start and length, and then jump
1181     back to look for further matches in the same line. If we are in invert
1182     mode, however, nothing is printed - this could be still useful because the
1183     return code is set. */
1184 nigel 87
1185     else if (only_matching)
1186     {
1187 ph10 279 if (!invert)
1188 ph10 286 {
1189 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1190     if (number) fprintf(stdout, "%d:", linenumber);
1191 ph10 280 if (line_offsets)
1192 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193 ph10 286 offsets[1] - offsets[0]);
1194 ph10 280 else if (file_offsets)
1195 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196 ph10 286 offsets[1] - offsets[0]);
1197     else
1198 ph10 377 {
1199     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202 ph10 392 }
1203 ph10 279 fprintf(stdout, "\n");
1204     matchptr += offsets[1];
1205     length -= offsets[1];
1206 ph10 286 match = FALSE;
1207     goto ONLY_MATCHING_RESTART;
1208     }
1209 nigel 87 }
1210    
1211     /* This is the default case when none of the above options is set. We print
1212     the matching lines(s), possibly preceded and/or followed by other lines of
1213     context. */
1214    
1215 nigel 49 else
1216     {
1217 nigel 77 /* See if there is a requirement to print some "after" lines from a
1218     previous match. We never print any overlaps. */
1219    
1220     if (after_context > 0 && lastmatchnumber > 0)
1221     {
1222 nigel 93 int ellength;
1223 nigel 77 int linecount = 0;
1224     char *p = lastmatchrestart;
1225    
1226     while (p < ptr && linecount < after_context)
1227     {
1228 nigel 93 p = end_of_line(p, ptr, &ellength);
1229 nigel 77 linecount++;
1230     }
1231    
1232     /* It is important to advance lastmatchrestart during this printing so
1233 nigel 87 that it interacts correctly with any "before" printing below. Print
1234     each line's data using fwrite() in case there are binary zeroes. */
1235 nigel 77
1236     while (lastmatchrestart < p)
1237     {
1238     char *pp = lastmatchrestart;
1239     if (printname != NULL) fprintf(stdout, "%s-", printname);
1240     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1241 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1242 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1243 nigel 93 lastmatchrestart = pp;
1244 nigel 77 }
1245     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1246     }
1247    
1248     /* If there were non-contiguous lines printed above, insert hyphens. */
1249    
1250     if (hyphenpending)
1251     {
1252     fprintf(stdout, "--\n");
1253     hyphenpending = FALSE;
1254     hyphenprinted = TRUE;
1255     }
1256    
1257     /* See if there is a requirement to print some "before" lines for this
1258     match. Again, don't print overlaps. */
1259    
1260     if (before_context > 0)
1261     {
1262     int linecount = 0;
1263     char *p = ptr;
1264    
1265     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1266 nigel 87 linecount < before_context)
1267 nigel 77 {
1268 nigel 87 linecount++;
1269 nigel 93 p = previous_line(p, buffer);
1270 nigel 77 }
1271    
1272     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1273     fprintf(stdout, "--\n");
1274    
1275     while (p < ptr)
1276     {
1277 nigel 93 int ellength;
1278 nigel 77 char *pp = p;
1279     if (printname != NULL) fprintf(stdout, "%s-", printname);
1280     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1281 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1282 ph10 515 FWRITE(p, 1, pp - p, stdout);
1283 nigel 93 p = pp;
1284 nigel 77 }
1285     }
1286    
1287     /* Now print the matching line(s); ensure we set hyphenpending at the end
1288 nigel 85 of the file if any context lines are being output. */
1289 nigel 77
1290 nigel 85 if (after_context > 0 || before_context > 0)
1291     endhyphenpending = TRUE;
1292    
1293 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1294 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1295 nigel 77
1296     /* In multiline mode, we want to print to the end of the line in which
1297     the end of the matched string is found, so we adjust linelength and the
1298 ph10 222 line number appropriately, but only when there actually was a match
1299     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1300     the match will always be before the first newline sequence. */
1301 nigel 77
1302     if (multiline)
1303     {
1304 nigel 93 int ellength;
1305 ph10 222 char *endmatch = ptr;
1306     if (!invert)
1307 nigel 93 {
1308 ph10 222 endmatch += offsets[1];
1309     t = ptr;
1310     while (t < endmatch)
1311     {
1312     t = end_of_line(t, endptr, &ellength);
1313     if (t <= endmatch) linenumber++; else break;
1314     }
1315 nigel 93 }
1316     endmatch = end_of_line(endmatch, endptr, &ellength);
1317     linelength = endmatch - ptr - ellength;
1318 nigel 77 }
1319    
1320 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1321     zeroes are treated as just another data character. */
1322    
1323     /* This extra option, for Jeffrey Friedl's debugging requirements,
1324     replaces the matched string, or a specific captured string if it exists,
1325     with X. When this happens, colouring is ignored. */
1326    
1327     #ifdef JFRIEDL_DEBUG
1328     if (S_arg >= 0 && S_arg < mrc)
1329     {
1330     int first = S_arg * 2;
1331     int last = first + 1;
1332 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1333 nigel 87 fprintf(stdout, "X");
1334 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1335 nigel 87 }
1336     else
1337     #endif
1338    
1339 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1340 ph10 378 matches. */
1341 nigel 87
1342     if (do_colour)
1343     {
1344 ph10 392 int last_offset = 0;
1345 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1346 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1347 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1348 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1349 ph10 378 for (;;)
1350     {
1351 ph10 392 last_offset += offsets[1];
1352 ph10 378 matchptr += offsets[1];
1353     length -= offsets[1];
1354     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1355 ph10 515 FWRITE(matchptr, 1, offsets[0], stdout);
1356 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1357 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1358 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1359     }
1360 ph10 535 FWRITE(ptr + last_offset, 1,
1361 ph10 515 (linelength + endlinelength) - last_offset, stdout);
1362 nigel 87 }
1363 ph10 392
1364 ph10 378 /* Not colouring; no need to search for further matches */
1365 ph10 392
1366 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1367 nigel 49 }
1368    
1369 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1370     given, flush the output. */
1371 nigel 87
1372 ph10 519 if (line_buffered) fflush(stdout);
1373 nigel 77 rc = 0; /* Had some success */
1374    
1375     /* Remember where the last match happened for after_context. We remember
1376     where we are about to restart, and that line's number. */
1377    
1378 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1379 nigel 77 lastmatchnumber = linenumber + 1;
1380 nigel 49 }
1381 nigel 77
1382 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1383     anything to be printed), we have to move on to the end of the match before
1384     proceeding. */
1385    
1386     if (multiline && invert && match)
1387     {
1388     int ellength;
1389     char *endmatch = ptr + offsets[1];
1390     t = ptr;
1391     while (t < endmatch)
1392     {
1393     t = end_of_line(t, endptr, &ellength);
1394     if (t <= endmatch) linenumber++; else break;
1395     }
1396     endmatch = end_of_line(endmatch, endptr, &ellength);
1397     linelength = endmatch - ptr - ellength;
1398     }
1399    
1400 ph10 286 /* Advance to after the newline and increment the line number. The file
1401 ph10 280 offset to the current line is maintained in filepos. */
1402 nigel 77
1403 nigel 93 ptr += linelength + endlinelength;
1404 ph10 530 filepos += (int)(linelength + endlinelength);
1405 nigel 77 linenumber++;
1406 ph10 535
1407     /* If input is line buffered, and the buffer is not yet full, read another
1408 ph10 519 line and add it into the buffer. */
1409 ph10 535
1410 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1411     {
1412     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1413     bufflength += add;
1414 ph10 535 endptr += add;
1415     }
1416 nigel 77
1417     /* If we haven't yet reached the end of the file (the buffer is full), and
1418     the current point is in the top 1/3 of the buffer, slide the buffer down by
1419     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1420     about to be lost, print them. */
1421    
1422     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1423     {
1424     if (after_context > 0 &&
1425     lastmatchnumber > 0 &&
1426     lastmatchrestart < buffer + MBUFTHIRD)
1427     {
1428     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1429     lastmatchnumber = 0;
1430     }
1431    
1432     /* Now do the shuffle */
1433    
1434     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1435     ptr -= MBUFTHIRD;
1436 ph10 286
1437     #ifdef SUPPORT_LIBZ
1438     if (frtype == FR_LIBZ)
1439     bufflength = 2*MBUFTHIRD +
1440     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1441     else
1442     #endif
1443    
1444     #ifdef SUPPORT_LIBBZ2
1445     if (frtype == FR_LIBBZ2)
1446     bufflength = 2*MBUFTHIRD +
1447     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1448     else
1449     #endif
1450    
1451 ph10 535 bufflength = 2*MBUFTHIRD +
1452     (input_line_buffered?
1453     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1454 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1455 nigel 77 endptr = buffer + bufflength;
1456    
1457     /* Adjust any last match point */
1458    
1459     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1460     }
1461     } /* Loop through the whole file */
1462    
1463     /* End of file; print final "after" lines if wanted; do_after_lines sets
1464     hyphenpending if it prints something. */
1465    
1466 nigel 87 if (!only_matching && !count_only)
1467     {
1468     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1469     hyphenpending |= endhyphenpending;
1470     }
1471 nigel 77
1472     /* Print the file name if we are looking for those without matches and there
1473     were none. If we found a match, we won't have got this far. */
1474    
1475 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1476 nigel 77 {
1477     fprintf(stdout, "%s\n", printname);
1478     return 0;
1479 nigel 49 }
1480    
1481 nigel 77 /* Print the match count if wanted */
1482    
1483 nigel 49 if (count_only)
1484     {
1485 ph10 420 if (count > 0 || !omit_zero_count)
1486 ph10 461 {
1487     if (printname != NULL && filenames != FN_NONE)
1488 ph10 420 fprintf(stdout, "%s:", printname);
1489     fprintf(stdout, "%d\n", count);
1490 ph10 461 }
1491 nigel 49 }
1492    
1493     return rc;
1494     }
1495    
1496    
1497    
1498     /*************************************************
1499 nigel 53 * Grep a file or recurse into a directory *
1500     *************************************************/
1501    
1502 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1503     recursing; if it's a file, grep it.
1504    
1505     Arguments:
1506     pathname the path to investigate
1507 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1508 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1509    
1510     Returns: 0 if there was at least one match
1511     1 if there were no matches
1512     2 there was some kind of error
1513    
1514     However, file opening failures are suppressed if "silent" is set.
1515     */
1516    
1517 nigel 53 static int
1518 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1519 nigel 53 {
1520     int rc = 1;
1521     int sep;
1522 ph10 286 int frtype;
1523     int pathlen;
1524     void *handle;
1525     FILE *in = NULL; /* Ensure initialized */
1526 nigel 53
1527 ph10 286 #ifdef SUPPORT_LIBZ
1528     gzFile ingz = NULL;
1529     #endif
1530    
1531     #ifdef SUPPORT_LIBBZ2
1532     BZFILE *inbz2 = NULL;
1533     #endif
1534    
1535 nigel 77 /* If the file name is "-" we scan stdin */
1536 nigel 53
1537 nigel 77 if (strcmp(pathname, "-") == 0)
1538 nigel 53 {
1539 ph10 286 return pcregrep(stdin, FR_PLAIN,
1540 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1541 nigel 77 stdin_name : NULL);
1542     }
1543    
1544 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1545 ph10 325 each file and directory within it, subject to any include or exclude patterns
1546     that were set. The scanning code is localized so it can be made
1547     system-specific. */
1548 nigel 87
1549     if ((sep = isdirectory(pathname)) != 0)
1550 nigel 77 {
1551 nigel 87 if (dee_action == dee_SKIP) return 1;
1552     if (dee_action == dee_RECURSE)
1553 nigel 53 {
1554 nigel 87 char buffer[1024];
1555     char *nextfile;
1556     directory_type *dir = opendirectory(pathname);
1557 nigel 53
1558 nigel 87 if (dir == NULL)
1559     {
1560     if (!silent)
1561     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1562     strerror(errno));
1563     return 2;
1564     }
1565 nigel 77
1566 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1567     {
1568 ph10 324 int frc, nflen;
1569 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1570 ph10 530 nflen = (int)(strlen(nextfile));
1571 ph10 345
1572 ph10 325 if (isdirectory(buffer))
1573     {
1574     if (exclude_dir_compiled != NULL &&
1575     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1576     continue;
1577 ph10 345
1578 ph10 325 if (include_dir_compiled != NULL &&
1579     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1580     continue;
1581     }
1582 ph10 345 else
1583     {
1584 ph10 324 if (exclude_compiled != NULL &&
1585     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1586     continue;
1587 ph10 345
1588 ph10 324 if (include_compiled != NULL &&
1589     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1590     continue;
1591 ph10 345 }
1592 nigel 77
1593 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1594     if (frc > 1) rc = frc;
1595     else if (frc == 0 && rc == 1) rc = 0;
1596     }
1597    
1598     closedirectory(dir);
1599     return rc;
1600 nigel 53 }
1601     }
1602    
1603 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1604     been requested. */
1605 nigel 53
1606 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1607    
1608     /* Control reaches here if we have a regular file, or if we have a directory
1609     and recursion or skipping was not requested, or if we have anything else and
1610     skipping was not requested. The scan proceeds. If this is the first and only
1611     argument at top level, we don't show the file name, unless we are only showing
1612     the file name, or the filename was forced (-H). */
1613    
1614 ph10 530 pathlen = (int)(strlen(pathname));
1615 ph10 286
1616     /* Open using zlib if it is supported and the file name ends with .gz. */
1617    
1618     #ifdef SUPPORT_LIBZ
1619     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1620 nigel 53 {
1621 ph10 286 ingz = gzopen(pathname, "rb");
1622     if (ingz == NULL)
1623     {
1624     if (!silent)
1625     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1626     strerror(errno));
1627     return 2;
1628     }
1629     handle = (void *)ingz;
1630     frtype = FR_LIBZ;
1631     }
1632     else
1633     #endif
1634    
1635     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1636    
1637     #ifdef SUPPORT_LIBBZ2
1638     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1639     {
1640     inbz2 = BZ2_bzopen(pathname, "rb");
1641     handle = (void *)inbz2;
1642     frtype = FR_LIBBZ2;
1643     }
1644     else
1645     #endif
1646    
1647     /* Otherwise use plain fopen(). The label is so that we can come back here if
1648     an attempt to read a .bz2 file indicates that it really is a plain file. */
1649    
1650     #ifdef SUPPORT_LIBBZ2
1651     PLAIN_FILE:
1652     #endif
1653     {
1654 ph10 419 in = fopen(pathname, "rb");
1655 ph10 286 handle = (void *)in;
1656     frtype = FR_PLAIN;
1657     }
1658    
1659     /* All the opening methods return errno when they fail. */
1660    
1661     if (handle == NULL)
1662     {
1663 nigel 77 if (!silent)
1664     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665     strerror(errno));
1666 nigel 53 return 2;
1667     }
1668    
1669 ph10 286 /* Now grep the file */
1670    
1671     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1672 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1673 nigel 77
1674 ph10 286 /* Close in an appropriate manner. */
1675    
1676     #ifdef SUPPORT_LIBZ
1677     if (frtype == FR_LIBZ)
1678     gzclose(ingz);
1679     else
1680     #endif
1681    
1682     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1683     read failed. If the error indicates that the file isn't in fact bzipped, try
1684     again as a normal file. */
1685    
1686     #ifdef SUPPORT_LIBBZ2
1687     if (frtype == FR_LIBBZ2)
1688     {
1689     if (rc == 2)
1690     {
1691     int errnum;
1692     const char *err = BZ2_bzerror(inbz2, &errnum);
1693     if (errnum == BZ_DATA_ERROR_MAGIC)
1694     {
1695     BZ2_bzclose(inbz2);
1696     goto PLAIN_FILE;
1697     }
1698     else if (!silent)
1699     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1700     pathname, err);
1701     }
1702     BZ2_bzclose(inbz2);
1703     }
1704     else
1705     #endif
1706    
1707     /* Normal file close */
1708    
1709 nigel 53 fclose(in);
1710 ph10 286
1711     /* Pass back the yield from pcregrep(). */
1712    
1713 nigel 53 return rc;
1714     }
1715    
1716    
1717    
1718    
1719     /*************************************************
1720 nigel 49 * Usage function *
1721     *************************************************/
1722    
1723     static int
1724     usage(int rc)
1725     {
1726 nigel 87 option_item *op;
1727     fprintf(stderr, "Usage: pcregrep [-");
1728     for (op = optionlist; op->one_char != 0; op++)
1729     {
1730     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1731     }
1732     fprintf(stderr, "] [long options] [pattern] [files]\n");
1733 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1734     "options.\n");
1735 nigel 49 return rc;
1736     }
1737    
1738    
1739    
1740    
1741     /*************************************************
1742 nigel 53 * Help function *
1743     *************************************************/
1744    
1745     static void
1746     help(void)
1747     {
1748     option_item *op;
1749    
1750 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1751 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1752 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1753 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1754    
1755     #ifdef SUPPORT_LIBZ
1756     printf("Files whose names end in .gz are read using zlib.\n");
1757     #endif
1758    
1759     #ifdef SUPPORT_LIBBZ2
1760     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1761     #endif
1762    
1763     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1764     printf("Other files and the standard input are read as plain files.\n\n");
1765     #else
1766     printf("All files are read as plain files, without any interpretation.\n\n");
1767     #endif
1768    
1769 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1770     printf("Options:\n");
1771    
1772     for (op = optionlist; op->one_char != 0; op++)
1773     {
1774     int n;
1775     char s[4];
1776     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1777 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1778 nigel 53 if (n < 1) n = 1;
1779     printf("%.*s%s\n", n, " ", op->help_text);
1780     }
1781    
1782 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1783     printf("trailing white space is removed and blank lines are ignored.\n");
1784     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1785 nigel 53
1786 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1787 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1788     }
1789    
1790    
1791    
1792    
1793     /*************************************************
1794 nigel 77 * Handle a single-letter, no data option *
1795 nigel 53 *************************************************/
1796    
1797     static int
1798     handle_option(int letter, int options)
1799     {
1800     switch(letter)
1801     {
1802 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1803 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1804 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1805 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1806 nigel 53 case 'c': count_only = TRUE; break;
1807 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1808     case 'H': filenames = FN_FORCE; break;
1809     case 'h': filenames = FN_NONE; break;
1810 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1811 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1812 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1813 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1814 nigel 53 case 'n': number = TRUE; break;
1815 nigel 87 case 'o': only_matching = TRUE; break;
1816 nigel 77 case 'q': quiet = TRUE; break;
1817 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1818 nigel 53 case 's': silent = TRUE; break;
1819 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1820 nigel 53 case 'v': invert = TRUE; break;
1821 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1822     case 'x': process_options |= PO_LINE_MATCH; break;
1823 nigel 53
1824     case 'V':
1825 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1826 ph10 561 pcregrep_exit(0);
1827 nigel 53 break;
1828    
1829     default:
1830     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1831 ph10 561 pcregrep_exit(usage(2));
1832 nigel 53 }
1833    
1834     return options;
1835     }
1836    
1837    
1838    
1839    
1840     /*************************************************
1841 nigel 87 * Construct printed ordinal *
1842     *************************************************/
1843    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as English requires. Negative
numbers always get "th". The result is in a static buffer that is overwritten
by the next call.

Argument:   n    the number
Returns:         pointer to the static buffer holding the ordinal
*/

static char *
ordin(int n)
{
/* Room for the digits and sign of any int (even a 64-bit one), the two-letter
suffix, and the terminating zero. Plain sprintf() is kept because this file
avoids relying on C99 library functions. */

static char buffer[32];
const char *ending = "th";
int last_two = n % 100;

/* The "teens" are irregular: 11th, 12th, 13th rather than 11st, 12nd, 13rd. */

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1862    
1863    
1864    
1865     /*************************************************
1866     * Compile a single pattern *
1867     *************************************************/
1868    
1869     /* When the -F option has been used, this is called for each substring.
1870     Otherwise it's called for each supplied pattern.
1871    
1872     Arguments:
1873     pattern the pattern string
1874     options the PCRE options
1875     filename the file name, or NULL for a command-line pattern
1876     count 0 if this is the only command line pattern, or
1877     number of the command line pattern, or
1878     linenumber for a pattern from a file
1879    
1880     Returns: TRUE on success, FALSE after an error
1881     */
1882    
1883     static BOOL
1884     compile_single_pattern(char *pattern, int options, char *filename, int count)
1885     {
1886     char buffer[MBUFTHIRD + 16];
1887     const char *error;
1888     int errptr;
1889    
1890     if (pattern_count >= MAX_PATTERN_COUNT)
1891     {
1892     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1893     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1894     return FALSE;
1895     }
1896    
1897     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1898     suffix[process_options]);
1899     pattern_list[pattern_count] =
1900     pcre_compile(buffer, options, &error, &errptr, pcretables);
1901 ph10 142 if (pattern_list[pattern_count] != NULL)
1902 ph10 141 {
1903 ph10 142 pattern_count++;
1904 ph10 141 return TRUE;
1905 ph10 142 }
1906 nigel 87
1907     /* Handle compile errors */
1908    
1909     errptr -= (int)strlen(prefix[process_options]);
1910     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1911    
1912     if (filename == NULL)
1913     {
1914     if (count == 0)
1915     fprintf(stderr, "pcregrep: Error in command-line regex "
1916     "at offset %d: %s\n", errptr, error);
1917     else
1918     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1919     "at offset %d: %s\n", ordin(count), errptr, error);
1920     }
1921     else
1922     {
1923     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1924     "at offset %d: %s\n", count, filename, errptr, error);
1925     }
1926    
1927     return FALSE;
1928     }
1929    
1930    
1931    
1932     /*************************************************
1933     * Compile one supplied pattern *
1934     *************************************************/
1935    
1936     /* When the -F option has been used, each string may be a list of strings,
1937 nigel 91 separated by line breaks. They will be matched literally.
1938 nigel 87
1939     Arguments:
1940     pattern the pattern string
1941     options the PCRE options
1942     filename the file name, or NULL for a command-line pattern
1943     count 0 if this is the only command line pattern, or
1944     number of the command line pattern, or
1945     linenumber for a pattern from a file
1946    
1947     Returns: TRUE on success, FALSE after an error
1948     */
1949    
1950     static BOOL
1951     compile_pattern(char *pattern, int options, char *filename, int count)
1952     {
1953     if ((process_options & PO_FIXED_STRINGS) != 0)
1954     {
1955 nigel 93 char *eop = pattern + strlen(pattern);
1956 nigel 87 char buffer[MBUFTHIRD];
1957     for(;;)
1958     {
1959 nigel 93 int ellength;
1960     char *p = end_of_line(pattern, eop, &ellength);
1961     if (ellength == 0)
1962 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1963 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1964 nigel 93 pattern = p;
1965 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1966     return FALSE;
1967     }
1968     }
1969     else return compile_single_pattern(pattern, options, filename, count);
1970     }
1971    
1972    
1973    
1974     /*************************************************
1975 nigel 49 * Main program *
1976     *************************************************/
1977    
1978 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1979    
1980 nigel 49 int
1981     main(int argc, char **argv)
1982     {
1983 nigel 53 int i, j;
1984 nigel 49 int rc = 1;
1985 nigel 87 int pcre_options = 0;
1986     int cmd_pattern_count = 0;
1987 ph10 141 int hint_count = 0;
1988 nigel 49 int errptr;
1989 nigel 87 BOOL only_one_at_top;
1990     char *patterns[MAX_PATTERN_COUNT];
1991     const char *locale_from = "--locale";
1992 nigel 49 const char *error;
1993    
1994 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1995     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1996 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
1997 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
1998 ph10 391 rather than escapes such as as '\r'. */
1999 nigel 91
2000     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2001     switch(i)
2002     {
2003 ph10 391 default: newline = (char *)"lf"; break;
2004     case 13: newline = (char *)"cr"; break;
2005     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2006     case -1: newline = (char *)"any"; break;
2007     case -2: newline = (char *)"anycrlf"; break;
2008 nigel 91 }
2009    
2010 nigel 49 /* Process the options */
2011    
2012     for (i = 1; i < argc; i++)
2013     {
2014 nigel 77 option_item *op = NULL;
2015     char *option_data = (char *)""; /* default to keep compiler happy */
2016     BOOL longop;
2017     BOOL longopwasequals = FALSE;
2018    
2019 nigel 49 if (argv[i][0] != '-') break;
2020 nigel 53
2021 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2022 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2023 nigel 63
2024 nigel 77 if (argv[i][1] == 0)
2025     {
2026 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2027 ph10 561 else pcregrep_exit(usage(2));
2028 nigel 77 }
2029 nigel 63
2030 nigel 77 /* Handle a long name option, or -- to terminate the options */
2031 nigel 53
2032     if (argv[i][1] == '-')
2033 nigel 49 {
2034 nigel 77 char *arg = argv[i] + 2;
2035     char *argequals = strchr(arg, '=');
2036 nigel 53
2037 nigel 77 if (*arg == 0) /* -- terminates options */
2038 nigel 49 {
2039 nigel 77 i++;
2040     break; /* out of the options-handling loop */
2041 nigel 53 }
2042 nigel 49
2043 nigel 77 longop = TRUE;
2044    
2045     /* Some long options have data that follows after =, for example file=name.
2046     Some options have variations in the long name spelling: specifically, we
2047     allow "regexp" because GNU grep allows it, though I personally go along
2048 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2049 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2050     both these categories. */
2051 nigel 77
2052 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2053     {
2054 nigel 77 char *opbra = strchr(op->long_name, '(');
2055     char *equals = strchr(op->long_name, '=');
2056 ph10 461
2057 ph10 422 /* Handle options with only one spelling of the name */
2058 ph10 461
2059 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2060 nigel 53 {
2061 nigel 77 if (equals == NULL) /* Not thing=data case */
2062     {
2063     if (strcmp(arg, op->long_name) == 0) break;
2064     }
2065     else /* Special case xxx=data */
2066     {
2067 ph10 530 int oplen = (int)(equals - op->long_name);
2068 ph10 535 int arglen = (argequals == NULL)?
2069 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2070 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2071     {
2072     option_data = arg + arglen;
2073     if (*option_data == '=')
2074     {
2075     option_data++;
2076     longopwasequals = TRUE;
2077     }
2078     break;
2079     }
2080     }
2081 nigel 53 }
2082 ph10 461
2083 ph10 422 /* Handle options with an alternate spelling of the name */
2084 ph10 461
2085     else
2086 nigel 77 {
2087     char buff1[24];
2088     char buff2[24];
2089 ph10 461
2090 ph10 530 int baselen = (int)(opbra - op->long_name);
2091     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2092 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2093 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2094 ph10 461
2095 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2096 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2097 ph10 461
2098     if (strncmp(arg, buff1, arglen) == 0 ||
2099 ph10 422 strncmp(arg, buff2, arglen) == 0)
2100     {
2101     if (equals != NULL && argequals != NULL)
2102     {
2103 ph10 461 option_data = argequals;
2104 ph10 422 if (*option_data == '=')
2105     {
2106 ph10 461 option_data++;
2107 ph10 422 longopwasequals = TRUE;
2108 ph10 461 }
2109     }
2110 nigel 77 break;
2111 ph10 461 }
2112 nigel 77 }
2113 nigel 53 }
2114 nigel 77
2115 nigel 53 if (op->one_char == 0)
2116     {
2117     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2118 ph10 561 pcregrep_exit(usage(2));
2119 nigel 53 }
2120     }
2121 nigel 49
2122 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2123     are not in the right form for putting in the option table because they use
2124     only one hyphen, yet are more than one character long. By putting them
2125     separately here, they will not get displayed as part of the help() output,
2126     but I don't think Jeffrey will care about that. */
2127    
2128     #ifdef JFRIEDL_DEBUG
2129     else if (strcmp(argv[i], "-pre") == 0) {
2130     jfriedl_prefix = argv[++i];
2131     continue;
2132     } else if (strcmp(argv[i], "-post") == 0) {
2133     jfriedl_postfix = argv[++i];
2134     continue;
2135     } else if (strcmp(argv[i], "-XT") == 0) {
2136     sscanf(argv[++i], "%d", &jfriedl_XT);
2137     continue;
2138     } else if (strcmp(argv[i], "-XR") == 0) {
2139     sscanf(argv[++i], "%d", &jfriedl_XR);
2140     continue;
2141     }
2142     #endif
2143    
2144    
2145 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2146     continue till we hit the last one or one that needs data. */
2147 nigel 53
2148     else
2149     {
2150     char *s = argv[i] + 1;
2151 nigel 77 longop = FALSE;
2152 nigel 53 while (*s != 0)
2153     {
2154 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2155     { if (*s == op->one_char) break; }
2156     if (op->one_char == 0)
2157 nigel 53 {
2158 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2159     *s, argv[i]);
2160 ph10 561 pcregrep_exit(usage(2));
2161 nigel 77 }
2162     if (op->type != OP_NODATA || s[1] == 0)
2163     {
2164     option_data = s+1;
2165 nigel 53 break;
2166     }
2167 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2168 nigel 49 }
2169     }
2170 nigel 77
2171 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2172     is NO_DATA, it means that there is no data, and the option might set
2173     something in the PCRE options. */
2174 nigel 77
2175     if (op->type == OP_NODATA)
2176     {
2177 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2178     continue;
2179     }
2180    
2181     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2182     either has a value or defaults to something. It cannot have data in a
2183     separate item. At the moment, the only such options are "colo(u)r" and
2184 nigel 89 Jeffrey Friedl's special -S debugging option. */
2185 nigel 87
2186     if (*option_data == 0 &&
2187     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2188     {
2189     switch (op->one_char)
2190 nigel 77 {
2191 nigel 87 case N_COLOUR:
2192     colour_option = (char *)"auto";
2193     break;
2194     #ifdef JFRIEDL_DEBUG
2195     case 'S':
2196     S_arg = 0;
2197     break;
2198     #endif
2199 nigel 77 }
2200 nigel 87 continue;
2201     }
2202 nigel 77
2203 nigel 87 /* Otherwise, find the data string for the option. */
2204    
2205     if (*option_data == 0)
2206     {
2207     if (i >= argc - 1 || longopwasequals)
2208 nigel 77 {
2209 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2210 ph10 561 pcregrep_exit(usage(2));
2211 nigel 87 }
2212     option_data = argv[++i];
2213     }
2214    
2215     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2216     multiple times to create a list of patterns. */
2217    
2218     if (op->type == OP_PATLIST)
2219     {
2220     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2221     {
2222     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2223     MAX_PATTERN_COUNT);
2224     return 2;
2225     }
2226     patterns[cmd_pattern_count++] = option_data;
2227     }
2228    
2229     /* Otherwise, deal with single string or numeric data values. */
2230    
2231     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2232     {
2233     *((char **)op->dataptr) = option_data;
2234     }
2235 ph10 558
2236     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2237     only for unpicking arguments, so just keep it simple. */
2238    
2239 nigel 87 else
2240     {
2241 ph10 561 unsigned long int n = 0;
2242 ph10 558 char *endptr = option_data;
2243     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2244     while (isdigit((unsigned char)(*endptr)))
2245     n = n * 10 + (int)(*endptr++ - '0');
2246 nigel 87 if (*endptr != 0)
2247     {
2248     if (longop)
2249 nigel 77 {
2250 nigel 87 char *equals = strchr(op->long_name, '=');
2251     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2252 ph10 530 (int)(equals - op->long_name);
2253 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2254     option_data, nlen, op->long_name);
2255 nigel 77 }
2256 nigel 87 else
2257     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2258     option_data, op->one_char);
2259 ph10 561 pcregrep_exit(usage(2));
2260 nigel 77 }
2261 nigel 87 *((int *)op->dataptr) = n;
2262 nigel 77 }
2263 nigel 49 }
2264    
2265 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2266     for -A and -B. */
2267    
2268     if (both_context > 0)
2269     {
2270     if (after_context == 0) after_context = both_context;
2271     if (before_context == 0) before_context = both_context;
2272     }
2273 ph10 286
2274     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2275 ph10 280 However, the latter two set the only_matching flag. */
2276 nigel 77
2277 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2278 ph10 286 (file_offsets && line_offsets))
2279 ph10 280 {
2280     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2281     "and/or --line-offsets\n");
2282 ph10 561 pcregrep_exit(usage(2));
2283 ph10 280 }
2284    
2285 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2286    
2287 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2288     LC_ALL environment variable is set, and if so, use it. */
2289 nigel 49
2290 nigel 87 if (locale == NULL)
2291 nigel 53 {
2292 nigel 87 locale = getenv("LC_ALL");
2293     locale_from = "LCC_ALL";
2294 nigel 53 }
2295 nigel 49
2296 nigel 87 if (locale == NULL)
2297     {
2298     locale = getenv("LC_CTYPE");
2299     locale_from = "LC_CTYPE";
2300     }
2301 nigel 49
2302 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2303     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2304    
2305     if (locale != NULL)
2306 nigel 49 {
2307 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2308 nigel 53 {
2309 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2310     locale, locale_from);
2311 nigel 53 return 2;
2312     }
2313 nigel 87 pcretables = pcre_maketables();
2314     }
2315 nigel 77
2316 nigel 87 /* Sort out colouring */
2317    
2318     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2319     {
2320     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2321     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2322     else
2323 nigel 53 {
2324 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2325     colour_option);
2326     return 2;
2327 nigel 77 }
2328 nigel 87 if (do_colour)
2329 nigel 77 {
2330 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2331     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2332     if (cs != NULL) colour_string = cs;
2333 nigel 77 }
2334 nigel 87 }
2335 ph10 535
2336 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2337    
2338     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2339     {
2340     pcre_options |= PCRE_NEWLINE_CR;
2341 nigel 93 endlinetype = EL_CR;
2342 nigel 91 }
2343     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2344     {
2345     pcre_options |= PCRE_NEWLINE_LF;
2346 nigel 93 endlinetype = EL_LF;
2347 nigel 91 }
2348     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2349     {
2350     pcre_options |= PCRE_NEWLINE_CRLF;
2351 nigel 93 endlinetype = EL_CRLF;
2352 nigel 91 }
2353 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2354     {
2355     pcre_options |= PCRE_NEWLINE_ANY;
2356     endlinetype = EL_ANY;
2357     }
2358 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2359     {
2360     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2361     endlinetype = EL_ANYCRLF;
2362     }
2363 nigel 91 else
2364     {
2365     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2366     return 2;
2367     }
2368    
2369 nigel 87 /* Interpret the text values for -d and -D */
2370    
2371     if (dee_option != NULL)
2372     {
2373     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2374     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2375     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2376     else
2377 nigel 77 {
2378 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2379     return 2;
2380 nigel 53 }
2381 nigel 49 }
2382    
2383 nigel 87 if (DEE_option != NULL)
2384     {
2385     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2386     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2387     else
2388     {
2389     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2390     return 2;
2391     }
2392     }
2393 nigel 49
2394 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2395 nigel 87
2396     #ifdef JFRIEDL_DEBUG
2397     if (S_arg > 9)
2398 nigel 49 {
2399 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2400     return 2;
2401     }
2402 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2403     {
2404     if (jfriedl_XT == 0) jfriedl_XT = 1;
2405     if (jfriedl_XR == 0) jfriedl_XR = 1;
2406     }
2407 nigel 87 #endif
2408 nigel 77
2409 nigel 87 /* Get memory to store the pattern and hints lists. */
2410    
2411     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2412     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2413    
2414     if (pattern_list == NULL || hints_list == NULL)
2415     {
2416     fprintf(stderr, "pcregrep: malloc failed\n");
2417 ph10 123 goto EXIT2;
2418 nigel 87 }
2419    
2420     /* If no patterns were provided by -e, and there is no file provided by -f,
2421     the first argument is the one and only pattern, and it must exist. */
2422    
2423     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2424     {
2425 nigel 63 if (i >= argc) return usage(2);
2426 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2427     }
2428 nigel 77
2429 nigel 87 /* Compile the patterns that were provided on the command line, either by
2430     multiple uses of -e or as a single unkeyed pattern. */
2431    
2432     for (j = 0; j < cmd_pattern_count; j++)
2433     {
2434     if (!compile_pattern(patterns[j], pcre_options, NULL,
2435     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2436 ph10 123 goto EXIT2;
2437 nigel 87 }
2438    
2439     /* Compile the regular expressions that are provided in a file. */
2440    
2441     if (pattern_filename != NULL)
2442     {
2443     int linenumber = 0;
2444     FILE *f;
2445     char *filename;
2446     char buffer[MBUFTHIRD];
2447    
2448     if (strcmp(pattern_filename, "-") == 0)
2449 nigel 77 {
2450 nigel 87 f = stdin;
2451     filename = stdin_name;
2452 nigel 77 }
2453 nigel 87 else
2454 nigel 77 {
2455 nigel 87 f = fopen(pattern_filename, "r");
2456     if (f == NULL)
2457     {
2458     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2459     strerror(errno));
2460 ph10 123 goto EXIT2;
2461 nigel 87 }
2462     filename = pattern_filename;
2463 nigel 77 }
2464    
2465 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2466 nigel 53 {
2467 nigel 87 char *s = buffer + (int)strlen(buffer);
2468     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2469     *s = 0;
2470     linenumber++;
2471     if (buffer[0] == 0) continue; /* Skip blank lines */
2472     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2473 ph10 121 goto EXIT2;
2474 nigel 53 }
2475 nigel 87
2476     if (f != stdin) fclose(f);
2477 nigel 49 }
2478    
2479 nigel 77 /* Study the regular expressions, as we will be running them many times */
2480 nigel 53
2481     for (j = 0; j < pattern_count; j++)
2482     {
2483     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2484     if (error != NULL)
2485     {
2486     char s[16];
2487     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2488     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2489 ph10 121 goto EXIT2;
2490 nigel 53 }
2491 ph10 142 hint_count++;
2492 nigel 53 }
2493 ph10 561
2494     /* If --match-limit or --recursion-limit was set, put the value(s) into the
2495     pcre_extra block for each pattern. */
2496 nigel 53
2497 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2498     {
2499     for (j = 0; j < pattern_count; j++)
2500     {
2501     if (hints_list[j] == NULL)
2502     {
2503     hints_list[j] = malloc(sizeof(pcre_extra));
2504     if (hints_list[j] == NULL)
2505     {
2506     fprintf(stderr, "pcregrep: malloc failed\n");
2507     pcregrep_exit(2);
2508     }
2509     }
2510     if (match_limit > 0)
2511     {
2512     hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2513     hints_list[j]->match_limit = match_limit;
2514     }
2515     if (match_limit_recursion > 0)
2516     {
2517     hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2518     hints_list[j]->match_limit_recursion = match_limit_recursion;
2519     }
2520     }
2521     }
2522    
2523 nigel 77 /* If there are include or exclude patterns, compile them. */
2524    
2525     if (exclude_pattern != NULL)
2526     {
2527 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2528     pcretables);
2529 nigel 77 if (exclude_compiled == NULL)
2530     {
2531     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2532     errptr, error);
2533 ph10 121 goto EXIT2;
2534 nigel 77 }
2535     }
2536    
2537     if (include_pattern != NULL)
2538     {
2539 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2540     pcretables);
2541 nigel 77 if (include_compiled == NULL)
2542     {
2543     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2544     errptr, error);
2545 ph10 121 goto EXIT2;
2546 nigel 77 }
2547     }
2548    
2549 ph10 325 if (exclude_dir_pattern != NULL)
2550     {
2551     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2552     pcretables);
2553     if (exclude_dir_compiled == NULL)
2554     {
2555     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2556     errptr, error);
2557     goto EXIT2;
2558     }
2559     }
2560    
2561     if (include_dir_pattern != NULL)
2562     {
2563     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2564     pcretables);
2565     if (include_dir_compiled == NULL)
2566     {
2567     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2568     errptr, error);
2569     goto EXIT2;
2570     }
2571     }
2572    
2573 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2574 nigel 49
2575 nigel 87 if (i >= argc)
2576 ph10 121 {
2577 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2578 ph10 121 goto EXIT;
2579 ph10 123 }
2580 nigel 49
2581 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2582     Pass in the fact that there is only one argument at top level - this suppresses
2583 nigel 87 the file name if the argument is not a directory and filenames are not
2584     otherwise forced. */
2585 nigel 49
2586 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2587 nigel 49
2588     for (; i < argc; i++)
2589     {
2590 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2591     only_one_at_top);
2592 nigel 77 if (frc > 1) rc = frc;
2593     else if (frc == 0 && rc == 1) rc = 0;
2594 nigel 49 }
2595    
2596 ph10 121 EXIT:
2597     if (pattern_list != NULL)
2598     {
2599 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2600 ph10 121 free(pattern_list);
2601 ph10 123 }
2602 ph10 121 if (hints_list != NULL)
2603     {
2604 ph10 561 for (i = 0; i < hint_count; i++)
2605     {
2606     if (hints_list[i] != NULL) free(hints_list[i]);
2607     }
2608 ph10 121 free(hints_list);
2609 ph10 123 }
2610 ph10 561 pcregrep_exit(rc);
2611 ph10 121
2612     EXIT2:
2613     rc = 2;
2614     goto EXIT;
2615 nigel 49 }
2616    
2617 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12