/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1467 - (hide annotations) (download)
Thu Apr 3 16:51:41 2014 UTC (2 weeks, 2 days ago) by ph10
File MIME type: text/plain
File size: 95008 byte(s)
Two minor changes to avoid compiler warnings.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 ph10 1354 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 ph10 1404 recurse into directories, and in z/OS it can handle PDS files.
8 nigel 49
9 ph10 1354 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10     additional header is required. That header is not included in the main PCRE
11     distribution because other apparatus is needed to compile pcregrep for z/OS.
12     The header can be found in the special z/OS distribution, which is available
13     from www.zaconsultants.net or from www.cbttape.org.
14 nigel 75
15 ph10 1467 Copyright (c) 1997-2014 University of Cambridge
16 ph10 1354
17 nigel 75 -----------------------------------------------------------------------------
18     Redistribution and use in source and binary forms, with or without
19     modification, are permitted provided that the following conditions are met:
20    
21     * Redistributions of source code must retain the above copyright notice,
22     this list of conditions and the following disclaimer.
23    
24     * Redistributions in binary form must reproduce the above copyright
25     notice, this list of conditions and the following disclaimer in the
26     documentation and/or other materials provided with the distribution.
27    
28     * Neither the name of the University of Cambridge nor the names of its
29     contributors may be used to endorse or promote products derived from
30     this software without specific prior written permission.
31    
32     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42     POSSIBILITY OF SUCH DAMAGE.
43     -----------------------------------------------------------------------------
44     */
45    
46 ph10 97 #ifdef HAVE_CONFIG_H
47 ph10 236 #include "config.h"
48 ph10 97 #endif
49    
50 nigel 53 #include <ctype.h>
51 nigel 87 #include <locale.h>
52 nigel 49 #include <stdio.h>
53     #include <string.h>
54     #include <stdlib.h>
55     #include <errno.h>
56 nigel 77
57     #include <sys/types.h>
58     #include <sys/stat.h>
59 ph10 199
60 ph10 137 #ifdef HAVE_UNISTD_H
61 ph10 199 #include <unistd.h>
62 ph10 137 #endif
63 nigel 77
64 ph10 286 #ifdef SUPPORT_LIBZ
65     #include <zlib.h>
66     #endif
67    
68     #ifdef SUPPORT_LIBBZ2
69     #include <bzlib.h>
70     #endif
71    
72 ph10 236 #include "pcre.h"
73 nigel 49
/* Home-grown boolean type and its two values. */

typedef int BOOL;

#define TRUE  1
#define FALSE 0

#define OFFSET_SIZE 99

/* The maximum pattern length is the larger of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer has 10 spare bytes, which allows for prefix+suffix. */

#define PATBUFSIZE (MAXPATLEN + 10)
88    
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d (directories) and -D (devices) options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};

/* Binary file options */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
117    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is consumed by an empty
"if". Oddly, this does not also seem to apply to fprintf().

The expansion is wrapped in do { } while (0) so that "FWRITE(...);" is exactly
one statement. A bare "if (...) {}" followed by the caller's semicolon is two
statements, which is a syntax error when the macro call is the body of an
"if" that has an "else". Every use must be followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125 nigel 93
126 ph10 515
127    
128 nigel 49 /*************************************************
129     * Global variables *
130     *************************************************/
131    
132 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
133     regular code. */
134    
135     #ifdef JFRIEDL_DEBUG
136     static int S_arg = -1;
137 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139     static const char *jfriedl_prefix = "";
140     static const char *jfriedl_postfix = "";
141 nigel 87 #endif
142    
143 nigel 93 static int endlinetype;
144 nigel 91
145 nigel 87 static char *colour_string = (char *)"1;31";
146     static char *colour_option = NULL;
147     static char *dee_option = NULL;
148     static char *DEE_option = NULL;
149 ph10 1003 static char *locale = NULL;
150 ph10 644 static char *main_buffer = NULL;
151 nigel 91 static char *newline = NULL;
152 ph10 1039 static char *om_separator = (char *)"";
153 nigel 77 static char *stdin_name = (char *)"(standard input)";
154 nigel 87
155     static const unsigned char *pcretables = NULL;
156    
157 nigel 77 static int after_context = 0;
158     static int before_context = 0;
159 ph10 947 static int binary_files = BIN_BINARY;
160 nigel 77 static int both_context = 0;
161 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
162     static int bufsize = 3*PCREGREP_BUFSIZE;
163 ph10 1003
164     #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165     static int dee_action = dee_SKIP;
166     #else
167 nigel 87 static int dee_action = dee_READ;
168 ph10 1003 #endif
169    
170 nigel 87 static int DEE_action = DEE_READ;
171     static int error_count = 0;
172     static int filenames = FN_DEFAULT;
173 ph10 1003 static int pcre_options = 0;
174 nigel 87 static int process_options = 0;
175 ph10 685
176     #ifdef SUPPORT_PCREGREP_JIT
177     static int study_options = PCRE_STUDY_JIT_COMPILE;
178     #else
179 ph10 667 static int study_options = 0;
180 ph10 685 #endif
181 nigel 77
182 ph10 561 static unsigned long int match_limit = 0;
183     static unsigned long int match_limit_recursion = 0;
184    
185 nigel 49 static BOOL count_only = FALSE;
186 nigel 87 static BOOL do_colour = FALSE;
187 ph10 280 static BOOL file_offsets = FALSE;
188 nigel 77 static BOOL hyphenpending = FALSE;
189 nigel 49 static BOOL invert = FALSE;
190 ph10 519 static BOOL line_buffered = FALSE;
191 ph10 280 static BOOL line_offsets = FALSE;
192 nigel 77 static BOOL multiline = FALSE;
193 nigel 49 static BOOL number = FALSE;
194 ph10 420 static BOOL omit_zero_count = FALSE;
195 ph10 561 static BOOL resource_error = FALSE;
196 nigel 77 static BOOL quiet = FALSE;
197 ph10 1039 static BOOL show_only_matching = FALSE;
198 nigel 49 static BOOL silent = FALSE;
199 nigel 93 static BOOL utf8 = FALSE;
200 nigel 49
/* Structure for list of --only-matching capturing numbers. Each item holds
one group number and a link to the next item. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

/* Head and tail of the --only-matching list; both start out empty. */

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

/* Structure for holding the two variables that describe a number chain: the
address of the head pointer and the address of the tail pointer. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

static omdatastr only_matching_data = {
  &only_matching,
  &only_matching_last
};
219    
/* Structure for list of file names (for -f and --{in,ex}clude-from) */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

/* Head and tail pointers for each of the four file name chains. All start
out empty. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;

static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;

static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Structure for holding the two variables that describe a file name chain:
the address of the head pointer and the address of the tail pointer. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

static fndatastr exclude_from_data  = { &exclude_from,  &exclude_from_last };
static fndatastr include_from_data  = { &include_from,  &include_from_last };
static fndatastr file_lists_data    = { &file_lists,    &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249     /* Structure for pattern and its compiled form; used for matching patterns and
250     also for include/exclude patterns. */
251    
252     typedef struct patstr {
253     struct patstr *next;
254     char *string;
255     pcre *compiled;
256     pcre_extra *hint;
257     } patstr;
258    
259     static patstr *patterns = NULL;
260     static patstr *patterns_last = NULL;
261     static patstr *include_patterns = NULL;
262     static patstr *include_patterns_last = NULL;
263     static patstr *exclude_patterns = NULL;
264     static patstr *exclude_patterns_last = NULL;
265     static patstr *include_dir_patterns = NULL;
266     static patstr *include_dir_patterns_last = NULL;
267     static patstr *exclude_dir_patterns = NULL;
268     static patstr *exclude_dir_patterns_last = NULL;
269    
270     /* Structure holding the two variables that describe a pattern chain. A pointer
271     to such structures is used for each appropriate option. */
272    
273     typedef struct patdatastr {
274     patstr **anchor;
275     patstr **lastptr;
276     } patdatastr;
277    
278     static patdatastr match_patdata = { &patterns, &patterns_last };
279     static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280     static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281     static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282     static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284     static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285     &include_dir_patterns, &exclude_dir_patterns };
286    
287     static const char *incexname[4] = { "--include", "--exclude",
288     "--include-dir", "--exclude-dir" };
289    
/* Structure for options and list of them */

/* The kinds of data an option can take; stored in the "type" field. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* where the option's data goes, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* description of the option */
} option_item;
302 nigel 49
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR        (-1)
#define N_EXCLUDE       (-2)
#define N_EXCLUDE_DIR   (-3)
#define N_HELP          (-4)
#define N_INCLUDE       (-5)
#define N_INCLUDE_DIR   (-6)
#define N_LABEL         (-7)
#define N_LOCALE        (-8)
#define N_NULL          (-9)
#define N_LOFFSETS      (-10)
#define N_FOFFSETS      (-11)
#define N_LBUFFER       (-12)
#define N_M_LIMIT       (-13)
#define N_M_LIMIT_REC   (-14)
#define N_BUFSIZE       (-15)
#define N_NOJIT         (-16)
#define N_FILE_LIST     (-17)
#define N_BINARY_FILES  (-18)
#define N_EXCLUDE_FROM  (-19)
#define N_INCLUDE_FROM  (-20)
#define N_OM_SEPARATOR  (-21)
327 nigel 87
328 nigel 53 static option_item optionlist[] = {
329 ph10 947 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 ph10 584 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 ph10 947 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 ph10 584 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 ph10 947 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 ph10 1003 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 ph10 584 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 ph10 1003 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345     { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 ph10 584 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 ph10 947 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 ph10 584 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
352     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
353     #else
354     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
355     #endif
356 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 ph10 1039 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368     { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 ph10 584 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 ph10 1003 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372     { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373     { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375     { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376     { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377 ph10 571
378     /* These two were accidentally implemented with underscores instead of
379     hyphens in the option names. As this was not discovered for several releases,
380     the incorrect versions are left in the table for compatibility. However, the
381     --help function misses out any option that has an underscore in its name. */
382 ph10 579
383 ph10 1003 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385 ph10 571
386 nigel 87 #ifdef JFRIEDL_DEBUG
387     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
388     #endif
389     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395     { OP_NODATA, 0, NULL, NULL, NULL }
396 nigel 53 };
397    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
prefix+suffix is 10 characters; if anything longer is added, it must be
adjusted. */
404 nigel 53
/* One entry per 3-bit combination of pattern-wrapping flags; prefix[i] and
suffix[i] belong together. */

static const char *prefix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  "^(?:",        /* 2 */
  "^(?:",        /* 3 */
  "\\Q",         /* 4 */
  "\\b\\Q",      /* 5 */
  "^(?:\\Q",     /* 6 */
  "^(?:\\Q"      /* 7 */
};

static const char *suffix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  ")$",          /* 2 */
  ")$",          /* 3 */
  "\\E",         /* 4 */
  "\\E\\b",      /* 5 */
  "\\E)$",       /* 6 */
  "\\E)$"        /* 7 */
};
410    
/* UTF-8 tables - used only when the newline setting is "any". */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* 64 entries, in four rows of 16. */

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
420    
421    
422    
423 nigel 53 /*************************************************
424 ph10 1039 * Exit from the program *
425     *************************************************/
426    
427     /* If there has been a resource error, give a suitable message.
428    
429     Argument: the return code
430     Returns: does not return
431     */
432    
433     static void
434     pcregrep_exit(int rc)
435     {
436     if (resource_error)
437     {
438     fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440     PCRE_ERROR_JIT_STACKLIMIT);
441     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442     }
443     exit(rc);
444     }
445    
446    
447     /*************************************************
448 ph10 1003 * Add item to chain of patterns *
449     *************************************************/
450    
451     /* Used to add an item onto a chain, or just return an unconnected item if the
452     "after" argument is NULL.
453    
454     Arguments:
455     s pattern string to add
456     after if not NULL points to item to insert after
457    
458 ph10 1039 Returns: new pattern block
459 ph10 1003 */
460    
461     static patstr *
462     add_pattern(char *s, patstr *after)
463     {
464     patstr *p = (patstr *)malloc(sizeof(patstr));
465     if (p == NULL)
466     {
467     fprintf(stderr, "pcregrep: malloc failed\n");
468 ph10 1039 pcregrep_exit(2);
469 ph10 1003 }
470     if (strlen(s) > MAXPATLEN)
471     {
472     fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473     MAXPATLEN);
474     return NULL;
475     }
476     p->next = NULL;
477     p->string = s;
478     p->compiled = NULL;
479     p->hint = NULL;
480    
481     if (after != NULL)
482     {
483     p->next = after->next;
484     after->next = p;
485     }
486     return p;
487     }
488    
489    
490     /*************************************************
491     * Free chain of patterns *
492     *************************************************/
493    
494     /* Used for several chains of patterns.
495    
496     Argument: pointer to start of chain
497     Returns: nothing
498     */
499    
500     static void
501     free_pattern_chain(patstr *pc)
502     {
503     while (pc != NULL)
504     {
505     patstr *p = pc;
506     pc = p->next;
507     if (p->hint != NULL) pcre_free_study(p->hint);
508     if (p->compiled != NULL) pcre_free(p->compiled);
509     free(p);
510     }
511     }
512    
513    
514     /*************************************************
515     * Free chain of file names *
516     *************************************************/
517    
518     /*
519     Argument: pointer to start of chain
520     Returns: nothing
521     */
522    
523     static void
524     free_file_chain(fnstr *fn)
525     {
526     while (fn != NULL)
527     {
528     fnstr *f = fn;
529     fn = f->next;
530     free(f);
531     }
532     }
533    
534    
535     /*************************************************
536 nigel 87 * OS-specific functions *
537 nigel 53 *************************************************/
538    
539 ph10 1354 /* These functions are defined so that they can be made system specific.
540     At present there are versions for Unix-style environments, Windows, native
541     z/OS, and "no support". */
542 nigel 53
543    
544 ph10 1354 /************* Directory scanning Unix-style and z/OS ***********/
545 nigel 53
546 ph10 1354 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
547 nigel 53 #include <sys/types.h>
548     #include <sys/stat.h>
549     #include <dirent.h>
550    
551 ph10 1354 #if defined NATIVE_ZOS
552     /************* Directory and PDS/E scanning for z/OS ***********/
553     /************* z/OS looks mostly like Unix with USS ************/
554     /* However, z/OS needs the #include statements in this header */
555     #include "pcrzosfs.h"
556     /* That header is not included in the main PCRE distribution because
557     other apparatus is needed to compile pcregrep for z/OS. The header
558     can be found in the special z/OS distribution, which is available
559     from www.zaconsultants.net or from www.cbttape.org. */
560     #endif
561    
562 nigel 53 typedef DIR directory_type;
563 ph10 1003 #define FILESEP '/'
564 nigel 53
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    1 if stat() succeeds and reports a directory; 0 otherwise

The POSIX S_ISDIR() test macro is used rather than masking with S_IFMT,
because the S_IFxxx mask names are not made visible by <sys/stat.h> under
every feature-test setting. */

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */

  return S_ISDIR(statbuf.st_mode) ? 1 : 0;
}
573    
574 nigel 67 static directory_type *
575 nigel 53 opendirectory(char *filename)
576     {
577     return opendir(filename);
578     }
579    
580 nigel 67 static char *
581 nigel 53 readdirectory(directory_type *dir)
582     {
583     for (;;)
584     {
585     struct dirent *dent = readdir(dir);
586     if (dent == NULL) return NULL;
587     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
588     return dent->d_name;
589     }
590 ph10 151 /* Control never reaches here */
591 nigel 53 }
592    
593 nigel 67 static void
594 nigel 53 closedirectory(directory_type *dir)
595     {
596     closedir(dir);
597     }
598    
599    
600 ph10 1354 /************* Test for regular file, Unix-style **********/
601 nigel 87
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() fails, or succeeds and reports a regular file;
            0 otherwise

The POSIX S_ISREG() test macro is used rather than masking with S_IFMT,
because the S_IFxxx mask names are not made visible by <sys/stat.h> under
every feature-test setting. */

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;        /* In the expectation that opening as a file will fail */

  return S_ISREG(statbuf.st_mode) ? 1 : 0;
}
610    
611    
612 ph10 1354 #if defined NATIVE_ZOS
613     /************* Test for a terminal in z/OS **********/
614     /* isatty() does not work in a TSO environment, so always give FALSE.*/
615 nigel 87
616     static BOOL
617     is_stdout_tty(void)
618     {
619 ph10 1354 return FALSE;
620     }
621    
622     static BOOL
623     is_file_tty(FILE *f)
624     {
625     return FALSE;
626     }
627    
628    
629     /************* Test for a terminal, Unix-style **********/
630    
631     #else
632     static BOOL
633     is_stdout_tty(void)
634     {
635 nigel 87 return isatty(fileno(stdout));
636     }
637    
638 ph10 519 static BOOL
639     is_file_tty(FILE *f)
640     {
641     return isatty(fileno(f));
642     }
643 ph10 1354 #endif
644 nigel 87
645 ph10 1354 /* End of Unix-style or native z/OS environment functions. */
646 ph10 519
647 nigel 53
648 ph10 1354 /************* Directory scanning in Windows ***********/
649    
650 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
651 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
652 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
653     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
654 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
655     undefined when it is indeed undefined. */
656 nigel 53
657 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
658 nigel 63
659     #ifndef STRICT
660     # define STRICT
661     #endif
662     #ifndef WIN32_LEAN_AND_MEAN
663     # define WIN32_LEAN_AND_MEAN
664     #endif
665 ph10 283
666     #include <windows.h>
667    
668 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
669     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
670     #endif
671    
672 nigel 63 typedef struct directory_type
673     {
674     HANDLE handle;
675     BOOL first;
676     WIN32_FIND_DATA data;
677     } directory_type;
678    
679 ph10 1004 #define FILESEP '/'
680 ph10 1003
681 nigel 63 int
682     isdirectory(char *filename)
683     {
684     DWORD attr = GetFileAttributes(filename);
685     if (attr == INVALID_FILE_ATTRIBUTES)
686     return 0;
687 ph10 1003 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
688 nigel 63 }
689    
690     directory_type *
691     opendirectory(char *filename)
692     {
693     size_t len;
694     char *pattern;
695     directory_type *dir;
696     DWORD err;
697     len = strlen(filename);
698 ph10 1003 pattern = (char *)malloc(len + 3);
699     dir = (directory_type *)malloc(sizeof(*dir));
700 nigel 63 if ((pattern == NULL) || (dir == NULL))
701     {
702     fprintf(stderr, "pcregrep: malloc failed\n");
703 ph10 561 pcregrep_exit(2);
704 nigel 63 }
705     memcpy(pattern, filename, len);
706     memcpy(&(pattern[len]), "\\*", 3);
707     dir->handle = FindFirstFile(pattern, &(dir->data));
708     if (dir->handle != INVALID_HANDLE_VALUE)
709     {
710     free(pattern);
711     dir->first = TRUE;
712     return dir;
713     }
714     err = GetLastError();
715     free(pattern);
716     free(dir);
717     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
718     return NULL;
719     }
720    
721     char *
722     readdirectory(directory_type *dir)
723     {
724     for (;;)
725     {
726     if (!dir->first)
727     {
728     if (!FindNextFile(dir->handle, &(dir->data)))
729     return NULL;
730     }
731     else
732     {
733     dir->first = FALSE;
734     }
735     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
736     return dir->data.cFileName;
737     }
738     #ifndef _MSC_VER
739     return NULL; /* Keep compiler happy; never executed */
740     #endif
741     }
742    
743     void
744     closedirectory(directory_type *dir)
745     {
746     FindClose(dir->handle);
747     free(dir);
748     }
749    
750    
751 ph10 1354 /************* Test for regular file in Windows **********/
752 nigel 87
753     /* I don't know how to do this, or if it can be done; assume all paths are
754     regular if they are not directories. */
755    
/* Anything that isdirectory() does not report as a directory is treated as a
regular file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() gives 0; 0 otherwise
*/

int
isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
760    
761    
762 ph10 1354 /************* Test for a terminal in Windows **********/
763 nigel 87
764     /* I don't know how to do this; assume never */
765    
766     static BOOL
767     is_stdout_tty(void)
768     {
769 ph10 283 return FALSE;
770 nigel 87 }
771    
772 ph10 519 static BOOL
773     is_file_tty(FILE *f)
774     {
775     return FALSE;
776     }
777 nigel 87
778 ph10 1354 /* End of Windows functions */
779 ph10 519
780 ph10 1354
781 nigel 53 /************* Directory scanning when we can't do it ***********/
782    
783     /* The type is void, and apart from isdirectory(), the functions do nothing. */
784    
785 nigel 63 #else
786    
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always gives a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* Reading a directory always gives a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Closing a directory does nothing. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
794    
795 nigel 87
796 ph10 1354 /************* Test for regular file when we can't do it **********/
797 nigel 87
798     /* Assume all files are regular. */
799    
int
isregfile(char *filename)
{
  (void)filename;   /* Not examined: every path counts as a regular file */
  return 1;
}
801    
802    
803 ph10 519 /************* Test for a terminal when we can't do it **********/
804 nigel 87
805     static BOOL
806     is_stdout_tty(void)
807     {
808     return FALSE;
809     }
810    
811 ph10 519 static BOOL
812     is_file_tty(FILE *f)
813     {
814     return FALSE;
815     }
816 nigel 87
817 ph10 1354 #endif /* End of system-specific functions */
818 nigel 53
819    
820    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for libraries that lack strerror() (SunOS4 is the example given
for this code). The message is looked up in the system's sys_errlist[] table,
which has sys_nerr entries; any number outside 0..sys_nerr-1 yields a fixed
"unknown" text instead. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
840    
841    
842    
843     /*************************************************
844 ph10 1039 * Usage function *
845     *************************************************/
846    
847     static int
848     usage(int rc)
849     {
850     option_item *op;
851     fprintf(stderr, "Usage: pcregrep [-");
852     for (op = optionlist; op->one_char != 0; op++)
853     {
854     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
855     }
856     fprintf(stderr, "] [long options] [pattern] [files]\n");
857     fprintf(stderr, "Type `pcregrep --help' for more information and the long "
858     "options.\n");
859     return rc;
860     }
861    
862    
863    
864     /*************************************************
865     * Help function *
866     *************************************************/
867    
868     static void
869     help(void)
870     {
871     option_item *op;
872    
873     printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
874     printf("Search for PATTERN in each FILE or standard input.\n");
875     printf("PATTERN must be present if neither -e nor -f is used.\n");
876     printf("\"-\" can be used as a file name to mean STDIN.\n");
877    
878     #ifdef SUPPORT_LIBZ
879     printf("Files whose names end in .gz are read using zlib.\n");
880     #endif
881    
882     #ifdef SUPPORT_LIBBZ2
883     printf("Files whose names end in .bz2 are read using bzlib2.\n");
884     #endif
885    
886     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
887     printf("Other files and the standard input are read as plain files.\n\n");
888     #else
889     printf("All files are read as plain files, without any interpretation.\n\n");
890     #endif
891    
892     printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
893     printf("Options:\n");
894    
895     for (op = optionlist; op->one_char != 0; op++)
896     {
897     int n;
898     char s[4];
899    
900     /* Two options were accidentally implemented and documented with underscores
901     instead of hyphens in their names, something that was not noticed for quite a
902     few releases. When fixing this, I left the underscored versions in the list
903     in case people were using them. However, we don't want to display them in the
904     help data. There are no other options that contain underscores, and we do not
905     expect ever to implement such options. Therefore, just omit any option that
906     contains an underscore. */
907    
908     if (strchr(op->long_name, '_') != NULL) continue;
909    
910     if (op->one_char > 0 && (op->long_name)[0] == 0)
911     n = 31 - printf(" -%c", op->one_char);
912     else
913     {
914     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
915     else strcpy(s, " ");
916     n = 31 - printf(" %s --%s", s, op->long_name);
917     }
918    
919     if (n < 1) n = 1;
920     printf("%.*s%s\n", n, " ", op->help_text);
921     }
922    
923     printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
924     printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
925     printf("When reading patterns or file names from a file, trailing white\n");
926     printf("space is removed and blank lines are ignored.\n");
927     printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
928    
929     printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
930     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
931     }
932    
933    
934    
935     /*************************************************
936 ph10 1003 * Test exclude/includes *
937     *************************************************/
938    
939     /* If any exclude pattern matches, the path is excluded. Otherwise, unless
940     there are no includes, the path must match an include pattern.
941    
942     Arguments:
943     path the path to be matched
944     ip the chain of include patterns
945     ep the chain of exclude patterns
946    
947     Returns: TRUE if the path is not excluded
948     */
949    
950     static BOOL
951     test_incexc(char *path, patstr *ip, patstr *ep)
952     {
953     int plen = strlen(path);
954    
955     for (; ep != NULL; ep = ep->next)
956     {
957     if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
958     return FALSE;
959     }
960    
961     if (ip == NULL) return TRUE;
962    
963     for (; ip != NULL; ip = ip->next)
964     {
965     if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
966     return TRUE;
967     }
968    
969     return FALSE;
970     }
971    
972    
973    
974     /*************************************************
975 ph10 1039 * Decode integer argument value *
976     *************************************************/
977    
978     /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
979     because SunOS4 doesn't have it. This is used only for unpicking arguments, so
980     just keep it simple.
981    
982     Arguments:
983     option_data the option data string
984     op the option item (for error messages)
985     longop TRUE if option given in long form
986    
987     Returns: a long integer
988     */
989    
990     static long int
991     decode_number(char *option_data, option_item *op, BOOL longop)
992     {
993     unsigned long int n = 0;
994     char *endptr = option_data;
995     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
996     while (isdigit((unsigned char)(*endptr)))
997     n = n * 10 + (int)(*endptr++ - '0');
998     if (toupper(*endptr) == 'K')
999     {
1000     n *= 1024;
1001     endptr++;
1002     }
1003     else if (toupper(*endptr) == 'M')
1004     {
1005     n *= 1024*1024;
1006     endptr++;
1007     }
1008    
1009     if (*endptr != 0) /* Error */
1010     {
1011     if (longop)
1012     {
1013     char *equals = strchr(op->long_name, '=');
1014     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1015     (int)(equals - op->long_name);
1016     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1017     option_data, nlen, op->long_name);
1018     }
1019     else
1020     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1021     option_data, op->one_char);
1022     pcregrep_exit(usage(2));
1023     }
1024    
1025     return n;
1026     }
1027    
1028    
1029    
1030     /*************************************************
1031     * Add item to a chain of numbers *
1032     *************************************************/
1033    
1034     /* Used to add an item onto a chain, or just return an unconnected item if the
1035     "after" argument is NULL.
1036    
1037     Arguments:
1038     n the number to add
1039     after if not NULL points to item to insert after
1040    
1041     Returns: new number block
1042     */
1043    
1044     static omstr *
1045     add_number(int n, omstr *after)
1046     {
1047     omstr *om = (omstr *)malloc(sizeof(omstr));
1048    
1049     if (om == NULL)
1050     {
1051     fprintf(stderr, "pcregrep: malloc failed\n");
1052     pcregrep_exit(2);
1053     }
1054     om->next = NULL;
1055     om->groupnum = n;
1056    
1057     if (after != NULL)
1058     {
1059     om->next = after->next;
1060     after->next = om;
1061     }
1062     return om;
1063     }
1064    
1065    
1066    
1067     /*************************************************
1068 ph10 519 * Read one line of input *
1069     *************************************************/
1070    
1071 ph10 535 /* Normally, input is read using fread() into a large buffer, so many lines may
1072     be read at once. However, doing this for tty input means that no output appears
1073 ph10 519 until a lot of input has been typed. Instead, tty input is handled line by
1074     line. We cannot use fgets() for this, because it does not stop at a binary
1075 ph10 535 zero, and therefore there is no way of telling how many characters it has read,
1076 ph10 519 because there may be binary zeros embedded in the data.
1077    
1078     Arguments:
1079     buffer the buffer to read into
1080     length the maximum number of characters to read
1081     f the file
1082 ph10 535
1083 ph10 519 Returns: the number of characters read, zero at end of file
1084 ph10 535 */
1085 ph10 519
/* Read characters one at a time with fgetc() until a newline has been stored,
the buffer limit is reached, or the stream reports EOF. Binary zeros are
stored like any other character, and no terminating zero is added; the caller
relies on the returned count.

The limit is tested before each read, so a length of zero (or less) stores
nothing and consumes nothing from the stream.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters stored, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }
return yield;
}
1098    
1099    
1100    
1101     /*************************************************
1102 nigel 93 * Find end of line *
1103     *************************************************/
1104    
1105     /* The length of the endline sequence that is found is set via lenptr. This may
1106     be zero at the very end of the file if there is no line-ending sequence there.
1107    
1108     Arguments:
1109     p current position in line
1110     endptr end of available data
1111     lenptr where to put the length of the eol sequence
1112    
1113 ph10 654 Returns: pointer after the last byte of the line,
1114 ph10 644 including the newline byte(s)
1115 nigel 93 */
1116    
/* Scan forward from p for the line terminator selected by the global
endlinetype. On return *lenptr holds the length in bytes of the terminator
that was found, or 0 if endptr was reached first. The result points just past
the terminator, or is endptr when no terminator was found. */
1117     static char *
1118     end_of_line(char *p, char *endptr, int *lenptr)
1119     {
1120     switch(endlinetype)
1121     {
1122     default: /* Just in case */
1123     case EL_LF:
1124     while (p < endptr && *p != '\n') p++;
1125     if (p < endptr)
1126     {
1127     *lenptr = 1;
1128     return p + 1;
1129     }
1130     *lenptr = 0;
1131     return endptr;
1132    
1133     case EL_CR:
1134     while (p < endptr && *p != '\r') p++;
1135     if (p < endptr)
1136     {
1137     *lenptr = 1;
1138     return p + 1;
1139     }
1140     *lenptr = 0;
1141     return endptr;
1142    
1143     case EL_CRLF:
1144     for (;;)
1145     {
1146     while (p < endptr && *p != '\r') p++;
/* Step over the CR (or past the end of the data). A CR that is not followed by
LF does not end the line here, so the search simply resumes from the next
byte. */
1147     if (++p >= endptr)
1148     {
1149     *lenptr = 0;
1150     return endptr;
1151     }
1152     if (*p == '\n')
1153     {
1154     *lenptr = 2;
1155     return p + 1;
1156     }
1157     }
/* Not reached: the loop above is left only by return. */
1158     break;
1159    
/* ANYCRLF: only LF, CR and CRLF are recognized. Each pass of the loop
consumes one character (one byte, or a whole multibyte sequence in UTF-8 mode)
and then tests its value. */
1160 ph10 149 case EL_ANYCRLF:
1161     while (p < endptr)
1162     {
1163     int extra = 0;
1164     register int c = *((unsigned char *)p);
1165    
/* In UTF-8 mode a lead byte >= 0xc0 starts a multibyte character: utf8_table4
gives the number of bytes that follow and utf8_table3 masks the data bits of
the lead byte. NOTE(review): p[1..extra] are read without a check against
endptr - confirm that a sequence truncated at the end of the data is safe. */
1166     if (utf8 && c >= 0xc0)
1167     {
1168     int gcii, gcss;
1169     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1170     gcss = 6*extra;
1171     c = (c & utf8_table3[extra]) << gcss;
1172     for (gcii = 1; gcii <= extra; gcii++)
1173     {
1174     gcss -= 6;
1175     c |= (p[gcii] & 0x3f) << gcss;
1176     }
1177     }
1178    
/* p now points past the character just examined. */
1179     p += 1 + extra;
1180    
1181     switch (c)
1182     {
1183 ph10 1033 case '\n':
1184 ph10 149 *lenptr = 1;
1185     return p;
1186    
/* CR alone is a one-byte terminator; CR immediately followed by LF is taken
as a single two-byte terminator. */
1187 ph10 1033 case '\r':
1188     if (p < endptr && *p == '\n')
1189 ph10 149 {
1190     *lenptr = 2;
1191     p++;
1192     }
1193     else *lenptr = 1;
1194     return p;
1195 ph10 150
1196 ph10 149 default:
1197     break;
1198     }
1199     } /* End of loop for ANYCRLF case */
1200 ph10 150
1201 ph10 149 *lenptr = 0; /* Must have hit the end */
1202     return endptr;
1203    
/* ANY: as for ANYCRLF, but VT and FF also end a line and, unless EBCDIC is
defined, so do NEL, LS and PS. */
1204 nigel 93 case EL_ANY:
1205     while (p < endptr)
1206     {
1207     int extra = 0;
1208     register int c = *((unsigned char *)p);
1209    
/* Same UTF-8 decoding as in the ANYCRLF case, with the same unchecked reads
of the following bytes. */
1210     if (utf8 && c >= 0xc0)
1211     {
1212     int gcii, gcss;
1213     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1214     gcss = 6*extra;
1215     c = (c & utf8_table3[extra]) << gcss;
1216     for (gcii = 1; gcii <= extra; gcii++)
1217     {
1218     gcss -= 6;
1219     c |= (p[gcii] & 0x3f) << gcss;
1220     }
1221     }
1222    
1223     p += 1 + extra;
1224    
1225     switch (c)
1226     {
1227 ph10 1033 case '\n': /* LF */
1228     case '\v': /* VT */
1229     case '\f': /* FF */
1230 nigel 93 *lenptr = 1;
1231     return p;
1232    
1233 ph10 1033 case '\r': /* CR */
1234     if (p < endptr && *p == '\n')
1235 nigel 93 {
1236     *lenptr = 2;
1237     p++;
1238     }
1239     else *lenptr = 1;
1240     return p;
1241    
1242 ph10 1033 #ifndef EBCDIC
/* The reported length of NEL is two bytes in UTF-8 mode and one byte
otherwise. */
1243     case 0x85: /* Unicode NEL */
1244 nigel 93 *lenptr = utf8? 2 : 1;
1245     return p;
1246    
/* LS and PS are reported as three-byte terminators. */
1247 ph10 1033 case 0x2028: /* Unicode LS */
1248     case 0x2029: /* Unicode PS */
1249 nigel 93 *lenptr = 3;
1250     return p;
1251 ph10 1039 #endif /* Not EBCDIC */
1252 nigel 93
1253     default:
1254     break;
1255     }
1256     } /* End of loop for ANY case */
1257    
1258     *lenptr = 0; /* Must have hit the end */
1259     return endptr;
1260     } /* End of overall switch */
1261     }
1262    
1263    
1264    
1265     /*************************************************
1266     * Find start of previous line *
1267     *************************************************/
1268    
1269     /* This is called when looking back for before lines to print.
1270    
1271     Arguments:
1272     p start of the subsequent line
1273     startptr start of available data
1274    
1275     Returns: pointer to the start of the previous line
1276     */
1277    
1278     static char *
1279     previous_line(char *p, char *startptr)
1280     {
1281     switch(endlinetype)
1282     {
1283     default: /* Just in case */
1284     case EL_LF:
1285     p--;
1286     while (p > startptr && p[-1] != '\n') p--;
1287     return p;
1288    
1289     case EL_CR:
1290     p--;
1291     while (p > startptr && p[-1] != '\n') p--;
1292     return p;
1293    
1294     case EL_CRLF:
1295     for (;;)
1296     {
1297     p -= 2;
1298     while (p > startptr && p[-1] != '\n') p--;
1299     if (p <= startptr + 1 || p[-2] == '\r') return p;
1300     }
1301 ph10 1467 /* Control can never get here */
1302 nigel 93
1303     case EL_ANY:
1304 ph10 150 case EL_ANYCRLF:
1305 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1306     if (utf8) while ((*p & 0xc0) == 0x80) p--;
1307    
1308     while (p > startptr)
1309     {
1310 chpe 1096 register unsigned int c;
1311 nigel 93 char *pp = p - 1;
1312    
1313     if (utf8)
1314     {
1315     int extra = 0;
1316     while ((*pp & 0xc0) == 0x80) pp--;
1317     c = *((unsigned char *)pp);
1318     if (c >= 0xc0)
1319     {
1320     int gcii, gcss;
1321     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1322     gcss = 6*extra;
1323     c = (c & utf8_table3[extra]) << gcss;
1324     for (gcii = 1; gcii <= extra; gcii++)
1325     {
1326     gcss -= 6;
1327     c |= (pp[gcii] & 0x3f) << gcss;
1328     }
1329     }
1330     }
1331     else c = *((unsigned char *)pp);
1332    
1333 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
1334 nigel 93 {
1335 ph10 1033 case '\n': /* LF */
1336     case '\r': /* CR */
1337 ph10 149 return p;
1338 ph10 150
1339 ph10 149 default:
1340     break;
1341 ph10 150 }
1342 ph10 149
1343     else switch (c)
1344     {
1345 ph10 1033 case '\n': /* LF */
1346     case '\v': /* VT */
1347     case '\f': /* FF */
1348     case '\r': /* CR */
1349 ph10 1039 #ifndef EBCDIE
1350 ph10 1033 case 0x85: /* Unicode NEL */
1351     case 0x2028: /* Unicode LS */
1352     case 0x2029: /* Unicode PS */
1353 ph10 1039 #endif /* Not EBCDIC */
1354 nigel 93 return p;
1355    
1356     default:
1357     break;
1358     }
1359    
1360     p = pp; /* Back one character */
1361     } /* End of loop for ANY case */
1362    
1363     return startptr; /* Hit start of data */
1364     } /* End of overall switch */
1365     }
1366    
1367    
1368    
1369    
1370    
1371     /*************************************************
1372 nigel 77 * Print the previous "after" lines *
1373 nigel 49 *************************************************/
1374    
1375 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
1376 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
1377     that a binary zero does not terminate it.
1378 nigel 77
1379     Arguments:
1380     lastmatchnumber the number of the last matching line, plus one
1381     lastmatchrestart where we restarted after the last match
1382     endptr end of available data
1383     printname filename for printing
1384    
1385     Returns: nothing
1386     */
1387    
1388 ph10 1003 static void
1389     do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1390     char *printname)
1391 nigel 77 {
1392     if (after_context > 0 && lastmatchnumber > 0)
1393     {
1394     int count = 0;
1395     while (lastmatchrestart < endptr && count++ < after_context)
1396     {
1397 nigel 93 int ellength;
1398 nigel 77 char *pp = lastmatchrestart;
1399     if (printname != NULL) fprintf(stdout, "%s-", printname);
1400     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1401 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1402 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1403 nigel 93 lastmatchrestart = pp;
1404 nigel 77 }
1405     hyphenpending = TRUE;
1406     }
1407     }
1408    
1409    
1410    
1411     /*************************************************
1412 ph10 378 * Apply patterns to subject till one matches *
1413     *************************************************/
1414    
1415 ph10 392 /* This function is called to run through all patterns, looking for a match. It
1416     is used multiple times for the same subject when colouring is enabled, in order
1417 ph10 378 to find all possible matches.
1418    
1419     Arguments:
1420 ph10 632 matchptr the start of the subject
1421     length the length of the subject to match
1422 ph10 1335 options options for pcre_exec
1423 ph10 632 startoffset where to start matching
1424     offsets the offsets vector to fill in
1425     mrc address of where to put the result of pcre_exec()
1426 ph10 392
1427     Returns: TRUE if there was a match
1428 ph10 378 FALSE if there was no match
1429     invert if there was a non-fatal error
1430 ph10 392 */
1431 ph10 378
1432     static BOOL
1433 ph10 1335 match_patterns(char *matchptr, size_t length, unsigned int options,
1434 ph10 1324 int startoffset, int *offsets, int *mrc)
1435 ph10 378 {
1436     int i;
1437 ph10 561 size_t slen = length;
1438 ph10 1003 patstr *p = patterns;
1439 ph10 561 const char *msg = "this text:\n\n";
1440 ph10 1003
1441 ph10 561 if (slen > 200)
1442     {
1443     slen = 200;
1444     msg = "text that starts:\n\n";
1445 ph10 579 }
1446 ph10 1003 for (i = 1; p != NULL; p = p->next, i++)
1447 ph10 378 {
1448 ph10 1003 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1449 ph10 1324 startoffset, options, offsets, OFFSET_SIZE);
1450 ph10 378 if (*mrc >= 0) return TRUE;
1451     if (*mrc == PCRE_ERROR_NOMATCH) continue;
1452 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1453 ph10 1003 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1454 ph10 561 fprintf(stderr, "%s", msg);
1455     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1456     fprintf(stderr, "\n\n");
1457 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1458     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1459 ph10 561 resource_error = TRUE;
1460 ph10 378 if (error_count++ > 20)
1461     {
1462 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1463     pcregrep_exit(2);
1464 ph10 378 }
1465     return invert; /* No more matching; don't show the line again */
1466     }
1467    
1468     return FALSE; /* No match, no errors */
1469     }
1470    
1471    
1472    
1473     /*************************************************
1474 nigel 77 * Grep an individual file *
1475     *************************************************/
1476    
1477     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1478 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1479 nigel 77 the top third of the buffer, thus keeping more of the file available for
1480     context printing or for multiline scanning. For large files, the pointer will
1481     be in the middle third most of the time, so the bottom third is available for
1482     "before" context printing.
1483    
1484     Arguments:
1485 ph10 286 handle the fopened FILE stream for a normal file
1486     the gzFile pointer when reading is via libz
1487     the BZFILE pointer when reading is via libbz2
1488     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1489 ph10 644 filename the file name or NULL (for errors)
1490 nigel 77 printname the file name if it is to be printed for each match
1491     or NULL if the file name is not to be printed
1492     it cannot be NULL if filenames[_nomatch]_only is set
1493    
1494     Returns: 0 if there was at least one match
1495     1 otherwise (no matches)
1496 ph10 654 2 if an overlong line is encountered
1497 ph10 644 3 if there is a read error on a .bz2 file
1498 nigel 77 */
1499    
1500 nigel 49 static int
1501 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1502 nigel 49 {
1503     int rc = 1;
1504 nigel 77 int linenumber = 1;
1505     int lastmatchnumber = 0;
1506 nigel 49 int count = 0;
1507 ph10 280 int filepos = 0;
1508 ph10 378 int offsets[OFFSET_SIZE];
1509 nigel 77 char *lastmatchrestart = NULL;
1510 ph10 644 char *ptr = main_buffer;
1511 nigel 77 char *endptr;
1512     size_t bufflength;
1513 ph10 947 BOOL binary = FALSE;
1514 nigel 77 BOOL endhyphenpending = FALSE;
1515 ph10 519 BOOL input_line_buffered = line_buffered;
1516 ph10 286 FILE *in = NULL; /* Ensure initialized */
1517 nigel 49
1518 ph10 286 #ifdef SUPPORT_LIBZ
1519     gzFile ingz = NULL;
1520     #endif
1521 nigel 77
1522 ph10 286 #ifdef SUPPORT_LIBBZ2
1523     BZFILE *inbz2 = NULL;
1524     #endif
1525    
1526    
1527     /* Do the first read into the start of the buffer and set up the pointer to end
1528     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1529     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1530     fail. */
1531    
1532 chpe 1136 (void)frtype;
1533    
1534 ph10 286 #ifdef SUPPORT_LIBZ
1535     if (frtype == FR_LIBZ)
1536     {
1537     ingz = (gzFile)handle;
1538 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1539 ph10 286 }
1540     else
1541     #endif
1542    
1543     #ifdef SUPPORT_LIBBZ2
1544     if (frtype == FR_LIBBZ2)
1545     {
1546     inbz2 = (BZFILE *)handle;
1547 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1548 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1549     } /* without the cast it is unsigned. */
1550     else
1551     #endif
1552    
1553     {
1554     in = (FILE *)handle;
1555 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1556 ph10 535 bufflength = input_line_buffered?
1557 ph10 644 read_one_line(main_buffer, bufsize, in) :
1558     fread(main_buffer, 1, bufsize, in);
1559 ph10 286 }
1560 ph10 535
1561 ph10 644 endptr = main_buffer + bufflength;
1562 nigel 77
1563 ph10 947 /* Unless binary-files=text, see if we have a binary file. This uses the same
1564 ph10 975 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1565 ph10 947 file. */
1566    
1567     if (binary_files != BIN_TEXT)
1568     {
1569 ph10 975 binary =
1570 ph10 947 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1571     if (binary && binary_files == BIN_NOMATCH) return 1;
1572 ph10 975 }
1573 ph10 947
1574 nigel 77 /* Loop while the current pointer is not at the end of the file. For large
1575     files, endptr will be at the end of the buffer when we are in the middle of the
1576     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1577     way, the buffer is shifted left and re-filled. */
1578    
1579     while (ptr < endptr)
1580 nigel 49 {
1581 ph10 378 int endlinelength;
1582 nigel 87 int mrc = 0;
1583 ph10 654 int startoffset = 0;
1584 ph10 1335 unsigned int options = 0;
1585 ph10 378 BOOL match;
1586 ph10 286 char *matchptr = ptr;
1587 nigel 77 char *t = ptr;
1588     size_t length, linelength;
1589 nigel 49
1590 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1591     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1592     length remainder of the data in the buffer. Otherwise, it is the length of
1593 ph10 378 the next line, excluding the terminating newline. After matching, we always
1594     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1595     option is used for compiling, so that any match is constrained to be in the
1596     first line. */
1597 nigel 77
1598 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1599     linelength = t - ptr - endlinelength;
1600 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1601 ph10 654
1602     /* Check to see if the line we are looking at extends right to the very end
1603     of the buffer without a line terminator. This means the line is too long to
1604 ph10 644 handle. */
1605 ph10 654
1606 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1607     {
1608     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1609 ph10 646 "pcregrep: check the --buffer-size option\n",
1610 ph10 654 linenumber,
1611 ph10 644 (filename == NULL)? "" : " of file ",
1612     (filename == NULL)? "" : filename);
1613     return 2;
1614 ph10 654 }
1615 nigel 77
1616 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1617    
1618     #ifdef JFRIEDL_DEBUG
1619     if (jfriedl_XT || jfriedl_XR)
1620     {
1621 zherczeg 1216 # include <sys/time.h>
1622     # include <time.h>
1623 nigel 89 struct timeval start_time, end_time;
1624     struct timezone dummy;
1625 ph10 392 int i;
1626 nigel 89
1627     if (jfriedl_XT)
1628     {
1629     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1630     const char *orig = ptr;
1631     ptr = malloc(newlen + 1);
1632     if (!ptr) {
1633     printf("out of memory");
1634 ph10 561 pcregrep_exit(2);
1635 nigel 89 }
1636     endptr = ptr;
1637     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1638     for (i = 0; i < jfriedl_XT; i++) {
1639     strncpy(endptr, orig, length);
1640     endptr += length;
1641     }
1642     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1643     length = newlen;
1644     }
1645    
1646     if (gettimeofday(&start_time, &dummy) != 0)
1647     perror("bad gettimeofday");
1648    
1649    
1650     for (i = 0; i < jfriedl_XR; i++)
1651 ph10 1003 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1652 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1653 nigel 89
1654     if (gettimeofday(&end_time, &dummy) != 0)
1655     perror("bad gettimeofday");
1656    
1657     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1658     -
1659     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1660    
1661     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1662     return 0;
1663     }
1664     #endif
1665    
1666 ph10 1039 /* We come back here after a match when show_only_matching is set, in order
1667     to find any further matches in the same line. This applies to
1668     --only-matching, --file-offsets, and --line-offsets. */
1669 nigel 89
1670 ph10 286 ONLY_MATCHING_RESTART:
1671    
1672 ph10 392 /* Run through all the patterns until one matches or there is an error other
1673 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1674 ph10 1335 finding subsequent matches when colouring matched lines. After finding one
1675     match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1676 ph10 1324 this line. */
1677 ph10 392
1678 ph10 1324 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1679     options = PCRE_NOTEMPTY;
1680 nigel 77
1681 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1682 nigel 77
1683 nigel 49 if (match != invert)
1684     {
1685 nigel 77 BOOL hyphenprinted = FALSE;
1686    
1687 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1688 nigel 77
1689 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1690    
1691     /* Just count if just counting is wanted. */
1692    
1693 nigel 49 if (count_only) count++;
1694 ph10 975
1695     /* When handling a binary file and binary-files==binary, the "binary"
1696     variable will be set true (it's false in all other cases). In this
1697 ph10 947 situation we just want to output the file name. No need to scan further. */
1698 ph10 975
1699 ph10 947 else if (binary)
1700     {
1701     fprintf(stdout, "Binary file %s matches\n", filename);
1702 ph10 975 return 0;
1703     }
1704 nigel 49
1705 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1706     in the file. */
1707    
1708 ph10 420 else if (filenames == FN_MATCH_ONLY)
1709 nigel 49 {
1710 nigel 77 fprintf(stdout, "%s\n", printname);
1711 nigel 49 return 0;
1712     }
1713    
1714 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1715    
1716 nigel 77 else if (quiet) return 0;
1717 nigel 49
1718 ph10 1039 /* The --only-matching option prints just the substring that matched,
1719     and/or one or more captured portions of it, as long as these strings are
1720     not empty. The --file-offsets and --line-offsets options output offsets for
1721     the matching substring (all three set show_only_matching). None of these
1722     mutually exclusive options prints any context. Afterwards, adjust the start
1723     and then jump back to look for further matches in the same line. If we are
1724     in invert mode, however, nothing is printed and we do not restart - this
1725     could still be useful because the return code is set. */
1726 nigel 87
1727 ph10 1039 else if (show_only_matching)
1728 nigel 87 {
1729 ph10 279 if (!invert)
1730 ph10 286 {
1731 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1732     if (number) fprintf(stdout, "%d:", linenumber);
1733 ph10 1039
1734     /* Handle --line-offsets */
1735    
1736 ph10 280 if (line_offsets)
1737 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1738 ph10 286 offsets[1] - offsets[0]);
1739 ph10 1039
1740     /* Handle --file-offsets */
1741    
1742 ph10 280 else if (file_offsets)
1743 ph10 579 fprintf(stdout, "%d,%d\n",
1744 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1745 ph10 286 offsets[1] - offsets[0]);
1746 ph10 1039
1747     /* Handle --only-matching, which may occur many times */
1748    
1749     else
1750 ph10 377 {
1751 ph10 1039 BOOL printed = FALSE;
1752     omstr *om;
1753 ph10 1221
1754 ph10 1039 for (om = only_matching; om != NULL; om = om->next)
1755 ph10 579 {
1756 ph10 1039 int n = om->groupnum;
1757     if (n < mrc)
1758     {
1759     int plen = offsets[2*n + 1] - offsets[2*n];
1760     if (plen > 0)
1761     {
1762 ph10 1221 if (printed) fprintf(stdout, "%s", om_separator);
1763 ph10 1039 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1764     FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1765     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1766     printed = TRUE;
1767     }
1768     }
1769 ph10 579 }
1770 ph10 1221
1771 ph10 1039 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1772 ph10 392 }
1773 ph10 1039
1774     /* Prepare to repeat to find the next match */
1775    
1776 ph10 286 match = FALSE;
1777 ph10 564 if (line_buffered) fflush(stdout);
1778 ph10 636 rc = 0; /* Had some success */
1779     startoffset = offsets[1]; /* Restart after the match */
1780 ph10 286 goto ONLY_MATCHING_RESTART;
1781     }
1782 nigel 87 }
1783    
1784     /* This is the default case when none of the above options is set. We print
1785     the matching lines(s), possibly preceded and/or followed by other lines of
1786     context. */
1787    
1788 nigel 49 else
1789     {
1790 nigel 77 /* See if there is a requirement to print some "after" lines from a
1791     previous match. We never print any overlaps. */
1792    
1793     if (after_context > 0 && lastmatchnumber > 0)
1794     {
1795 nigel 93 int ellength;
1796 nigel 77 int linecount = 0;
1797     char *p = lastmatchrestart;
1798    
1799     while (p < ptr && linecount < after_context)
1800     {
1801 nigel 93 p = end_of_line(p, ptr, &ellength);
1802 nigel 77 linecount++;
1803     }
1804    
1805     /* It is important to advance lastmatchrestart during this printing so
1806 nigel 87 that it interacts correctly with any "before" printing below. Print
1807     each line's data using fwrite() in case there are binary zeroes. */
1808 nigel 77
1809     while (lastmatchrestart < p)
1810     {
1811     char *pp = lastmatchrestart;
1812     if (printname != NULL) fprintf(stdout, "%s-", printname);
1813     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1814 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1815 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1816 nigel 93 lastmatchrestart = pp;
1817 nigel 77 }
1818     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1819     }
1820    
1821     /* If there were non-contiguous lines printed above, insert hyphens. */
1822    
1823     if (hyphenpending)
1824     {
1825     fprintf(stdout, "--\n");
1826     hyphenpending = FALSE;
1827     hyphenprinted = TRUE;
1828     }
1829    
1830     /* See if there is a requirement to print some "before" lines for this
1831     match. Again, don't print overlaps. */
1832    
1833     if (before_context > 0)
1834     {
1835     int linecount = 0;
1836     char *p = ptr;
1837    
1838 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1839 nigel 87 linecount < before_context)
1840 nigel 77 {
1841 nigel 87 linecount++;
1842 ph10 644 p = previous_line(p, main_buffer);
1843 nigel 77 }
1844    
1845     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1846     fprintf(stdout, "--\n");
1847    
1848     while (p < ptr)
1849     {
1850 nigel 93 int ellength;
1851 nigel 77 char *pp = p;
1852     if (printname != NULL) fprintf(stdout, "%s-", printname);
1853     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1854 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1855 ph10 515 FWRITE(p, 1, pp - p, stdout);
1856 nigel 93 p = pp;
1857 nigel 77 }
1858     }
1859    
1860     /* Now print the matching line(s); ensure we set hyphenpending at the end
1861 nigel 85 of the file if any context lines are being output. */
1862 nigel 77
1863 nigel 85 if (after_context > 0 || before_context > 0)
1864     endhyphenpending = TRUE;
1865    
1866 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1867 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1868 nigel 77
1869     /* In multiline mode, we want to print to the end of the line in which
1870     the end of the matched string is found, so we adjust linelength and the
1871 ph10 222 line number appropriately, but only when there actually was a match
1872     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1873     the match will always be before the first newline sequence. */
1874 nigel 77
1875 ph10 587 if (multiline & !invert)
1876 nigel 77 {
1877 ph10 587 char *endmatch = ptr + offsets[1];
1878     t = ptr;
1879 ph10 1353 while (t <= endmatch)
1880 nigel 93 {
1881 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1882     if (t < endmatch) linenumber++; else break;
1883 nigel 93 }
1884 ph10 587 linelength = t - ptr - endlinelength;
1885 nigel 77 }
1886    
1887 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1888     zeroes are treated as just another data character. */
1889    
1890     /* This extra option, for Jeffrey Friedl's debugging requirements,
1891     replaces the matched string, or a specific captured string if it exists,
1892     with X. When this happens, colouring is ignored. */
1893    
1894     #ifdef JFRIEDL_DEBUG
1895     if (S_arg >= 0 && S_arg < mrc)
1896     {
1897     int first = S_arg * 2;
1898     int last = first + 1;
1899 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1900 nigel 87 fprintf(stdout, "X");
1901 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1902 nigel 87 }
1903     else
1904     #endif
1905    
1906 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1907 ph10 585 matches, but not of course if the line is a non-match. */
1908 ph10 589
1909 ph10 585 if (do_colour && !invert)
1910 nigel 87 {
1911 ph10 589 int plength;
1912 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1913 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1914 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1915 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1916 ph10 378 for (;;)
1917     {
1918 ph10 632 startoffset = offsets[1];
1919 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1920 ph10 1324 !match_patterns(matchptr, length, options, startoffset, offsets,
1921     &mrc))
1922 ph10 632 break;
1923     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1924 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1925 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1926 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1927     }
1928 ph10 587
1929     /* In multiline mode, we may have already printed the complete line
1930 ph10 589 and its line-ending characters (if they matched the pattern), so there
1931 ph10 587 may be no more to print. */
1932 ph10 589
1933 ph10 836 plength = (int)((linelength + endlinelength) - startoffset);
1934 ph10 636 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1935 nigel 87 }
1936 ph10 392
1937 ph10 378 /* Not colouring; no need to search for further matches */
1938 ph10 392
1939 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1940 nigel 49 }
1941    
1942 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1943     given, flush the output. */
1944 nigel 87
1945 ph10 519 if (line_buffered) fflush(stdout);
1946 nigel 77 rc = 0; /* Had some success */
1947    
1948     /* Remember where the last match happened for after_context. We remember
1949     where we are about to restart, and that line's number. */
1950    
1951 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1952 nigel 77 lastmatchnumber = linenumber + 1;
1953 nigel 49 }
1954 nigel 77
1955 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1956     anything to be printed), we have to move on to the end of the match before
1957     proceeding. */
1958    
1959     if (multiline && invert && match)
1960     {
1961     int ellength;
1962     char *endmatch = ptr + offsets[1];
1963     t = ptr;
1964     while (t < endmatch)
1965     {
1966     t = end_of_line(t, endptr, &ellength);
1967     if (t <= endmatch) linenumber++; else break;
1968     }
1969     endmatch = end_of_line(endmatch, endptr, &ellength);
1970     linelength = endmatch - ptr - ellength;
1971     }
1972    
1973 ph10 286 /* Advance to after the newline and increment the line number. The file
1974 ph10 280 offset to the current line is maintained in filepos. */
1975 nigel 77
1976 nigel 93 ptr += linelength + endlinelength;
1977 ph10 530 filepos += (int)(linelength + endlinelength);
1978 nigel 77 linenumber++;
1979 ph10 535
1980     /* If input is line buffered, and the buffer is not yet full, read another
1981 ph10 519 line and add it into the buffer. */
1982 ph10 535
1983 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
1984 ph10 519 {
1985 ph10 836 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1986 ph10 519 bufflength += add;
1987 ph10 535 endptr += add;
1988     }
1989 nigel 77
1990     /* If we haven't yet reached the end of the file (the buffer is full), and
1991     the current point is in the top 1/3 of the buffer, slide the buffer down by
1992     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1993     about to be lost, print them. */
1994    
1995 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1996 nigel 77 {
1997     if (after_context > 0 &&
1998     lastmatchnumber > 0 &&
1999 ph10 644 lastmatchrestart < main_buffer + bufthird)
2000 nigel 77 {
2001     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2002     lastmatchnumber = 0;
2003     }
2004    
2005     /* Now do the shuffle */
2006    
2007 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2008     ptr -= bufthird;
2009 ph10 286
2010     #ifdef SUPPORT_LIBZ
2011     if (frtype == FR_LIBZ)
2012 ph10 644 bufflength = 2*bufthird +
2013     gzread (ingz, main_buffer + 2*bufthird, bufthird);
2014 ph10 286 else
2015     #endif
2016    
2017     #ifdef SUPPORT_LIBBZ2
2018     if (frtype == FR_LIBBZ2)
2019 ph10 644 bufflength = 2*bufthird +
2020     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2021 ph10 286 else
2022     #endif
2023    
2024 ph10 644 bufflength = 2*bufthird +
2025 ph10 535 (input_line_buffered?
2026 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2027     fread(main_buffer + 2*bufthird, 1, bufthird, in));
2028     endptr = main_buffer + bufflength;
2029 nigel 77
2030     /* Adjust any last match point */
2031    
2032 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2033 nigel 77 }
2034     } /* Loop through the whole file */
2035    
2036     /* End of file; print final "after" lines if wanted; do_after_lines sets
2037     hyphenpending if it prints something. */
2038    
2039 ph10 1039 if (!show_only_matching && !count_only)
2040 nigel 87 {
2041     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2042     hyphenpending |= endhyphenpending;
2043     }
2044 nigel 77
2045     /* Print the file name if we are looking for those without matches and there
2046     were none. If we found a match, we won't have got this far. */
2047    
2048 nigel 87 if (filenames == FN_NOMATCH_ONLY)
2049 nigel 77 {
2050     fprintf(stdout, "%s\n", printname);
2051     return 0;
2052 nigel 49 }
2053    
2054 nigel 77 /* Print the match count if wanted */
2055    
2056 nigel 49 if (count_only)
2057     {
2058 ph10 420 if (count > 0 || !omit_zero_count)
2059 ph10 461 {
2060     if (printname != NULL && filenames != FN_NONE)
2061 ph10 420 fprintf(stdout, "%s:", printname);
2062     fprintf(stdout, "%d\n", count);
2063 ph10 461 }
2064 nigel 49 }
2065    
2066     return rc;
2067     }
2068    
2069    
2070    
2071     /*************************************************
2072 nigel 53 * Grep a file or recurse into a directory *
2073     *************************************************/
2074    
2075 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
2076     recursing; if it's a file, grep it.
2077    
2078     Arguments:
2079     pathname the path to investigate
2080 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2081 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
2082    
2083 ph10 1003 Returns: -1 the file/directory was skipped
2084     0 if there was at least one match
2085 nigel 77 1 if there were no matches
2086     2 there was some kind of error
2087    
2088     However, file opening failures are suppressed if "silent" is set.
2089     */
2090    
2091 nigel 53 static int
2092 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2093 nigel 53 {
2094     int rc = 1;
2095 ph10 286 int frtype;
2096     void *handle;
2097 ph10 1003 char *lastcomp;
2098 ph10 286 FILE *in = NULL; /* Ensure initialized */
2099 nigel 53
2100 ph10 286 #ifdef SUPPORT_LIBZ
2101     gzFile ingz = NULL;
2102     #endif
2103    
2104     #ifdef SUPPORT_LIBBZ2
2105     BZFILE *inbz2 = NULL;
2106     #endif
2107    
2108 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2109 ph10 879 int pathlen;
2110     #endif
2111    
2112 ph10 1354 #if defined NATIVE_ZOS
2113     int zos_type;
2114     FILE *zos_test_file;
2115     #endif
2116    
2117 nigel 77 /* If the file name is "-" we scan stdin */
2118 nigel 53
2119 nigel 77 if (strcmp(pathname, "-") == 0)
2120 nigel 53 {
2121 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
2122 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2123 nigel 77 stdin_name : NULL);
2124     }
2125    
2126 ph10 1003 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2127     directories, whereas --include and --exclude apply to everything else. The test
2128     is against the final component of the path. */
2129 nigel 87
2130 ph10 1003 lastcomp = strrchr(pathname, FILESEP);
2131     lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2132    
2133     /* If the file is a directory, skip if not recursing or if explicitly excluded.
2134     Otherwise, scan the directory and recurse for each path within it. The scanning
2135     code is localized so it can be made system-specific. */
2136    
2137 ph10 1354
2138     /* For z/OS, determine the file type. */
2139    
2140     #if defined NATIVE_ZOS
2141     zos_test_file = fopen(pathname,"rb");
2142    
2143     if (zos_test_file == NULL)
2144     {
2145 ph10 1404 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2146 ph10 1354 pathname, strerror(errno));
2147     return -1;
2148     }
2149     zos_type = identifyzosfiletype (zos_test_file);
2150     fclose (zos_test_file);
2151    
2152     /* Handle a PDS in separate code */
2153    
2154     if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2155     {
2156 ph10 1355 return travelonpdsdir (pathname, only_one_at_top);
2157 ph10 1354 }
2158    
2159     /* Deal with regular files in the normal way below. These types are:
2160     zos_type == __ZOS_PDS_MEMBER
2161     zos_type == __ZOS_PS
2162     zos_type == __ZOS_VSAM_KSDS
2163     zos_type == __ZOS_VSAM_ESDS
2164     zos_type == __ZOS_VSAM_RRDS
2165     */
2166    
2167     /* Handle a z/OS directory using common code. */
2168    
2169     else if (zos_type == __ZOS_HFS)
2170     {
2171     #endif /* NATIVE_ZOS */
2172    
2173    
2174     /* Handle directories: common code for all OS */
2175    
2176 ph10 1003 if (isdirectory(pathname))
2177 nigel 77 {
2178 ph10 1003 if (dee_action == dee_SKIP ||
2179     !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2180     return -1;
2181    
2182 nigel 87 if (dee_action == dee_RECURSE)
2183 nigel 53 {
2184 nigel 87 char buffer[1024];
2185     char *nextfile;
2186     directory_type *dir = opendirectory(pathname);
2187 nigel 53
2188 nigel 87 if (dir == NULL)
2189     {
2190     if (!silent)
2191     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2192     strerror(errno));
2193     return 2;
2194     }
2195 nigel 77
2196 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
2197     {
2198 ph10 1003 int frc;
2199     sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2200 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2201     if (frc > 1) rc = frc;
2202     else if (frc == 0 && rc == 1) rc = 0;
2203     }
2204    
2205     closedirectory(dir);
2206     return rc;
2207 nigel 53 }
2208     }
2209    
2210 ph10 1354 #if defined NATIVE_ZOS
2211     }
2212     #endif
2213 nigel 53
2214 ph10 1354 /* If the file is not a directory, check for a regular file, and if it is not,
2215     skip it if that's been requested. Otherwise, check for an explicit inclusion or
2216     exclusion. */
2217 nigel 87
2218 ph10 1354 else if (
2219     #if defined NATIVE_ZOS
2220     (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2221     #else /* all other OS */
2222     (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2223     #endif
2224     !test_incexc(lastcomp, include_patterns, exclude_patterns))
2225     return -1; /* File skipped */
2226    
2227 nigel 87 /* Control reaches here if we have a regular file, or if we have a directory
2228     and recursion or skipping was not requested, or if we have anything else and
2229     skipping was not requested. The scan proceeds. If this is the first and only
2230     argument at top level, we don't show the file name, unless we are only showing
2231     the file name, or the filename was forced (-H). */
2232    
2233 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2234 ph10 530 pathlen = (int)(strlen(pathname));
2235 ph10 879 #endif
2236 ph10 286
2237     /* Open using zlib if it is supported and the file name ends with .gz. */
2238    
2239     #ifdef SUPPORT_LIBZ
2240     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2241 nigel 53 {
2242 ph10 286 ingz = gzopen(pathname, "rb");
2243     if (ingz == NULL)
2244     {
2245     if (!silent)
2246     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2247     strerror(errno));
2248     return 2;
2249     }
2250     handle = (void *)ingz;
2251     frtype = FR_LIBZ;
2252     }
2253     else
2254     #endif
2255    
2256     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2257    
2258     #ifdef SUPPORT_LIBBZ2
2259     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2260     {
2261     inbz2 = BZ2_bzopen(pathname, "rb");
2262     handle = (void *)inbz2;
2263     frtype = FR_LIBBZ2;
2264     }
2265     else
2266     #endif
2267    
2268     /* Otherwise use plain fopen(). The label is so that we can come back here if
2269     an attempt to read a .bz2 file indicates that it really is a plain file. */
2270    
2271     #ifdef SUPPORT_LIBBZ2
2272     PLAIN_FILE:
2273     #endif
2274     {
2275 ph10 419 in = fopen(pathname, "rb");
2276 ph10 286 handle = (void *)in;
2277     frtype = FR_PLAIN;
2278     }
2279    
2280     /* All the opening methods return errno when they fail. */
2281    
2282     if (handle == NULL)
2283     {
2284 nigel 77 if (!silent)
2285     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2286     strerror(errno));
2287 nigel 53 return 2;
2288     }
2289    
2290 ph10 286 /* Now grep the file */
2291    
2292 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2293 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2294 nigel 77
2295 ph10 286 /* Close in an appropriate manner. */
2296    
2297     #ifdef SUPPORT_LIBZ
2298     if (frtype == FR_LIBZ)
2299     gzclose(ingz);
2300     else
2301     #endif
2302    
2303 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2304 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
2305     again as a normal file. */
2306    
2307     #ifdef SUPPORT_LIBBZ2
2308     if (frtype == FR_LIBBZ2)
2309     {
2310 ph10 644 if (rc == 3)
2311 ph10 286 {
2312     int errnum;
2313     const char *err = BZ2_bzerror(inbz2, &errnum);
2314     if (errnum == BZ_DATA_ERROR_MAGIC)
2315     {
2316     BZ2_bzclose(inbz2);
2317     goto PLAIN_FILE;
2318     }
2319     else if (!silent)
2320     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2321     pathname, err);
2322 ph10 654 rc = 2; /* The normal "something went wrong" code */
2323 ph10 286 }
2324     BZ2_bzclose(inbz2);
2325     }
2326     else
2327     #endif
2328    
2329     /* Normal file close */
2330    
2331 nigel 53 fclose(in);
2332 ph10 286
2333     /* Pass back the yield from pcregrep(). */
2334    
2335 nigel 53 return rc;
2336     }
2337    
2338    
2339    
2340     /*************************************************
2341 nigel 77 * Handle a single-letter, no data option *
2342 nigel 53 *************************************************/
2343    
2344     static int
2345     handle_option(int letter, int options)
2346     {
2347     switch(letter)
2348     {
2349 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
2350 ph10 561 case N_HELP: help(); pcregrep_exit(0);
2351 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
2352 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
2353 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2354 ph10 947 case 'a': binary_files = BIN_TEXT; break;
2355 nigel 53 case 'c': count_only = TRUE; break;
2356 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
2357     case 'H': filenames = FN_FORCE; break;
2358 ph10 947 case 'I': binary_files = BIN_NOMATCH; break;
2359 nigel 87 case 'h': filenames = FN_NONE; break;
2360 nigel 53 case 'i': options |= PCRE_CASELESS; break;
2361 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2362 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
2363 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2364 nigel 53 case 'n': number = TRUE; break;
2365 ph10 1221
2366 ph10 1039 case 'o':
2367     only_matching_last = add_number(0, only_matching_last);
2368     if (only_matching == NULL) only_matching = only_matching_last;
2369     break;
2370 ph10 1221
2371 nigel 77 case 'q': quiet = TRUE; break;
2372 nigel 87 case 'r': dee_action = dee_RECURSE; break;
2373 nigel 53 case 's': silent = TRUE; break;
2374 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2375 nigel 53 case 'v': invert = TRUE; break;
2376 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
2377     case 'x': process_options |= PO_LINE_MATCH; break;
2378 nigel 53
2379     case 'V':
2380 ph10 1003 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2381 ph10 561 pcregrep_exit(0);
2382 nigel 53 break;
2383    
2384     default:
2385     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2386 ph10 561 pcregrep_exit(usage(2));
2387 nigel 53 }
2388    
2389     return options;
2390     }
2391    
2392    
2393    
2394    
2395     /*************************************************
2396 nigel 87 * Construct printed ordinal *
2397     *************************************************/
2398    
/* This turns a number into an English ordinal such as "1st", "2nd", "3rd",
or "4th". Numbers whose last two digits are 11, 12, or 13 take "th" (for
example "11th" and "112th") rather than the suffix that their final digit on
its own would select.

Argument:   n   the number to convert
Returns:    pointer to a static buffer containing the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* Enough for a 32-bit int, the suffix, and zero */
char *p = buffer;
int lasttwo = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;       /* Find the end of the digits */

if (lasttwo >= 11 && lasttwo <= 13)
  {
  strcpy(p, "th");
  }
else switch (n%10)
  {
  case 1:  strcpy(p, "st"); break;
  case 2:  strcpy(p, "nd"); break;
  case 3:  strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
2416     }
2417    
2418    
2419    
2420     /*************************************************
2421     * Compile a single pattern *
2422     *************************************************/
2423    
2424 ph10 1003 /* Do nothing if the pattern has already been compiled. This is the case for
2425     include/exclude patterns read from a file.
2426 nigel 87
2427 ph10 1003 When the -F option has been used, each "pattern" may be a list of strings,
2428     separated by line breaks. They will be matched literally. We split such a
2429     string and compile the first substring, inserting an additional block into the
2430     pattern chain.
2431    
2432 nigel 87 Arguments:
2433 ph10 1003 p points to the pattern block
2434 nigel 87 options the PCRE options
2435 ph10 1003 popts the processing options
2436     fromfile TRUE if the pattern was read from a file
2437     fromtext file name or identifying text (e.g. "include")
2438 nigel 87 count 0 if this is the only command line pattern, or
2439     number of the command line pattern, or
2440     linenumber for a pattern from a file
2441    
2442     Returns: TRUE on success, FALSE after an error
2443     */
2444    
2445     static BOOL
2446 ph10 1003 compile_pattern(patstr *p, int options, int popts, int fromfile,
2447     const char *fromtext, int count)
2448 nigel 87 {
2449 ph10 644 char buffer[PATBUFSIZE];
2450 nigel 87 const char *error;
2451 ph10 1003 char *ps = p->string;
2452     int patlen = strlen(ps);
2453 nigel 87 int errptr;
2454    
2455 ph10 1003 if (p->compiled != NULL) return TRUE;
2456    
2457     if ((popts & PO_FIXED_STRINGS) != 0)
2458 nigel 87 {
2459 ph10 1003 int ellength;
2460     char *eop = ps + patlen;
2461     char *pe = end_of_line(ps, eop, &ellength);
2462 nigel 87
2463 ph10 1003 if (ellength != 0)
2464     {
2465     if (add_pattern(pe, p) == NULL) return FALSE;
2466     patlen = (int)(pe - ps - ellength);
2467     }
2468 ph10 142 }
2469 nigel 87
2470 ph10 1003 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2471     p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2472     if (p->compiled != NULL) return TRUE;
2473    
2474 nigel 87 /* Handle compile errors */
2475    
2476 ph10 1003 errptr -= (int)strlen(prefix[popts]);
2477     if (errptr > patlen) errptr = patlen;
2478 nigel 87
2479 ph10 1003 if (fromfile)
2480 nigel 87 {
2481 ph10 1003 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2482     "at offset %d: %s\n", count, fromtext, errptr, error);
2483 nigel 87 }
2484     else
2485     {
2486 ph10 1003 if (count == 0)
2487     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2488     fromtext, errptr, error);
2489     else
2490     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2491     ordin(count), fromtext, errptr, error);
2492 nigel 87 }
2493    
2494     return FALSE;
2495     }
2496    
2497    
2498    
2499     /*************************************************
2500 ph10 1003 * Read and compile a file of patterns *
2501 nigel 87 *************************************************/
2502    
2503 ph10 1003 /* This is used for --filelist, --include-from, and --exclude-from.
2504 nigel 87
2505     Arguments:
2506 ph10 1003 name the name of the file; "-" is stdin
2507     patptr pointer to the pattern chain anchor
2508     patlastptr pointer to the last pattern pointer
2509     popts the process options to pass to pattern_compile()
2510 nigel 87
2511 ph10 1003 Returns: TRUE if all went well
2512 nigel 87 */
2513    
2514     static BOOL
2515 ph10 1003 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2516 nigel 87 {
2517 ph10 1003 int linenumber = 0;
2518     FILE *f;
2519     char *filename;
2520     char buffer[PATBUFSIZE];
2521    
2522     if (strcmp(name, "-") == 0)
2523 nigel 87 {
2524 ph10 1003 f = stdin;
2525     filename = stdin_name;
2526     }
2527     else
2528     {
2529     f = fopen(name, "r");
2530     if (f == NULL)
2531     {
2532     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2533     return FALSE;
2534     }
2535     filename = name;
2536     }
2537    
2538     while (fgets(buffer, PATBUFSIZE, f) != NULL)
2539     {
2540     char *s = buffer + (int)strlen(buffer);
2541     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2542     *s = 0;
2543     linenumber++;
2544     if (buffer[0] == 0) continue; /* Skip blank lines */
2545    
2546     /* Note: this call to add_pattern() puts a pointer to the local variable
2547     "buffer" into the pattern chain. However, that pointer is used only when
2548     compiling the pattern, which happens immediately below, so we flatten it
2549     afterwards, as a precaution against any later code trying to use it. */
2550    
2551     *patlastptr = add_pattern(buffer, *patlastptr);
2552     if (*patlastptr == NULL) return FALSE;
2553     if (*patptr == NULL) *patptr = *patlastptr;
2554    
2555     /* This loop is needed because compiling a "pattern" when -F is set may add
2556     on additional literal patterns if the original contains a newline. In the
2557     common case, it never will, because fgets() stops at a newline. However,
2558     the -N option can be used to give pcregrep a different newline setting. */
2559    
2560 nigel 87 for(;;)
2561     {
2562 ph10 1003 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2563     linenumber))
2564 nigel 87 return FALSE;
2565 ph10 1003 (*patlastptr)->string = NULL; /* Insurance */
2566     if ((*patlastptr)->next == NULL) break;
2567     *patlastptr = (*patlastptr)->next;
2568 nigel 87 }
2569     }
2570 ph10 1003
2571     if (f != stdin) fclose(f);
2572     return TRUE;
2573 nigel 87 }
2574    
2575    
2576    
2577     /*************************************************
2578 nigel 49 * Main program *
2579     *************************************************/
2580    
2581 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2582    
2583 nigel 49 int
2584     main(int argc, char **argv)
2585     {
2586 nigel 53 int i, j;
2587 nigel 49 int rc = 1;
2588 nigel 87 BOOL only_one_at_top;
2589 ph10 1003 patstr *cp;
2590     fnstr *fn;
2591 nigel 87 const char *locale_from = "--locale";
2592 nigel 49 const char *error;
2593    
2594 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2595     pcre_jit_stack *jit_stack = NULL;
2596     #endif
2597    
2598 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2599     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2600 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2601 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2602 ph10 391 rather than escapes such as '\r'. */
2603 nigel 91
2604     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2605     switch(i)
2606     {
2607 ph10 391 default: newline = (char *)"lf"; break;
2608     case 13: newline = (char *)"cr"; break;
2609     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2610     case -1: newline = (char *)"any"; break;
2611     case -2: newline = (char *)"anycrlf"; break;
2612 nigel 91 }
2613    
2614 nigel 49 /* Process the options */
2615    
2616     for (i = 1; i < argc; i++)
2617     {
2618 nigel 77 option_item *op = NULL;
2619     char *option_data = (char *)""; /* default to keep compiler happy */
2620     BOOL longop;
2621     BOOL longopwasequals = FALSE;
2622    
2623 nigel 49 if (argv[i][0] != '-') break;
2624 nigel 53
2625 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2626 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2627 nigel 63
2628 nigel 77 if (argv[i][1] == 0)
2629     {
2630 ph10 1003 if (pattern_files != NULL || patterns != NULL) break;
2631 ph10 561 else pcregrep_exit(usage(2));
2632 nigel 77 }
2633 nigel 63
2634 nigel 77 /* Handle a long name option, or -- to terminate the options */
2635 nigel 53
2636     if (argv[i][1] == '-')
2637 nigel 49 {
2638 nigel 77 char *arg = argv[i] + 2;
2639     char *argequals = strchr(arg, '=');
2640 nigel 53
2641 nigel 77 if (*arg == 0) /* -- terminates options */
2642 nigel 49 {
2643 nigel 77 i++;
2644     break; /* out of the options-handling loop */
2645 nigel 53 }
2646 nigel 49
2647 nigel 77 longop = TRUE;
2648    
2649     /* Some long options have data that follows after =, for example file=name.
2650     Some options have variations in the long name spelling: specifically, we
2651     allow "regexp" because GNU grep allows it, though I personally go along
2652 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2653 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2654     both these categories. */
2655 nigel 77
2656 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2657     {
2658 nigel 77 char *opbra = strchr(op->long_name, '(');
2659     char *equals = strchr(op->long_name, '=');
2660 ph10 461
2661 ph10 422 /* Handle options with only one spelling of the name */
2662 ph10 461
2663 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2664 nigel 53 {
2665 nigel 77 if (equals == NULL) /* Not thing=data case */
2666     {
2667     if (strcmp(arg, op->long_name) == 0) break;
2668     }
2669     else /* Special case xxx=data */
2670     {
2671 ph10 530 int oplen = (int)(equals - op->long_name);
2672 ph10 535 int arglen = (argequals == NULL)?
2673 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2674 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2675     {
2676     option_data = arg + arglen;
2677     if (*option_data == '=')
2678     {
2679     option_data++;
2680     longopwasequals = TRUE;
2681     }
2682     break;
2683     }
2684     }
2685 nigel 53 }
2686 ph10 461
2687 ph10 422 /* Handle options with an alternate spelling of the name */
2688 ph10 461
2689     else
2690 nigel 77 {
2691     char buff1[24];
2692     char buff2[24];
2693 ph10 461
2694 ph10 530 int baselen = (int)(opbra - op->long_name);
2695     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2696 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2697 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2698 ph10 461
2699 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2700 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2701 ph10 461
2702     if (strncmp(arg, buff1, arglen) == 0 ||
2703 ph10 422 strncmp(arg, buff2, arglen) == 0)
2704     {
2705     if (equals != NULL && argequals != NULL)
2706     {
2707 ph10 461 option_data = argequals;
2708 ph10 422 if (*option_data == '=')
2709     {
2710 ph10 461 option_data++;
2711 ph10 422 longopwasequals = TRUE;
2712 ph10 461 }
2713     }
2714 nigel 77 break;
2715 ph10 461 }
2716 nigel 77 }
2717 nigel 53 }
2718 nigel 77
2719 nigel 53 if (op->one_char == 0)
2720     {
2721     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2722 ph10 561 pcregrep_exit(usage(2));
2723 nigel 53 }
2724     }
2725 nigel 49
2726 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2727     are not in the right form for putting in the option table because they use
2728     only one hyphen, yet are more than one character long. By putting them
2729     separately here, they will not get displayed as part of the help() output,
2730     but I don't think Jeffrey will care about that. */
2731    
2732     #ifdef JFRIEDL_DEBUG
2733     else if (strcmp(argv[i], "-pre") == 0) {
2734     jfriedl_prefix = argv[++i];
2735     continue;
2736     } else if (strcmp(argv[i], "-post") == 0) {
2737     jfriedl_postfix = argv[++i];
2738     continue;
2739     } else if (strcmp(argv[i], "-XT") == 0) {
2740     sscanf(argv[++i], "%d", &jfriedl_XT);
2741     continue;
2742     } else if (strcmp(argv[i], "-XR") == 0) {
2743     sscanf(argv[++i], "%d", &jfriedl_XR);
2744     continue;
2745     }
2746     #endif
2747    
2748    
2749 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2750     continue till we hit the last one or one that needs data. */
2751 nigel 53
2752     else
2753     {
2754     char *s = argv[i] + 1;
2755 nigel 77 longop = FALSE;
2756 ph10 1221
2757 nigel 53 while (*s != 0)
2758     {
2759 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2760 ph10 579 {
2761     if (*s == op->one_char) break;
2762 ph10 565 }
2763 nigel 77 if (op->one_char == 0)
2764 nigel 53 {
2765 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2766     *s, argv[i]);
2767 ph10 561 pcregrep_exit(usage(2));
2768 nigel 77 }
2769 ph10 1221
2770 ph10 1039 option_data = s+1;
2771 ph10 1221
2772     /* Break out if this is the last character in the string; it's handled
2773 ph10 1039 below like a single multi-char option. */
2774 ph10 579
2775 ph10 1221 if (*option_data == 0) break;
2776    
2777 ph10 1039 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2778     are used for ones that either have a numerical number or defaults, i.e.
2779     the data is optional. If a digit follows, there is data; if not, carry on
2780 ph10 565 with other single-character options in the same string. */
2781 ph10 579
2782 ph10 1039 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2783 ph10 579 {
2784     if (isdigit((unsigned char)s[1])) break;
2785 nigel 53 }
2786 ph10 1039 else /* Check for an option with data */
2787 ph10 579 {
2788 ph10 1039 if (op->type != OP_NODATA) break;
2789 ph10 579 }
2790    
2791     /* Handle a single-character option with no data, then loop for the
2792 ph10 565 next character in the string. */
2793 ph10 1221
2794 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2795 nigel 49 }
2796     }
2797 ph10 1221
2798 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2799     is OP_NODATA, it means that there is no data, and the option might set
2800     something in the PCRE options. */
2801 nigel 77
2802     if (op->type == OP_NODATA)
2803     {
2804 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2805     continue;
2806     }
2807    
2808 ph10 1039 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2809 nigel 87 either has a value or defaults to something. It cannot have data in a
2810 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2811 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2812 ph10 1221
2813 nigel 87 if (*option_data == 0 &&
2814 ph10 1039 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2815     op->type == OP_OP_NUMBERS))
2816 nigel 87 {
2817     switch (op->one_char)
2818 nigel 77 {
2819 nigel 87 case N_COLOUR:
2820     colour_option = (char *)"auto";
2821     break;
2822 ph10 579
2823 ph10 565 case 'o':
2824 ph10 1039 only_matching_last = add_number(0, only_matching_last);
2825     if (only_matching == NULL) only_matching = only_matching_last;
2826 ph10 579 break;
2827    
2828 nigel 87 #ifdef JFRIEDL_DEBUG
2829     case 'S':
2830     S_arg = 0;
2831     break;
2832     #endif
2833 nigel 77 }
2834 nigel 87 continue;
2835     }
2836 nigel 77
2837 nigel 87 /* Otherwise, find the data string for the option. */
2838    
2839     if (*option_data == 0)
2840     {
2841     if (i >= argc - 1 || longopwasequals)
2842 nigel 77 {
2843 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2844 ph10 561 pcregrep_exit(usage(2));
2845 nigel 87 }
2846     option_data = argv[++i];
2847     }
2848    
2849 ph10 1039 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2850     added to a chain of numbers. */
2851    
2852     if (op->type == OP_OP_NUMBERS)
2853     {
2854     unsigned long int n = decode_number(option_data, op, longop);
2855     omdatastr *omd = (omdatastr *)op->dataptr;
2856     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2857     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2858     }
2859    
2860 ph10 1003 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2861     include/exclude options, which can be called multiple times to create lists
2862     of patterns. */
2863 ph10 975
2864 ph10 1039 else if (op->type == OP_PATLIST)
2865     {
2866     patdatastr *pd = (patdatastr *)op->dataptr;
2867     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2868     if (*(pd->lastptr) == NULL) goto EXIT2;
2869     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2870     }
2871 ph10 1003
2872     /* If the option type is OP_FILELIST, it's one of the options that names a
2873     file. */
2874    
2875     else if (op->type == OP_FILELIST)
2876 nigel 87 {
2877 ph10 1003 fndatastr *fd = (fndatastr *)op->dataptr;
2878     fn = (fnstr *)malloc(sizeof(fnstr));
2879     if (fn == NULL)
2880 nigel 87 {
2881 ph10 1003 fprintf(stderr, "pcregrep: malloc failed\n");
2882     goto EXIT2;
2883 nigel 87 }
2884 ph10 1003 fn->next = NULL;
2885     fn->name = option_data;
2886     if (*(fd->anchor) == NULL)
2887     *(fd->anchor) = fn;
2888     else
2889     (*(fd->lastptr))->next = fn;
2890     *(fd->lastptr) = fn;
2891 nigel 87 }
2892 ph10 975
2893 ph10 947 /* Handle OP_BINFILES */
2894 ph10 975
2895 ph10 947 else if (op->type == OP_BINFILES)
2896     {
2897     if (strcmp(option_data, "binary") == 0)
2898     binary_files = BIN_BINARY;
2899     else if (strcmp(option_data, "without-match") == 0)
2900     binary_files = BIN_NOMATCH;
2901     else if (strcmp(option_data, "text") == 0)
2902     binary_files = BIN_TEXT;
2903     else
2904     {
2905 ph10 975 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2906     option_data);
2907 ph10 947 pcregrep_exit(usage(2));
2908 ph10 975 }
2909     }
2910 nigel 87
2911 ph10 1039 /* Otherwise, deal with a single string or numeric data value. */
2912 nigel 87
2913 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2914     op->type != OP_OP_NUMBER)
2915 nigel 87 {
2916     *((char **)op->dataptr) = option_data;
2917     }
2918     else
2919     {
2920 ph10 1039 unsigned long int n = decode_number(option_data, op, longop);
2921     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2922     else *((int *)op->dataptr) = n;
2923 nigel 77 }
2924 nigel 49 }
2925    
2926 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2927     for -A and -B. */
2928    
2929     if (both_context > 0)
2930     {
2931     if (after_context == 0) after_context = both_context;
2932     if (before_context == 0) before_context = both_context;
2933     }
2934 ph10 286
2935     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2936 ph10 1039 However, all three set show_only_matching because they display, each in their
2937     own way, only the data that has matched. */
2938 nigel 77
2939 ph10 1039 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2940 ph10 286 (file_offsets && line_offsets))
2941 ph10 280 {
2942     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2943     "and/or --line-offsets\n");
2944 ph10 561 pcregrep_exit(usage(2));
2945 ph10 280 }
2946    
2947 ph10 1039 if (only_matching != NULL || file_offsets || line_offsets)
2948     show_only_matching = TRUE;
2949 ph10 286
2950 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2951     LC_ALL environment variable is set, and if so, use it. */
2952 nigel 49
2953 nigel 87 if (locale == NULL)
2954 nigel 53 {
2955 nigel 87 locale = getenv("LC_ALL");
2956     locale_from = "LCC_ALL";
2957 nigel 53 }
2958 nigel 49
2959 nigel 87 if (locale == NULL)
2960     {
2961     locale = getenv("LC_CTYPE");
2962     locale_from = "LC_CTYPE";
2963     }
2964 nigel 49
2965 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2966     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2967    
2968     if (locale != NULL)
2969 nigel 49 {
2970 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2971 nigel 53 {
2972 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2973     locale, locale_from);
2974 nigel 53 return 2;
2975     }
2976 nigel 87 pcretables = pcre_maketables();
2977     }
2978 nigel 77
2979 nigel 87 /* Sort out colouring */
2980    
2981     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2982     {
2983     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2984     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2985     else
2986 nigel 53 {
2987 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2988     colour_option);
2989     return 2;
2990 nigel 77 }
2991 nigel 87 if (do_colour)
2992 nigel 77 {
2993 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2994     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2995     if (cs != NULL) colour_string = cs;
2996 nigel 77 }
2997 nigel 87 }
2998 ph10 535
2999 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
3000    
3001     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3002     {
3003     pcre_options |= PCRE_NEWLINE_CR;
3004 nigel 93 endlinetype = EL_CR;
3005 nigel 91 }
3006     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3007     {
3008     pcre_options |= PCRE_NEWLINE_LF;
3009 nigel 93 endlinetype = EL_LF;
3010 nigel 91 }
3011     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3012     {
3013     pcre_options |= PCRE_NEWLINE_CRLF;
3014 nigel 93 endlinetype = EL_CRLF;
3015 nigel 91 }
3016 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3017     {
3018     pcre_options |= PCRE_NEWLINE_ANY;
3019     endlinetype = EL_ANY;
3020     }
3021 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3022     {
3023     pcre_options |= PCRE_NEWLINE_ANYCRLF;
3024     endlinetype = EL_ANYCRLF;
3025     }
3026 nigel 91 else
3027     {
3028     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3029     return 2;
3030     }
3031    
3032 nigel 87 /* Interpret the text values for -d and -D */
3033    
3034     if (dee_option != NULL)
3035     {
3036     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3037     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3038     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3039     else
3040 nigel 77 {
3041 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3042     return 2;
3043 nigel 53 }
3044 nigel 49 }
3045    
3046 nigel 87 if (DEE_option != NULL)
3047     {
3048     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3049     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3050     else
3051     {
3052     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3053     return 2;
3054     }
3055     }
3056 nigel 49
3057 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
3058 nigel 87
3059     #ifdef JFRIEDL_DEBUG
3060     if (S_arg > 9)
3061 nigel 49 {
3062 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
3063     return 2;
3064     }
3065 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3066     {
3067     if (jfriedl_XT == 0) jfriedl_XT = 1;
3068     if (jfriedl_XR == 0) jfriedl_XR = 1;
3069     }
3070 nigel 87 #endif
3071 nigel 77
3072 ph10 1003 /* Get memory for the main buffer. */
3073 nigel 87
3074 ph10 644 bufsize = 3*bufthird;
3075     main_buffer = (char *)malloc(bufsize);
3076 nigel 87
3077 ph10 1003 if (main_buffer == NULL)
3078 nigel 87 {
3079     fprintf(stderr, "pcregrep: malloc failed\n");
3080 ph10 123 goto EXIT2;
3081 nigel 87 }
3082    
3083 ph10 1003 /* If no patterns were provided by -e, and there are no files provided by -f,
3084 nigel 87 the first argument is the one and only pattern, and it must exist. */
3085    
3086 ph10 1003 if (patterns == NULL && pattern_files == NULL)
3087 nigel 87 {
3088 nigel 63 if (i >= argc) return usage(2);
3089 ph10 1003 patterns = patterns_last = add_pattern(argv[i++], NULL);
3090     if (patterns == NULL) goto EXIT2;
3091 nigel 87 }
3092 nigel 77
3093 nigel 87 /* Compile the patterns that were provided on the command line, either by
3094 ph10 1003 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3095     after all the command-line options are read so that we know which PCRE options
3096     to use. When -F is used, compile_pattern() may add another block into the
3097     chain, so we must not access the next pointer till after the compile. */
3098 nigel 87
3099 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3100 nigel 87 {
3101 ph10 1003 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3102     (j == 1 && patterns->next == NULL)? 0 : j))
3103 ph10 123 goto EXIT2;
3104 nigel 87 }
3105    
3106 ph10 1003 /* Read and compile the regular expressions that are provided in files. */
3107 nigel 87
3108 ph10 1003 for (fn = pattern_files; fn != NULL; fn = fn->next)
3109 nigel 87 {
3110 ph10 1003 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3111     goto EXIT2;
3112 ph10 1004 }
3113 nigel 87
3114 ph10 1039 /* Study the regular expressions, as we will be running them many times. If an
3115 ph10 1035 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3116     returned, even if studying produces no data. */
3117 nigel 53
3118 ph10 1035 if (match_limit > 0 || match_limit_recursion > 0)
3119     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3120    
3121     /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3122    
3123 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3124     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3125     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3126 ph10 691 #endif
3127    
3128 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3129 nigel 53 {
3130 ph10 1003 cp->hint = pcre_study(cp->compiled, study_options, &error);
3131 nigel 53 if (error != NULL)
3132     {
3133     char s[16];
3134 ph10 1003 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3135 nigel 53 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3136 ph10 121 goto EXIT2;
3137 nigel 53 }
3138 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3139 ph10 1003 if (jit_stack != NULL && cp->hint != NULL)
3140     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3141 ph10 685 #endif
3142 nigel 53 }
3143 ph10 579
3144 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3145 ph10 1039 pcre_extra block for each pattern. There will always be an extra block because
3146 ph10 1035 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3147 nigel 53
3148 ph10 1035 for (cp = patterns; cp != NULL; cp = cp->next)
3149 ph10 561 {
3150 ph10 1035 if (match_limit > 0)
3151 ph10 561 {
3152 ph10 1035 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3153     cp->hint->match_limit = match_limit;
3154 ph10 561 }
3155 ph10 1039
3156 ph10 1035 if (match_limit_recursion > 0)
3157     {
3158     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3159     cp->hint->match_limit_recursion = match_limit_recursion;
3160     }
3161 ph10 579 }
3162 ph10 561
3163 ph10 1003 /* If there are include or exclude patterns read from the command line, compile
3164     them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3165     0. */
3166 nigel 77
3167 ph10 1003 for (j = 0; j < 4; j++)
3168 nigel 77 {
3169 ph10 1003 int k;
3170     for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3171 nigel 77 {
3172 ph10 1003 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3173     (k == 1 && cp->next == NULL)? 0 : k))
3174     goto EXIT2;
3175 nigel 77 }
3176     }
3177    
3178 ph10 1003 /* Read and compile include/exclude patterns from files. */
3179    
3180     for (fn = include_from; fn != NULL; fn = fn->next)
3181 nigel 77 {
3182 ph10 1003 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3183 ph10 121 goto EXIT2;
3184 nigel 77 }
3185    
3186 ph10 1003 for (fn = exclude_from; fn != NULL; fn = fn->next)
3187 ph10 325 {
3188 ph10 1003 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3189 ph10 325 goto EXIT2;
3190     }
3191    
3192 ph10 1003 /* If there are no files that contain lists of files to search, and there are
3193     no file arguments, search stdin, and then exit. */
3194    
3195     if (file_lists == NULL && i >= argc)
3196 ph10 325 {
3197 ph10 1003 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3198     (filenames > FN_DEFAULT)? stdin_name : NULL);
3199     goto EXIT;
3200 ph10 325 }
3201 ph10 975
3202 ph10 1003 /* If any files that contain a list of files to search have been specified,
3203     read them line by line and search the given files. */
3204 ph10 325
3205 ph10 1003 for (fn = file_lists; fn != NULL; fn = fn->next)
3206 ph10 944 {
3207     char buffer[PATBUFSIZE];
3208     FILE *fl;
3209 ph10 1003 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3210 ph10 975 {
3211 ph10 1003 fl = fopen(fn->name, "rb");
3212 ph10 944 if (fl == NULL)
3213     {
3214 ph10 1003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3215 ph10 944 strerror(errno));
3216     goto EXIT2;
3217 ph10 975 }
3218     }
3219 ph10 944 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3220     {
3221     int frc;
3222     char *end = buffer + (int)strlen(buffer);
3223     while (end > buffer && isspace(end[-1])) end--;
3224 ph10 975 *end = 0;
3225     if (*buffer != 0)
3226     {
3227     frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3228 ph10 944 if (frc > 1) rc = frc;
3229 ph10 975 else if (frc == 0 && rc == 1) rc = 0;
3230     }
3231     }
3232 ph10 1003 if (fl != stdin) fclose(fl);
3233 ph10 975 }
3234 nigel 49
3235 ph10 1003 /* After handling file-list, work through remaining arguments. Pass in the fact
3236     that there is only one argument at top level - this suppresses the file name if
3237     the argument is not a directory and filenames are not otherwise forced. */
3238 ph10 944
3239 ph10 1003 only_one_at_top = i == argc - 1 && file_lists == NULL;
3240 nigel 49
3241     for (; i < argc; i++)
3242     {
3243 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3244     only_one_at_top);
3245 nigel 77 if (frc > 1) rc = frc;
3246     else if (frc == 0 && rc == 1) rc = 0;
3247 nigel 49 }
3248    
3249 ph10 121 EXIT:
3250 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3251     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3252     #endif
3253 ph10 1003
3254 ph10 644 if (main_buffer != NULL) free(main_buffer);
3255 ph10 1003
3256     free_pattern_chain(patterns);
3257     free_pattern_chain(include_patterns);
3258     free_pattern_chain(include_dir_patterns);
3259     free_pattern_chain(exclude_patterns);
3260     free_pattern_chain(exclude_dir_patterns);
3261    
3262     free_file_chain(exclude_from);
3263     free_file_chain(include_from);
3264     free_file_chain(pattern_files);
3265     free_file_chain(file_lists);
3266    
3267 ph10 1039 while (only_matching != NULL)
3268     {
3269     omstr *this = only_matching;
3270     only_matching = this->next;
3271     free(this);
3272     }
3273    
3274 ph10 561 pcregrep_exit(rc);
3275 ph10 121
3276     EXIT2:
3277     rc = 2;
3278     goto EXIT;
3279 nigel 49 }
3280    
3281 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12