/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 1502 - (hide annotations) (download)
Mon Sep 15 13:56:18 2014 UTC (2 months, 1 week ago) by ph10
File MIME type: text/plain
File size: 95130 byte(s)
Files tidied for 8.36-RC1.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 ph10 1354 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 ph10 1404 recurse into directories, and in z/OS it can handle PDS files.
8 nigel 49
9 ph10 1354 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10     additional header is required. That header is not included in the main PCRE
11     distribution because other apparatus is needed to compile pcregrep for z/OS.
12     The header can be found in the special z/OS distribution, which is available
13     from www.zaconsultants.net or from www.cbttape.org.
14 nigel 75
15 ph10 1467 Copyright (c) 1997-2014 University of Cambridge
16 ph10 1354
17 nigel 75 -----------------------------------------------------------------------------
18     Redistribution and use in source and binary forms, with or without
19     modification, are permitted provided that the following conditions are met:
20    
21     * Redistributions of source code must retain the above copyright notice,
22     this list of conditions and the following disclaimer.
23    
24     * Redistributions in binary form must reproduce the above copyright
25     notice, this list of conditions and the following disclaimer in the
26     documentation and/or other materials provided with the distribution.
27    
28     * Neither the name of the University of Cambridge nor the names of its
29     contributors may be used to endorse or promote products derived from
30     this software without specific prior written permission.
31    
32     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42     POSSIBILITY OF SUCH DAMAGE.
43     -----------------------------------------------------------------------------
44     */
45    
46 ph10 97 #ifdef HAVE_CONFIG_H
47 ph10 236 #include "config.h"
48 ph10 97 #endif
49    
50 nigel 53 #include <ctype.h>
51 nigel 87 #include <locale.h>
52 nigel 49 #include <stdio.h>
53     #include <string.h>
54     #include <stdlib.h>
55     #include <errno.h>
56 nigel 77
57     #include <sys/types.h>
58     #include <sys/stat.h>
59 ph10 199
60 ph10 137 #ifdef HAVE_UNISTD_H
61 ph10 199 #include <unistd.h>
62 ph10 137 #endif
63 nigel 77
64 ph10 286 #ifdef SUPPORT_LIBZ
65     #include <zlib.h>
66     #endif
67    
68     #ifdef SUPPORT_LIBBZ2
69     #include <bzlib.h>
70     #endif
71    
72 ph10 236 #include "pcre.h"
73 nigel 49
74     #define FALSE 0
75     #define TRUE 1
76    
77     typedef int BOOL;
78    
79 ph10 378 #define OFFSET_SIZE 99
80 nigel 49
81 nigel 77 #if BUFSIZ > 8192
82 ph10 1003 #define MAXPATLEN BUFSIZ
83 nigel 77 #else
84 ph10 1003 #define MAXPATLEN 8192
85 nigel 77 #endif
86 nigel 49
87 ph10 1003 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
88    
89 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
90     output. The order is important; it is assumed that a file name is wanted for
91     all values greater than FN_DEFAULT. */
92 nigel 77
93 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94 nigel 87
95 ph10 286 /* File reading styles */
96    
97     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
98    
99 nigel 87 /* Actions for the -d and -D options */
100    
101     enum { dee_READ, dee_SKIP, dee_RECURSE };
102     enum { DEE_READ, DEE_SKIP };
103    
104     /* Actions for special processing options (flag bits) */
105    
106     #define PO_WORD_MATCH 0x0001
107     #define PO_LINE_MATCH 0x0002
108     #define PO_FIXED_STRINGS 0x0004
109    
110 nigel 93 /* Line ending types */
111 nigel 87
112 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113 nigel 87
114 ph10 947 /* Binary file options */
115    
116     enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. Without the wrapper, using the macro as the unbraced body of an
"if" that has an "else" does not compile, because the semicolon after the
macro's own "{}" ends the outer "if" before the "else" is seen. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125 nigel 93
126 ph10 515
127    
128 nigel 49 /*************************************************
129     * Global variables *
130     *************************************************/
131    
132 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
133     regular code. */
134    
135     #ifdef JFRIEDL_DEBUG
136     static int S_arg = -1;
137 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139     static const char *jfriedl_prefix = "";
140     static const char *jfriedl_postfix = "";
141 nigel 87 #endif
142    
143 nigel 93 static int endlinetype;
144 nigel 91
145 nigel 87 static char *colour_string = (char *)"1;31";
146     static char *colour_option = NULL;
147     static char *dee_option = NULL;
148     static char *DEE_option = NULL;
149 ph10 1003 static char *locale = NULL;
150 ph10 644 static char *main_buffer = NULL;
151 nigel 91 static char *newline = NULL;
152 ph10 1039 static char *om_separator = (char *)"";
153 nigel 77 static char *stdin_name = (char *)"(standard input)";
154 nigel 87
155     static const unsigned char *pcretables = NULL;
156    
157 nigel 77 static int after_context = 0;
158     static int before_context = 0;
159 ph10 947 static int binary_files = BIN_BINARY;
160 nigel 77 static int both_context = 0;
161 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
162     static int bufsize = 3*PCREGREP_BUFSIZE;
163 ph10 1003
164     #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165     static int dee_action = dee_SKIP;
166     #else
167 nigel 87 static int dee_action = dee_READ;
168 ph10 1003 #endif
169    
170 nigel 87 static int DEE_action = DEE_READ;
171     static int error_count = 0;
172     static int filenames = FN_DEFAULT;
173 ph10 1003 static int pcre_options = 0;
174 nigel 87 static int process_options = 0;
175 ph10 685
176     #ifdef SUPPORT_PCREGREP_JIT
177     static int study_options = PCRE_STUDY_JIT_COMPILE;
178     #else
179 ph10 667 static int study_options = 0;
180 ph10 685 #endif
181 nigel 77
182 ph10 561 static unsigned long int match_limit = 0;
183     static unsigned long int match_limit_recursion = 0;
184    
185 nigel 49 static BOOL count_only = FALSE;
186 nigel 87 static BOOL do_colour = FALSE;
187 ph10 280 static BOOL file_offsets = FALSE;
188 nigel 77 static BOOL hyphenpending = FALSE;
189 nigel 49 static BOOL invert = FALSE;
190 ph10 519 static BOOL line_buffered = FALSE;
191 ph10 280 static BOOL line_offsets = FALSE;
192 nigel 77 static BOOL multiline = FALSE;
193 nigel 49 static BOOL number = FALSE;
194 ph10 420 static BOOL omit_zero_count = FALSE;
195 ph10 561 static BOOL resource_error = FALSE;
196 nigel 77 static BOOL quiet = FALSE;
197 ph10 1039 static BOOL show_only_matching = FALSE;
198 nigel 49 static BOOL silent = FALSE;
199 nigel 93 static BOOL utf8 = FALSE;
200 nigel 49
201 ph10 1039 /* Structure for list of --only-matching capturing numbers. */
202    
203     typedef struct omstr {
204     struct omstr *next;
205     int groupnum;
206     } omstr;
207    
208     static omstr *only_matching = NULL;
209     static omstr *only_matching_last = NULL;
210    
211     /* Structure for holding the two variables that describe a number chain. */
212    
213     typedef struct omdatastr {
214     omstr **anchor;
215     omstr **lastptr;
216     } omdatastr;
217    
218     static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220 ph10 1003 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222     typedef struct fnstr {
223     struct fnstr *next;
224     char *name;
225     } fnstr;
226    
227     static fnstr *exclude_from = NULL;
228     static fnstr *exclude_from_last = NULL;
229     static fnstr *include_from = NULL;
230     static fnstr *include_from_last = NULL;
231    
232     static fnstr *file_lists = NULL;
233     static fnstr *file_lists_last = NULL;
234     static fnstr *pattern_files = NULL;
235     static fnstr *pattern_files_last = NULL;
236    
237     /* Structure for holding the two variables that describe a file name chain. */
238    
239     typedef struct fndatastr {
240     fnstr **anchor;
241     fnstr **lastptr;
242     } fndatastr;
243    
244     static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245     static fndatastr include_from_data = { &include_from, &include_from_last };
246     static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247     static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249     /* Structure for pattern and its compiled form; used for matching patterns and
250     also for include/exclude patterns. */
251    
252     typedef struct patstr {
253     struct patstr *next;
254     char *string;
255     pcre *compiled;
256     pcre_extra *hint;
257     } patstr;
258    
259     static patstr *patterns = NULL;
260     static patstr *patterns_last = NULL;
261     static patstr *include_patterns = NULL;
262     static patstr *include_patterns_last = NULL;
263     static patstr *exclude_patterns = NULL;
264     static patstr *exclude_patterns_last = NULL;
265     static patstr *include_dir_patterns = NULL;
266     static patstr *include_dir_patterns_last = NULL;
267     static patstr *exclude_dir_patterns = NULL;
268     static patstr *exclude_dir_patterns_last = NULL;
269    
270     /* Structure holding the two variables that describe a pattern chain. A pointer
271     to such structures is used for each appropriate option. */
272    
273     typedef struct patdatastr {
274     patstr **anchor;
275     patstr **lastptr;
276     } patdatastr;
277    
278     static patdatastr match_patdata = { &patterns, &patterns_last };
279     static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280     static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281     static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282     static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284     static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285     &include_dir_patterns, &exclude_dir_patterns };
286    
287     static const char *incexname[4] = { "--include", "--exclude",
288     "--include-dir", "--exclude-dir" };
289    
290 nigel 53 /* Structure for options and list of them */
291 nigel 49
292 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293 ph10 1039 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294 nigel 77
295 nigel 53 typedef struct option_item {
296 nigel 77 int type;
297 nigel 53 int one_char;
298 nigel 77 void *dataptr;
299 nigel 67 const char *long_name;
300     const char *help_text;
301 nigel 53 } option_item;
302 nigel 49
303 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
304     used to identify them. */
305    
306 ph10 325 #define N_COLOUR (-1)
307     #define N_EXCLUDE (-2)
308     #define N_EXCLUDE_DIR (-3)
309     #define N_HELP (-4)
310     #define N_INCLUDE (-5)
311     #define N_INCLUDE_DIR (-6)
312     #define N_LABEL (-7)
313     #define N_LOCALE (-8)
314     #define N_NULL (-9)
315     #define N_LOFFSETS (-10)
316     #define N_FOFFSETS (-11)
317 ph10 519 #define N_LBUFFER (-12)
318 ph10 561 #define N_M_LIMIT (-13)
319     #define N_M_LIMIT_REC (-14)
320 ph10 644 #define N_BUFSIZE (-15)
321 ph10 685 #define N_NOJIT (-16)
322 ph10 944 #define N_FILE_LIST (-17)
323 ph10 947 #define N_BINARY_FILES (-18)
324 ph10 1003 #define N_EXCLUDE_FROM (-19)
325     #define N_INCLUDE_FROM (-20)
326 ph10 1039 #define N_OM_SEPARATOR (-21)
327 nigel 87
328 nigel 53 static option_item optionlist[] = {
329 ph10 947 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 ph10 584 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 ph10 947 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 ph10 584 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 ph10 947 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 ph10 1003 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 ph10 584 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 ph10 1003 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345     { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 ph10 584 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 ph10 947 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 ph10 584 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
352     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
353     #else
354     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
355     #endif
356 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 ph10 1039 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368     { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 ph10 584 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 ph10 1003 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372     { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373     { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375     { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376     { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377 ph10 571
378     /* These two were accidentally implemented with underscores instead of
379     hyphens in the option names. As this was not discovered for several releases,
380     the incorrect versions are left in the table for compatibility. However, the
381     --help function misses out any option that has an underscore in its name. */
382 ph10 579
383 ph10 1003 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385 ph10 571
386 nigel 87 #ifdef JFRIEDL_DEBUG
387     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
388     #endif
389     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395     { OP_NODATA, 0, NULL, NULL, NULL }
396 nigel 53 };
397    
398 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
399     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
400     that the combination of -w and -x has the same effect as -x on its own, so we
401 ph10 1003 can treat them as the same. Note that the PATBUFSIZE macro assumes the longest
402     prefix+suffix is 10 characters; if anything longer is added, it must be
403     adjusted. */
404 nigel 53
405 nigel 87 static const char *prefix[] = {
406     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
407    
408     static const char *suffix[] = {
409     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
410    
411 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
412 nigel 87
413 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
414 nigel 87
415 nigel 93 const char utf8_table4[] = {
416     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
420    
421    
422    
423 nigel 53 /*************************************************
424 ph10 1039 * Exit from the program *
425     *************************************************/
426    
427     /* If there has been a resource error, give a suitable message.
428    
429     Argument: the return code
430     Returns: does not return
431     */
432    
433     static void
434     pcregrep_exit(int rc)
435     {
436     if (resource_error)
437     {
438     fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440     PCRE_ERROR_JIT_STACKLIMIT);
441     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442     }
443     exit(rc);
444     }
445    
446    
447     /*************************************************
448 ph10 1003 * Add item to chain of patterns *
449     *************************************************/
450    
451     /* Used to add an item onto a chain, or just return an unconnected item if the
452     "after" argument is NULL.
453    
454     Arguments:
455     s pattern string to add
456     after if not NULL points to item to insert after
457    
458 ph10 1492 Returns: new pattern block or NULL on error
459 ph10 1003 */
460    
461     static patstr *
462     add_pattern(char *s, patstr *after)
463     {
464     patstr *p = (patstr *)malloc(sizeof(patstr));
465     if (p == NULL)
466     {
467     fprintf(stderr, "pcregrep: malloc failed\n");
468 ph10 1039 pcregrep_exit(2);
469 ph10 1003 }
470     if (strlen(s) > MAXPATLEN)
471     {
472     fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473     MAXPATLEN);
474 ph10 1502 free(p);
475 ph10 1003 return NULL;
476     }
477     p->next = NULL;
478     p->string = s;
479     p->compiled = NULL;
480     p->hint = NULL;
481    
482     if (after != NULL)
483     {
484     p->next = after->next;
485     after->next = p;
486     }
487     return p;
488     }
489    
490    
491     /*************************************************
492     * Free chain of patterns *
493     *************************************************/
494    
495     /* Used for several chains of patterns.
496    
497     Argument: pointer to start of chain
498     Returns: nothing
499     */
500    
501     static void
502     free_pattern_chain(patstr *pc)
503     {
504     while (pc != NULL)
505     {
506     patstr *p = pc;
507     pc = p->next;
508     if (p->hint != NULL) pcre_free_study(p->hint);
509     if (p->compiled != NULL) pcre_free(p->compiled);
510     free(p);
511     }
512     }
513    
514    
515     /*************************************************
516     * Free chain of file names *
517     *************************************************/
518    
519     /*
520     Argument: pointer to start of chain
521     Returns: nothing
522     */
523    
524     static void
525     free_file_chain(fnstr *fn)
526     {
527     while (fn != NULL)
528     {
529     fnstr *f = fn;
530     fn = f->next;
531     free(f);
532     }
533     }
534    
535    
536     /*************************************************
537 nigel 87 * OS-specific functions *
538 nigel 53 *************************************************/
539    
540 ph10 1354 /* These functions are defined so that they can be made system specific.
541     At present there are versions for Unix-style environments, Windows, native
542     z/OS, and "no support". */
543 nigel 53
544    
545 ph10 1354 /************* Directory scanning Unix-style and z/OS ***********/
546 nigel 53
547 ph10 1354 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548 nigel 53 #include <sys/types.h>
549     #include <sys/stat.h>
550     #include <dirent.h>
551    
552 ph10 1354 #if defined NATIVE_ZOS
553     /************* Directory and PDS/E scanning for z/OS ***********/
554     /************* z/OS looks mostly like Unix with USS ************/
555     /* However, z/OS needs the #include statements in this header */
556     #include "pcrzosfs.h"
557     /* That header is not included in the main PCRE distribution because
558     other apparatus is needed to compile pcregrep for z/OS. The header
559     can be found in the special z/OS distribution, which is available
560     from www.zaconsultants.net or from www.cbttape.org. */
561     #endif
562    
563 nigel 53 typedef DIR directory_type;
564 ph10 1003 #define FILESEP '/'
565 nigel 53
/* Test whether a path names a directory. The file type is checked with the
POSIX S_ISDIR() macro rather than by masking with S_IFMT, because the S_IF*
mask constants are an XSI extension that strict compilation modes may hide.

Argument:  filename   the path to test
Returns:   1 if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode) != 0;
}
574    
575 nigel 67 static directory_type *
576 nigel 53 opendirectory(char *filename)
577     {
578     return opendir(filename);
579     }
580    
581 nigel 67 static char *
582 nigel 53 readdirectory(directory_type *dir)
583     {
584     for (;;)
585     {
586     struct dirent *dent = readdir(dir);
587     if (dent == NULL) return NULL;
588     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589     return dent->d_name;
590     }
591 ph10 151 /* Control never reaches here */
592 nigel 53 }
593    
594 nigel 67 static void
595 nigel 53 closedirectory(directory_type *dir)
596     {
597     closedir(dir);
598     }
599    
600    
/************* Test for regular file, Unix-style **********/

/* The file type is checked with the POSIX S_ISREG() macro rather than by
masking with S_IFMT, because the S_IF* mask constants are an XSI extension
that strict compilation modes may hide.

Argument:  filename   the path to test
Returns:   1 if stat() fails, or if it reports a regular file; otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) != 0;
}
611    
612    
613 ph10 1354 #if defined NATIVE_ZOS
614     /************* Test for a terminal in z/OS **********/
615     /* isatty() does not work in a TSO environment, so always give FALSE.*/
616 nigel 87
617     static BOOL
618     is_stdout_tty(void)
619     {
620 ph10 1354 return FALSE;
621     }
622    
623     static BOOL
624     is_file_tty(FILE *f)
625     {
626     return FALSE;
627     }
628    
629    
630     /************* Test for a terminal, Unix-style **********/
631    
632     #else
633     static BOOL
634     is_stdout_tty(void)
635     {
636 nigel 87 return isatty(fileno(stdout));
637     }
638    
639 ph10 519 static BOOL
640     is_file_tty(FILE *f)
641     {
642     return isatty(fileno(f));
643     }
644 ph10 1354 #endif
645 nigel 87
646 ph10 1354 /* End of Unix-style or native z/OS environment functions. */
647 ph10 519
648 nigel 53
649 ph10 1354 /************* Directory scanning in Windows ***********/
650    
651 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
652 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
654     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656     undefined when it is indeed undefined. */
657 nigel 53
658 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659 nigel 63
660     #ifndef STRICT
661     # define STRICT
662     #endif
663     #ifndef WIN32_LEAN_AND_MEAN
664     # define WIN32_LEAN_AND_MEAN
665     #endif
666 ph10 283
667     #include <windows.h>
668    
669 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
670     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671     #endif
672    
673 nigel 63 typedef struct directory_type
674     {
675     HANDLE handle;
676     BOOL first;
677     WIN32_FIND_DATA data;
678     } directory_type;
679    
680 ph10 1004 #define FILESEP '/'
681 ph10 1003
682 nigel 63 int
683     isdirectory(char *filename)
684     {
685     DWORD attr = GetFileAttributes(filename);
686     if (attr == INVALID_FILE_ATTRIBUTES)
687     return 0;
688 ph10 1003 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689 nigel 63 }
690    
691     directory_type *
692     opendirectory(char *filename)
693     {
694     size_t len;
695     char *pattern;
696     directory_type *dir;
697     DWORD err;
698     len = strlen(filename);
699 ph10 1003 pattern = (char *)malloc(len + 3);
700     dir = (directory_type *)malloc(sizeof(*dir));
701 nigel 63 if ((pattern == NULL) || (dir == NULL))
702     {
703     fprintf(stderr, "pcregrep: malloc failed\n");
704 ph10 561 pcregrep_exit(2);
705 nigel 63 }
706     memcpy(pattern, filename, len);
707     memcpy(&(pattern[len]), "\\*", 3);
708     dir->handle = FindFirstFile(pattern, &(dir->data));
709     if (dir->handle != INVALID_HANDLE_VALUE)
710     {
711     free(pattern);
712     dir->first = TRUE;
713     return dir;
714     }
715     err = GetLastError();
716     free(pattern);
717     free(dir);
718     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719     return NULL;
720     }
721    
722     char *
723     readdirectory(directory_type *dir)
724     {
725     for (;;)
726     {
727     if (!dir->first)
728     {
729     if (!FindNextFile(dir->handle, &(dir->data)))
730     return NULL;
731     }
732     else
733     {
734     dir->first = FALSE;
735     }
736     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737     return dir->data.cFileName;
738     }
739     #ifndef _MSC_VER
740     return NULL; /* Keep compiler happy; never executed */
741     #endif
742     }
743    
744     void
745     closedirectory(directory_type *dir)
746     {
747     FindClose(dir->handle);
748     free(dir);
749     }
750    
751    
/************* Test for regular file in Windows **********/

/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:  filename   the path to test
Returns:   0 if isdirectory() reports a directory, otherwise 1
*/

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
761    
762    
763 ph10 1354 /************* Test for a terminal in Windows **********/
764 nigel 87
765     /* I don't know how to do this; assume never */
766    
767     static BOOL
768     is_stdout_tty(void)
769     {
770 ph10 283 return FALSE;
771 nigel 87 }
772    
773 ph10 519 static BOOL
774     is_file_tty(FILE *f)
775     {
776     return FALSE;
777     }
778 nigel 87
779 ph10 1354 /* End of Windows functions */
780 ph10 519
781 ph10 1354
782 nigel 53 /************* Directory scanning when we can't do it ***********/
783    
784     /* The type is void, and apart from isdirectory(), the functions do nothing. */
785    
786 nigel 63 #else
787    
788 ph10 1005 #define FILESEP 0
789 nigel 53 typedef void directory_type;
790    
791 nigel 87 int isdirectory(char *filename) { return 0; }
792 ph10 97 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
793     char *readdirectory(directory_type *dir) { return (char*)0;}
794 nigel 53 void closedirectory(directory_type *dir) {}
795    
796 nigel 87
/************* Test for regular file when we can't do it **********/

/* Assume all files are regular.

Argument:  filename   a path; it is not examined
Returns:   1, always
*/

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
802    
803    
804 ph10 519 /************* Test for a terminal when we can't do it **********/
805 nigel 87
806     static BOOL
807     is_stdout_tty(void)
808     {
809     return FALSE;
810     }
811    
812 ph10 519 static BOOL
813     is_file_tty(FILE *f)
814     {
815     return FALSE;
816     }
817 nigel 87
818 ph10 1354 #endif /* End of system-specific functions */
819 nigel 53
820    
821    
822 ph10 137 #ifndef HAVE_STRERROR
823 nigel 49 /*************************************************
824     * Provide strerror() for non-ANSI libraries *
825     *************************************************/
826    
/* A stand-in for strerror() on old systems (SunOS4 is one example) whose
libraries lack it but do supply the sys_nerr/sys_errlist table, from which the
same facility can be provided.

Argument:   n   the error number
Returns:    the text from sys_errlist for n, or a fixed message when n lies
            outside the range 0 .. sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
840     #endif /* HAVE_STRERROR */
841    
842    
843    
844     /*************************************************
845 ph10 1039 * Usage function *
846     *************************************************/
847    
848     static int
849     usage(int rc)
850     {
851     option_item *op;
852     fprintf(stderr, "Usage: pcregrep [-");
853     for (op = optionlist; op->one_char != 0; op++)
854     {
855     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856     }
857     fprintf(stderr, "] [long options] [pattern] [files]\n");
858     fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859     "options.\n");
860     return rc;
861     }
862    
863    
864    
865     /*************************************************
866     * Help function *
867     *************************************************/
868    
869     static void
870     help(void)
871     {
872     option_item *op;
873    
874     printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875     printf("Search for PATTERN in each FILE or standard input.\n");
876     printf("PATTERN must be present if neither -e nor -f is used.\n");
877     printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879     #ifdef SUPPORT_LIBZ
880     printf("Files whose names end in .gz are read using zlib.\n");
881     #endif
882    
883     #ifdef SUPPORT_LIBBZ2
884     printf("Files whose names end in .bz2 are read using bzlib2.\n");
885     #endif
886    
887     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888     printf("Other files and the standard input are read as plain files.\n\n");
889     #else
890     printf("All files are read as plain files, without any interpretation.\n\n");
891     #endif
892    
893     printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894     printf("Options:\n");
895    
896     for (op = optionlist; op->one_char != 0; op++)
897     {
898     int n;
899     char s[4];
900    
901     /* Two options were accidentally implemented and documented with underscores
902     instead of hyphens in their names, something that was not noticed for quite a
903     few releases. When fixing this, I left the underscored versions in the list
904     in case people were using them. However, we don't want to display them in the
905     help data. There are no other options that contain underscores, and we do not
906     expect ever to implement such options. Therefore, just omit any option that
907     contains an underscore. */
908    
909     if (strchr(op->long_name, '_') != NULL) continue;
910    
911     if (op->one_char > 0 && (op->long_name)[0] == 0)
912     n = 31 - printf(" -%c", op->one_char);
913     else
914     {
915     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916     else strcpy(s, " ");
917     n = 31 - printf(" %s --%s", s, op->long_name);
918     }
919    
920     if (n < 1) n = 1;
921     printf("%.*s%s\n", n, " ", op->help_text);
922     }
923    
924     printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925     printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926     printf("When reading patterns or file names from a file, trailing white\n");
927     printf("space is removed and blank lines are ignored.\n");
928     printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930     printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932     }
933    
934    
935    
936     /*************************************************
937 ph10 1003 * Test exclude/includes *
938     *************************************************/
939    
940     /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941     there are no includes, the path must match an include pattern.
942    
943     Arguments:
944     path the path to be matched
945     ip the chain of include patterns
946     ep the chain of exclude patterns
947    
948     Returns: TRUE if the path is not excluded
949     */
950    
951     static BOOL
952     test_incexc(char *path, patstr *ip, patstr *ep)
953     {
954     int plen = strlen(path);
955    
956     for (; ep != NULL; ep = ep->next)
957     {
958     if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959     return FALSE;
960     }
961    
962     if (ip == NULL) return TRUE;
963    
964     for (; ip != NULL; ip = ip->next)
965     {
966     if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967     return TRUE;
968     }
969    
970     return FALSE;
971     }
972    
973    
974    
975     /*************************************************
976 ph10 1039 * Decode integer argument value *
977     *************************************************/
978    
979     /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980     because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981     just keep it simple.
982    
983     Arguments:
984     option_data the option data string
985     op the option item (for error messages)
986     longop TRUE if option given in long form
987    
988     Returns: a long integer
989     */
990    
991     static long int
992     decode_number(char *option_data, option_item *op, BOOL longop)
993     {
994     unsigned long int n = 0;
995     char *endptr = option_data;
996     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997     while (isdigit((unsigned char)(*endptr)))
998     n = n * 10 + (int)(*endptr++ - '0');
999     if (toupper(*endptr) == 'K')
1000     {
1001     n *= 1024;
1002     endptr++;
1003     }
1004     else if (toupper(*endptr) == 'M')
1005     {
1006     n *= 1024*1024;
1007     endptr++;
1008     }
1009    
1010     if (*endptr != 0) /* Error */
1011     {
1012     if (longop)
1013     {
1014     char *equals = strchr(op->long_name, '=');
1015     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016     (int)(equals - op->long_name);
1017     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018     option_data, nlen, op->long_name);
1019     }
1020     else
1021     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022     option_data, op->one_char);
1023     pcregrep_exit(usage(2));
1024     }
1025    
1026     return n;
1027     }
1028    
1029    
1030    
1031     /*************************************************
1032     * Add item to a chain of numbers *
1033     *************************************************/
1034    
1035     /* Used to add an item onto a chain, or just return an unconnected item if the
1036     "after" argument is NULL.
1037    
1038     Arguments:
1039     n the number to add
1040     after if not NULL points to item to insert after
1041    
1042     Returns: new number block
1043     */
1044    
1045     static omstr *
1046     add_number(int n, omstr *after)
1047     {
1048     omstr *om = (omstr *)malloc(sizeof(omstr));
1049    
1050     if (om == NULL)
1051     {
1052     fprintf(stderr, "pcregrep: malloc failed\n");
1053     pcregrep_exit(2);
1054     }
1055     om->next = NULL;
1056     om->groupnum = n;
1057    
1058     if (after != NULL)
1059     {
1060     om->next = after->next;
1061     after->next = om;
1062     }
1063     return om;
1064     }
1065    
1066    
1067    
1068     /*************************************************
1069 ph10 519 * Read one line of input *
1070     *************************************************/
1071    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are read one at a time until a newline has been stored, the buffer
is full, or the end of the file is reached. No terminating zero is added. If
length is not positive there is no room to store anything, so nothing is read
and zero is returned.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* A character is stored before the length is tested, so a non-positive length
must be rejected here to avoid writing outside the buffer. */

if (length <= 0) return 0;

while ((c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n' || yield >= length) break;
  }

return yield;
}
1099    
1100    
1101    
1102     /*************************************************
1103 nigel 93 * Find end of line *
1104     *************************************************/
1105    
1106     /* The length of the endline sequence that is found is set via lenptr. This may
1107     be zero at the very end of the file if there is no line-ending sequence there.
1108    
1109     Arguments:
1110     p current position in line
1111     endptr end of available data
1112     lenptr where to put the length of the eol sequence
1113    
1114 ph10 654 Returns: pointer after the last byte of the line,
1115 ph10 644 including the newline byte(s)
1116 nigel 93 */
1117    
1118     static char *
1119     end_of_line(char *p, char *endptr, int *lenptr)
1120     {
1121     switch(endlinetype)
1122     {
1123     default: /* Just in case */
1124     case EL_LF:
1125     while (p < endptr && *p != '\n') p++;
1126     if (p < endptr)
1127     {
1128     *lenptr = 1;
1129     return p + 1;
1130     }
1131     *lenptr = 0;
1132     return endptr;
1133    
1134     case EL_CR:
1135     while (p < endptr && *p != '\r') p++;
1136     if (p < endptr)
1137     {
1138     *lenptr = 1;
1139     return p + 1;
1140     }
1141     *lenptr = 0;
1142     return endptr;
1143    
1144     case EL_CRLF:
1145     for (;;)
1146     {
1147     while (p < endptr && *p != '\r') p++;
1148     if (++p >= endptr)
1149     {
1150     *lenptr = 0;
1151     return endptr;
1152     }
1153     if (*p == '\n')
1154     {
1155     *lenptr = 2;
1156     return p + 1;
1157     }
1158     }
1159     break;
1160    
1161 ph10 149 case EL_ANYCRLF:
1162     while (p < endptr)
1163     {
1164     int extra = 0;
1165     register int c = *((unsigned char *)p);
1166    
1167     if (utf8 && c >= 0xc0)
1168     {
1169     int gcii, gcss;
1170     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1171     gcss = 6*extra;
1172     c = (c & utf8_table3[extra]) << gcss;
1173     for (gcii = 1; gcii <= extra; gcii++)
1174     {
1175     gcss -= 6;
1176     c |= (p[gcii] & 0x3f) << gcss;
1177     }
1178     }
1179    
1180     p += 1 + extra;
1181    
1182     switch (c)
1183     {
1184 ph10 1033 case '\n':
1185 ph10 149 *lenptr = 1;
1186     return p;
1187    
1188 ph10 1033 case '\r':
1189     if (p < endptr && *p == '\n')
1190 ph10 149 {
1191     *lenptr = 2;
1192     p++;
1193     }
1194     else *lenptr = 1;
1195     return p;
1196 ph10 150
1197 ph10 149 default:
1198     break;
1199     }
1200     } /* End of loop for ANYCRLF case */
1201 ph10 150
1202 ph10 149 *lenptr = 0; /* Must have hit the end */
1203     return endptr;
1204    
1205 nigel 93 case EL_ANY:
1206     while (p < endptr)
1207     {
1208     int extra = 0;
1209     register int c = *((unsigned char *)p);
1210    
1211     if (utf8 && c >= 0xc0)
1212     {
1213     int gcii, gcss;
1214     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1215     gcss = 6*extra;
1216     c = (c & utf8_table3[extra]) << gcss;
1217     for (gcii = 1; gcii <= extra; gcii++)
1218     {
1219     gcss -= 6;
1220     c |= (p[gcii] & 0x3f) << gcss;
1221     }
1222     }
1223    
1224     p += 1 + extra;
1225    
1226     switch (c)
1227     {
1228 ph10 1033 case '\n': /* LF */
1229     case '\v': /* VT */
1230     case '\f': /* FF */
1231 nigel 93 *lenptr = 1;
1232     return p;
1233    
1234 ph10 1033 case '\r': /* CR */
1235     if (p < endptr && *p == '\n')
1236 nigel 93 {
1237     *lenptr = 2;
1238     p++;
1239     }
1240     else *lenptr = 1;
1241     return p;
1242    
1243 ph10 1033 #ifndef EBCDIC
1244     case 0x85: /* Unicode NEL */
1245 nigel 93 *lenptr = utf8? 2 : 1;
1246     return p;
1247    
1248 ph10 1033 case 0x2028: /* Unicode LS */
1249     case 0x2029: /* Unicode PS */
1250 nigel 93 *lenptr = 3;
1251     return p;
1252 ph10 1039 #endif /* Not EBCDIC */
1253 nigel 93
1254     default:
1255     break;
1256     }
1257     } /* End of loop for ANY case */
1258    
1259     *lenptr = 0; /* Must have hit the end */
1260     return endptr;
1261     } /* End of overall switch */
1262     }
1263    
1264    
1265    
1266     /*************************************************
1267     * Find start of previous line *
1268     *************************************************/
1269    
1270     /* This is called when looking back for before lines to print.
1271    
1272     Arguments:
1273     p start of the subsequent line
1274     startptr start of available data
1275    
1276     Returns: pointer to the start of the previous line
1277     */
1278    
1279     static char *
1280     previous_line(char *p, char *startptr)
1281     {
1282     switch(endlinetype)
1283     {
1284     default: /* Just in case */
1285     case EL_LF:
1286     p--;
1287     while (p > startptr && p[-1] != '\n') p--;
1288     return p;
1289    
1290     case EL_CR:
1291     p--;
1292     while (p > startptr && p[-1] != '\n') p--;
1293     return p;
1294    
1295     case EL_CRLF:
1296     for (;;)
1297     {
1298     p -= 2;
1299     while (p > startptr && p[-1] != '\n') p--;
1300     if (p <= startptr + 1 || p[-2] == '\r') return p;
1301     }
1302 ph10 1467 /* Control can never get here */
1303 nigel 93
1304     case EL_ANY:
1305 ph10 150 case EL_ANYCRLF:
1306 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307     if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308    
1309     while (p > startptr)
1310     {
1311 chpe 1096 register unsigned int c;
1312 nigel 93 char *pp = p - 1;
1313    
1314     if (utf8)
1315     {
1316     int extra = 0;
1317     while ((*pp & 0xc0) == 0x80) pp--;
1318     c = *((unsigned char *)pp);
1319     if (c >= 0xc0)
1320     {
1321     int gcii, gcss;
1322     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1323     gcss = 6*extra;
1324     c = (c & utf8_table3[extra]) << gcss;
1325     for (gcii = 1; gcii <= extra; gcii++)
1326     {
1327     gcss -= 6;
1328     c |= (pp[gcii] & 0x3f) << gcss;
1329     }
1330     }
1331     }
1332     else c = *((unsigned char *)pp);
1333    
1334 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
1335 nigel 93 {
1336 ph10 1033 case '\n': /* LF */
1337     case '\r': /* CR */
1338 ph10 149 return p;
1339 ph10 150
1340 ph10 149 default:
1341     break;
1342 ph10 150 }
1343 ph10 149
1344     else switch (c)
1345     {
1346 ph10 1033 case '\n': /* LF */
1347     case '\v': /* VT */
1348     case '\f': /* FF */
1349     case '\r': /* CR */
1350 ph10 1039 #ifndef EBCDIE
1351 ph10 1033 case 0x85: /* Unicode NEL */
1352     case 0x2028: /* Unicode LS */
1353     case 0x2029: /* Unicode PS */
1354 ph10 1039 #endif /* Not EBCDIC */
1355 nigel 93 return p;
1356    
1357     default:
1358     break;
1359     }
1360    
1361     p = pp; /* Back one character */
1362     } /* End of loop for ANY case */
1363    
1364     return startptr; /* Hit start of data */
1365     } /* End of overall switch */
1366     }
1367    
1368    
1369    
1370    
1371    
1372     /*************************************************
1373 nigel 77 * Print the previous "after" lines *
1374 nigel 49 *************************************************/
1375    
1376 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
1377 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
1378     that a binary zero does not terminate it.
1379 nigel 77
1380     Arguments:
1381     lastmatchnumber the number of the last matching line, plus one
1382     lastmatchrestart where we restarted after the last match
1383     endptr end of available data
1384     printname filename for printing
1385    
1386     Returns: nothing
1387     */
1388    
1389 ph10 1003 static void
1390     do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391     char *printname)
1392 nigel 77 {
1393     if (after_context > 0 && lastmatchnumber > 0)
1394     {
1395     int count = 0;
1396     while (lastmatchrestart < endptr && count++ < after_context)
1397     {
1398 nigel 93 int ellength;
1399 nigel 77 char *pp = lastmatchrestart;
1400     if (printname != NULL) fprintf(stdout, "%s-", printname);
1401     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1403 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404 nigel 93 lastmatchrestart = pp;
1405 nigel 77 }
1406     hyphenpending = TRUE;
1407     }
1408     }
1409    
1410    
1411    
1412     /*************************************************
1413 ph10 378 * Apply patterns to subject till one matches *
1414     *************************************************/
1415    
1416 ph10 392 /* This function is called to run through all patterns, looking for a match. It
1417     is used multiple times for the same subject when colouring is enabled, in order
1418 ph10 378 to find all possible matches.
1419    
1420     Arguments:
1421 ph10 632 matchptr the start of the subject
1422     length the length of the subject to match
1423 ph10 1335 options options for pcre_exec
1424 ph10 632 startoffset where to start matching
1425     offsets the offets vector to fill in
1426     mrc address of where to put the result of pcre_exec()
1427 ph10 392
1428     Returns: TRUE if there was a match
1429 ph10 378 FALSE if there was no match
1430     invert if there was a non-fatal error
1431 ph10 392 */
1432 ph10 378
1433     static BOOL
1434 ph10 1335 match_patterns(char *matchptr, size_t length, unsigned int options,
1435 ph10 1324 int startoffset, int *offsets, int *mrc)
1436 ph10 378 {
1437     int i;
1438 ph10 561 size_t slen = length;
1439 ph10 1003 patstr *p = patterns;
1440 ph10 561 const char *msg = "this text:\n\n";
1441 ph10 1003
1442 ph10 561 if (slen > 200)
1443     {
1444     slen = 200;
1445     msg = "text that starts:\n\n";
1446 ph10 579 }
1447 ph10 1003 for (i = 1; p != NULL; p = p->next, i++)
1448 ph10 378 {
1449 ph10 1003 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450 ph10 1324 startoffset, options, offsets, OFFSET_SIZE);
1451 ph10 378 if (*mrc >= 0) return TRUE;
1452     if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454 ph10 1003 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455 ph10 561 fprintf(stderr, "%s", msg);
1456     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1457     fprintf(stderr, "\n\n");
1458 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460 ph10 561 resource_error = TRUE;
1461 ph10 378 if (error_count++ > 20)
1462     {
1463 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464     pcregrep_exit(2);
1465 ph10 378 }
1466     return invert; /* No more matching; don't show the line again */
1467     }
1468    
1469     return FALSE; /* No match, no errors */
1470     }
1471    
1472    
1473    
1474     /*************************************************
1475 nigel 77 * Grep an individual file *
1476     *************************************************/
1477    
1478     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1480 nigel 77 the top third of the buffer, thus keeping more of the file available for
1481     context printing or for multiline scanning. For large files, the pointer will
1482     be in the middle third most of the time, so the bottom third is available for
1483     "before" context printing.
1484    
1485     Arguments:
1486 ph10 286 handle the fopened FILE stream for a normal file
1487     the gzFile pointer when reading is via libz
1488     the BZFILE pointer when reading is via libbz2
1489     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490 ph10 644 filename the file name or NULL (for errors)
1491 nigel 77 printname the file name if it is to be printed for each match
1492     or NULL if the file name is not to be printed
1493     it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495     Returns: 0 if there was at least one match
1496     1 otherwise (no matches)
1497 ph10 654 2 if an overlong line is encountered
1498 ph10 644 3 if there is a read error on a .bz2 file
1499 nigel 77 */
1500    
1501 nigel 49 static int
1502 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1503 nigel 49 {
1504     int rc = 1;
1505 nigel 77 int linenumber = 1;
1506     int lastmatchnumber = 0;
1507 nigel 49 int count = 0;
1508 ph10 280 int filepos = 0;
1509 ph10 378 int offsets[OFFSET_SIZE];
1510 nigel 77 char *lastmatchrestart = NULL;
1511 ph10 644 char *ptr = main_buffer;
1512 nigel 77 char *endptr;
1513     size_t bufflength;
1514 ph10 947 BOOL binary = FALSE;
1515 nigel 77 BOOL endhyphenpending = FALSE;
1516 ph10 519 BOOL input_line_buffered = line_buffered;
1517 ph10 286 FILE *in = NULL; /* Ensure initialized */
1518 nigel 49
1519 ph10 286 #ifdef SUPPORT_LIBZ
1520     gzFile ingz = NULL;
1521     #endif
1522 nigel 77
1523 ph10 286 #ifdef SUPPORT_LIBBZ2
1524     BZFILE *inbz2 = NULL;
1525     #endif
1526    
1527    
1528     /* Do the first read into the start of the buffer and set up the pointer to end
1529     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531     fail. */
1532    
1533 chpe 1136 (void)frtype;
1534    
1535 ph10 286 #ifdef SUPPORT_LIBZ
1536     if (frtype == FR_LIBZ)
1537     {
1538     ingz = (gzFile)handle;
1539 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1540 ph10 286 }
1541     else
1542     #endif
1543    
1544     #ifdef SUPPORT_LIBBZ2
1545     if (frtype == FR_LIBBZ2)
1546     {
1547     inbz2 = (BZFILE *)handle;
1548 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1550     } /* without the cast it is unsigned. */
1551     else
1552     #endif
1553    
1554     {
1555     in = (FILE *)handle;
1556 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1557 ph10 535 bufflength = input_line_buffered?
1558 ph10 644 read_one_line(main_buffer, bufsize, in) :
1559     fread(main_buffer, 1, bufsize, in);
1560 ph10 286 }
1561 ph10 535
1562 ph10 644 endptr = main_buffer + bufflength;
1563 nigel 77
1564 ph10 947 /* Unless binary-files=text, see if we have a binary file. This uses the same
1565 ph10 975 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566 ph10 947 file. */
1567    
1568     if (binary_files != BIN_TEXT)
1569     {
1570 ph10 975 binary =
1571 ph10 947 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572     if (binary && binary_files == BIN_NOMATCH) return 1;
1573 ph10 975 }
1574 ph10 947
1575 nigel 77 /* Loop while the current pointer is not at the end of the file. For large
1576     files, endptr will be at the end of the buffer when we are in the middle of the
1577     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578     way, the buffer is shifted left and re-filled. */
1579    
1580     while (ptr < endptr)
1581 nigel 49 {
1582 ph10 378 int endlinelength;
1583 nigel 87 int mrc = 0;
1584 ph10 654 int startoffset = 0;
1585 ph10 1335 unsigned int options = 0;
1586 ph10 378 BOOL match;
1587 ph10 286 char *matchptr = ptr;
1588 nigel 77 char *t = ptr;
1589     size_t length, linelength;
1590 nigel 49
1591 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1592     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1593     length remainder of the data in the buffer. Otherwise, it is the length of
1594 ph10 378 the next line, excluding the terminating newline. After matching, we always
1595     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1596     option is used for compiling, so that any match is constrained to be in the
1597     first line. */
1598 nigel 77
1599 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1600     linelength = t - ptr - endlinelength;
1601 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1602 ph10 654
1603     /* Check to see if the line we are looking at extends right to the very end
1604     of the buffer without a line terminator. This means the line is too long to
1605 ph10 644 handle. */
1606 ph10 654
1607 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1608     {
1609     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1610 ph10 646 "pcregrep: check the --buffer-size option\n",
1611 ph10 654 linenumber,
1612 ph10 644 (filename == NULL)? "" : " of file ",
1613     (filename == NULL)? "" : filename);
1614     return 2;
1615 ph10 654 }
1616 nigel 77
1617 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1618    
1619     #ifdef JFRIEDL_DEBUG
1620     if (jfriedl_XT || jfriedl_XR)
1621     {
1622 zherczeg 1216 # include <sys/time.h>
1623     # include <time.h>
1624 nigel 89 struct timeval start_time, end_time;
1625     struct timezone dummy;
1626 ph10 392 int i;
1627 nigel 89
1628     if (jfriedl_XT)
1629     {
1630     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1631     const char *orig = ptr;
1632     ptr = malloc(newlen + 1);
1633     if (!ptr) {
1634     printf("out of memory");
1635 ph10 561 pcregrep_exit(2);
1636 nigel 89 }
1637     endptr = ptr;
1638     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1639     for (i = 0; i < jfriedl_XT; i++) {
1640     strncpy(endptr, orig, length);
1641     endptr += length;
1642     }
1643     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1644     length = newlen;
1645     }
1646    
1647     if (gettimeofday(&start_time, &dummy) != 0)
1648     perror("bad gettimeofday");
1649    
1650    
1651     for (i = 0; i < jfriedl_XR; i++)
1652 ph10 1003 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1653 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1654 nigel 89
1655     if (gettimeofday(&end_time, &dummy) != 0)
1656     perror("bad gettimeofday");
1657    
1658     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1659     -
1660     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1661    
1662     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1663     return 0;
1664     }
1665     #endif
1666    
1667 ph10 1039 /* We come back here after a match when show_only_matching is set, in order
1668     to find any further matches in the same line. This applies to
1669     --only-matching, --file-offsets, and --line-offsets. */
1670 nigel 89
1671 ph10 286 ONLY_MATCHING_RESTART:
1672    
1673 ph10 392 /* Run through all the patterns until one matches or there is an error other
1674 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1675 ph10 1335 finding subsequent matches when colouring matched lines. After finding one
1676     match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1677 ph10 1324 this line. */
1678 ph10 392
1679 ph10 1324 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1680     options = PCRE_NOTEMPTY;
1681 nigel 77
1682 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1683 nigel 77
1684 nigel 49 if (match != invert)
1685     {
1686 nigel 77 BOOL hyphenprinted = FALSE;
1687    
1688 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1689 nigel 77
1690 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1691    
1692     /* Just count if just counting is wanted. */
1693    
1694 nigel 49 if (count_only) count++;
1695 ph10 975
1696     /* When handling a binary file and binary-files==binary, the "binary"
1697     variable will be set true (it's false in all other cases). In this
1698 ph10 947 situation we just want to output the file name. No need to scan further. */
1699 ph10 975
1700 ph10 947 else if (binary)
1701     {
1702     fprintf(stdout, "Binary file %s matches\n", filename);
1703 ph10 975 return 0;
1704     }
1705 nigel 49
1706 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1707     in the file. */
1708    
1709 ph10 420 else if (filenames == FN_MATCH_ONLY)
1710 nigel 49 {
1711 nigel 77 fprintf(stdout, "%s\n", printname);
1712 nigel 49 return 0;
1713     }
1714    
1715 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1716    
1717 nigel 77 else if (quiet) return 0;
1718 nigel 49
1719 ph10 1039 /* The --only-matching option prints just the substring that matched,
1720     and/or one or more captured portions of it, as long as these strings are
1721     not empty. The --file-offsets and --line-offsets options output offsets for
1722     the matching substring (all three set show_only_matching). None of these
1723     mutually exclusive options prints any context. Afterwards, adjust the start
1724     and then jump back to look for further matches in the same line. If we are
1725     in invert mode, however, nothing is printed and we do not restart - this
1726     could still be useful because the return code is set. */
1727 nigel 87
1728 ph10 1039 else if (show_only_matching)
1729 nigel 87 {
1730 ph10 279 if (!invert)
1731 ph10 286 {
1732 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1733     if (number) fprintf(stdout, "%d:", linenumber);
1734 ph10 1039
1735     /* Handle --line-offsets */
1736    
1737 ph10 280 if (line_offsets)
1738 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1739 ph10 286 offsets[1] - offsets[0]);
1740 ph10 1039
1741     /* Handle --file-offsets */
1742    
1743 ph10 280 else if (file_offsets)
1744 ph10 579 fprintf(stdout, "%d,%d\n",
1745 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1746 ph10 286 offsets[1] - offsets[0]);
1747 ph10 1039
1748     /* Handle --only-matching, which may occur many times */
1749    
1750     else
1751 ph10 377 {
1752 ph10 1039 BOOL printed = FALSE;
1753     omstr *om;
1754 ph10 1221
1755 ph10 1039 for (om = only_matching; om != NULL; om = om->next)
1756 ph10 579 {
1757 ph10 1039 int n = om->groupnum;
1758     if (n < mrc)
1759     {
1760     int plen = offsets[2*n + 1] - offsets[2*n];
1761     if (plen > 0)
1762     {
1763 ph10 1221 if (printed) fprintf(stdout, "%s", om_separator);
1764 ph10 1039 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1765     FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1766     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1767     printed = TRUE;
1768     }
1769     }
1770 ph10 579 }
1771 ph10 1221
1772 ph10 1039 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1773 ph10 392 }
1774 ph10 1039
1775     /* Prepare to repeat to find the next match */
1776    
1777 ph10 286 match = FALSE;
1778 ph10 564 if (line_buffered) fflush(stdout);
1779 ph10 636 rc = 0; /* Had some success */
1780     startoffset = offsets[1]; /* Restart after the match */
1781 ph10 286 goto ONLY_MATCHING_RESTART;
1782     }
1783 nigel 87 }
1784    
1785     /* This is the default case when none of the above options is set. We print
1786     the matching lines(s), possibly preceded and/or followed by other lines of
1787     context. */
1788    
1789 nigel 49 else
1790     {
1791 nigel 77 /* See if there is a requirement to print some "after" lines from a
1792     previous match. We never print any overlaps. */
1793    
1794     if (after_context > 0 && lastmatchnumber > 0)
1795     {
1796 nigel 93 int ellength;
1797 nigel 77 int linecount = 0;
1798     char *p = lastmatchrestart;
1799    
1800     while (p < ptr && linecount < after_context)
1801     {
1802 nigel 93 p = end_of_line(p, ptr, &ellength);
1803 nigel 77 linecount++;
1804     }
1805    
1806     /* It is important to advance lastmatchrestart during this printing so
1807 nigel 87 that it interacts correctly with any "before" printing below. Print
1808     each line's data using fwrite() in case there are binary zeroes. */
1809 nigel 77
1810     while (lastmatchrestart < p)
1811     {
1812     char *pp = lastmatchrestart;
1813     if (printname != NULL) fprintf(stdout, "%s-", printname);
1814     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1815 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1816 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1817 nigel 93 lastmatchrestart = pp;
1818 nigel 77 }
1819     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1820     }
1821    
1822     /* If there were non-contiguous lines printed above, insert hyphens. */
1823    
1824     if (hyphenpending)
1825     {
1826     fprintf(stdout, "--\n");
1827     hyphenpending = FALSE;
1828     hyphenprinted = TRUE;
1829     }
1830    
1831     /* See if there is a requirement to print some "before" lines for this
1832     match. Again, don't print overlaps. */
1833    
1834     if (before_context > 0)
1835     {
1836     int linecount = 0;
1837     char *p = ptr;
1838    
1839 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1840 nigel 87 linecount < before_context)
1841 nigel 77 {
1842 nigel 87 linecount++;
1843 ph10 644 p = previous_line(p, main_buffer);
1844 nigel 77 }
1845    
1846     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1847     fprintf(stdout, "--\n");
1848    
1849     while (p < ptr)
1850     {
1851 nigel 93 int ellength;
1852 nigel 77 char *pp = p;
1853     if (printname != NULL) fprintf(stdout, "%s-", printname);
1854     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1855 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1856 ph10 515 FWRITE(p, 1, pp - p, stdout);
1857 nigel 93 p = pp;
1858 nigel 77 }
1859     }
1860    
1861     /* Now print the matching line(s); ensure we set hyphenpending at the end
1862 nigel 85 of the file if any context lines are being output. */
1863 nigel 77
1864 nigel 85 if (after_context > 0 || before_context > 0)
1865     endhyphenpending = TRUE;
1866    
1867 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1868 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1869 nigel 77
1870     /* In multiline mode, we want to print to the end of the line in which
1871     the end of the matched string is found, so we adjust linelength and the
1872 ph10 222 line number appropriately, but only when there actually was a match
1873     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1874     the match will always be before the first newline sequence. */
1875 nigel 77
1876 ph10 587 if (multiline & !invert)
1877 nigel 77 {
1878 ph10 587 char *endmatch = ptr + offsets[1];
1879     t = ptr;
1880 ph10 1353 while (t <= endmatch)
1881 nigel 93 {
1882 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1883     if (t < endmatch) linenumber++; else break;
1884 nigel 93 }
1885 ph10 587 linelength = t - ptr - endlinelength;
1886 nigel 77 }
1887    
1888 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1889     zeroes are treated as just another data character. */
1890    
1891     /* This extra option, for Jeffrey Friedl's debugging requirements,
1892     replaces the matched string, or a specific captured string if it exists,
1893     with X. When this happens, colouring is ignored. */
1894    
1895     #ifdef JFRIEDL_DEBUG
1896     if (S_arg >= 0 && S_arg < mrc)
1897     {
1898     int first = S_arg * 2;
1899     int last = first + 1;
1900 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1901 nigel 87 fprintf(stdout, "X");
1902 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1903 nigel 87 }
1904     else
1905     #endif
1906    
1907 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1908 ph10 585 matches, but not of course if the line is a non-match. */
1909 ph10 589
1910 ph10 585 if (do_colour && !invert)
1911 nigel 87 {
1912 ph10 589 int plength;
1913 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1914 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1915 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1916 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1917 ph10 378 for (;;)
1918     {
1919 ph10 632 startoffset = offsets[1];
1920 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1921 ph10 1324 !match_patterns(matchptr, length, options, startoffset, offsets,
1922     &mrc))
1923 ph10 632 break;
1924     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1925 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1926 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1927 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1928     }
1929 ph10 587
1930     /* In multiline mode, we may have already printed the complete line
1931 ph10 589 and its line-ending characters (if they matched the pattern), so there
1932 ph10 587 may be no more to print. */
1933 ph10 589
1934 ph10 836 plength = (int)((linelength + endlinelength) - startoffset);
1935 ph10 636 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1936 nigel 87 }
1937 ph10 392
1938 ph10 378 /* Not colouring; no need to search for further matches */
1939 ph10 392
1940 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1941 nigel 49 }
1942    
1943 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1944     given, flush the output. */
1945 nigel 87
1946 ph10 519 if (line_buffered) fflush(stdout);
1947 nigel 77 rc = 0; /* Had some success */
1948    
1949     /* Remember where the last match happened for after_context. We remember
1950     where we are about to restart, and that line's number. */
1951    
1952 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1953 nigel 77 lastmatchnumber = linenumber + 1;
1954 nigel 49 }
1955 nigel 77
1956 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1957     anything to be printed), we have to move on to the end of the match before
1958     proceeding. */
1959    
1960     if (multiline && invert && match)
1961     {
1962     int ellength;
1963     char *endmatch = ptr + offsets[1];
1964     t = ptr;
1965     while (t < endmatch)
1966     {
1967     t = end_of_line(t, endptr, &ellength);
1968     if (t <= endmatch) linenumber++; else break;
1969     }
1970     endmatch = end_of_line(endmatch, endptr, &ellength);
1971     linelength = endmatch - ptr - ellength;
1972     }
1973    
1974 ph10 286 /* Advance to after the newline and increment the line number. The file
1975 ph10 280 offset to the current line is maintained in filepos. */
1976 nigel 77
1977 nigel 93 ptr += linelength + endlinelength;
1978 ph10 530 filepos += (int)(linelength + endlinelength);
1979 nigel 77 linenumber++;
1980 ph10 535
1981     /* If input is line buffered, and the buffer is not yet full, read another
1982 ph10 519 line and add it into the buffer. */
1983 ph10 535
1984 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
1985 ph10 519 {
1986 ph10 836 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1987 ph10 519 bufflength += add;
1988 ph10 535 endptr += add;
1989     }
1990 nigel 77
1991     /* If we haven't yet reached the end of the file (the buffer is full), and
1992     the current point is in the top 1/3 of the buffer, slide the buffer down by
1993     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1994     about to be lost, print them. */
1995    
1996 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1997 nigel 77 {
1998     if (after_context > 0 &&
1999     lastmatchnumber > 0 &&
2000 ph10 644 lastmatchrestart < main_buffer + bufthird)
2001 nigel 77 {
2002     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2003     lastmatchnumber = 0;
2004     }
2005    
2006     /* Now do the shuffle */
2007    
2008 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2009     ptr -= bufthird;
2010 ph10 286
2011     #ifdef SUPPORT_LIBZ
2012     if (frtype == FR_LIBZ)
2013 ph10 644 bufflength = 2*bufthird +
2014     gzread (ingz, main_buffer + 2*bufthird, bufthird);
2015 ph10 286 else
2016     #endif
2017    
2018     #ifdef SUPPORT_LIBBZ2
2019     if (frtype == FR_LIBBZ2)
2020 ph10 644 bufflength = 2*bufthird +
2021     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2022 ph10 286 else
2023     #endif
2024    
2025 ph10 644 bufflength = 2*bufthird +
2026 ph10 535 (input_line_buffered?
2027 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2028     fread(main_buffer + 2*bufthird, 1, bufthird, in));
2029     endptr = main_buffer + bufflength;
2030 nigel 77
2031     /* Adjust any last match point */
2032    
2033 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2034 nigel 77 }
2035     } /* Loop through the whole file */
2036    
2037     /* End of file; print final "after" lines if wanted; do_after_lines sets
2038     hyphenpending if it prints something. */
2039    
2040 ph10 1039 if (!show_only_matching && !count_only)
2041 nigel 87 {
2042     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2043     hyphenpending |= endhyphenpending;
2044     }
2045 nigel 77
2046     /* Print the file name if we are looking for those without matches and there
2047     were none. If we found a match, we won't have got this far. */
2048    
2049 nigel 87 if (filenames == FN_NOMATCH_ONLY)
2050 nigel 77 {
2051     fprintf(stdout, "%s\n", printname);
2052     return 0;
2053 nigel 49 }
2054    
2055 nigel 77 /* Print the match count if wanted */
2056    
2057 nigel 49 if (count_only)
2058     {
2059 ph10 420 if (count > 0 || !omit_zero_count)
2060 ph10 461 {
2061     if (printname != NULL && filenames != FN_NONE)
2062 ph10 420 fprintf(stdout, "%s:", printname);
2063     fprintf(stdout, "%d\n", count);
2064 ph10 461 }
2065 nigel 49 }
2066    
2067     return rc;
2068     }
2069    
2070    
2071    
2072     /*************************************************
2073 nigel 53 * Grep a file or recurse into a directory *
2074     *************************************************/
2075    
2076 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
2077     recursing; if it's a file, grep it.
2078    
2079     Arguments:
2080     pathname the path to investigate
2081 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2082 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
2083    
2084 ph10 1003 Returns: -1 the file/directory was skipped
2085     0 if there was at least one match
2086 nigel 77 1 if there were no matches
2087     2 there was some kind of error
2088    
2089     However, file opening failures are suppressed if "silent" is set.
2090     */
2091    
2092 nigel 53 static int
2093 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2094 nigel 53 {
2095     int rc = 1;
2096 ph10 286 int frtype;
2097     void *handle;
2098 ph10 1003 char *lastcomp;
2099 ph10 286 FILE *in = NULL; /* Ensure initialized */
2100 nigel 53
2101 ph10 286 #ifdef SUPPORT_LIBZ
2102     gzFile ingz = NULL;
2103     #endif
2104    
2105     #ifdef SUPPORT_LIBBZ2
2106     BZFILE *inbz2 = NULL;
2107     #endif
2108    
2109 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2110 ph10 879 int pathlen;
2111     #endif
2112    
2113 ph10 1354 #if defined NATIVE_ZOS
2114     int zos_type;
2115     FILE *zos_test_file;
2116     #endif
2117    
2118 nigel 77 /* If the file name is "-" we scan stdin */
2119 nigel 53
2120 nigel 77 if (strcmp(pathname, "-") == 0)
2121 nigel 53 {
2122 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
2123 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2124 nigel 77 stdin_name : NULL);
2125     }
2126    
2127 ph10 1003 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2128     directories, whereas --include and --exclude apply to everything else. The test
2129     is against the final component of the path. */
2130 nigel 87
2131 ph10 1003 lastcomp = strrchr(pathname, FILESEP);
2132     lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2133    
2134     /* If the file is a directory, skip if not recursing or if explicitly excluded.
2135     Otherwise, scan the directory and recurse for each path within it. The scanning
2136     code is localized so it can be made system-specific. */
2137    
2138 ph10 1354
2139     /* For z/OS, determine the file type. */
2140    
2141     #if defined NATIVE_ZOS
2142     zos_test_file = fopen(pathname,"rb");
2143    
2144     if (zos_test_file == NULL)
2145     {
2146 ph10 1404 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2147 ph10 1354 pathname, strerror(errno));
2148     return -1;
2149     }
2150     zos_type = identifyzosfiletype (zos_test_file);
2151     fclose (zos_test_file);
2152    
2153     /* Handle a PDS in separate code */
2154    
2155     if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2156     {
2157 ph10 1355 return travelonpdsdir (pathname, only_one_at_top);
2158 ph10 1354 }
2159    
2160     /* Deal with regular files in the normal way below. These types are:
2161     zos_type == __ZOS_PDS_MEMBER
2162     zos_type == __ZOS_PS
2163     zos_type == __ZOS_VSAM_KSDS
2164     zos_type == __ZOS_VSAM_ESDS
2165     zos_type == __ZOS_VSAM_RRDS
2166     */
2167    
2168     /* Handle a z/OS directory using common code. */
2169    
2170     else if (zos_type == __ZOS_HFS)
2171     {
2172     #endif /* NATIVE_ZOS */
2173    
2174    
2175     /* Handle directories: common code for all OS */
2176    
2177 ph10 1003 if (isdirectory(pathname))
2178 nigel 77 {
2179 ph10 1003 if (dee_action == dee_SKIP ||
2180     !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2181     return -1;
2182    
2183 nigel 87 if (dee_action == dee_RECURSE)
2184 nigel 53 {
2185 nigel 87 char buffer[1024];
2186     char *nextfile;
2187     directory_type *dir = opendirectory(pathname);
2188 nigel 53
2189 nigel 87 if (dir == NULL)
2190     {
2191     if (!silent)
2192     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2193     strerror(errno));
2194     return 2;
2195     }
2196 nigel 77
2197 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
2198     {
2199 ph10 1003 int frc;
2200     sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2201 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2202     if (frc > 1) rc = frc;
2203     else if (frc == 0 && rc == 1) rc = 0;
2204     }
2205    
2206     closedirectory(dir);
2207     return rc;
2208 nigel 53 }
2209     }
2210    
2211 ph10 1354 #if defined NATIVE_ZOS
2212     }
2213     #endif
2214 nigel 53
2215 ph10 1354 /* If the file is not a directory, check for a regular file, and if it is not,
2216     skip it if that's been requested. Otherwise, check for an explicit inclusion or
2217     exclusion. */
2218 nigel 87
2219 ph10 1354 else if (
2220     #if defined NATIVE_ZOS
2221     (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2222     #else /* all other OS */
2223     (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2224     #endif
2225     !test_incexc(lastcomp, include_patterns, exclude_patterns))
2226     return -1; /* File skipped */
2227    
2228 nigel 87 /* Control reaches here if we have a regular file, or if we have a directory
2229     and recursion or skipping was not requested, or if we have anything else and
2230     skipping was not requested. The scan proceeds. If this is the first and only
2231     argument at top level, we don't show the file name, unless we are only showing
2232     the file name, or the filename was forced (-H). */
2233    
2234 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2235 ph10 530 pathlen = (int)(strlen(pathname));
2236 ph10 879 #endif
2237 ph10 286
2238     /* Open using zlib if it is supported and the file name ends with .gz. */
2239    
2240     #ifdef SUPPORT_LIBZ
2241     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2242 nigel 53 {
2243 ph10 286 ingz = gzopen(pathname, "rb");
2244     if (ingz == NULL)
2245     {
2246     if (!silent)
2247     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2248     strerror(errno));
2249     return 2;
2250     }
2251     handle = (void *)ingz;
2252     frtype = FR_LIBZ;
2253     }
2254     else
2255     #endif
2256    
2257     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2258    
2259     #ifdef SUPPORT_LIBBZ2
2260     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2261     {
2262     inbz2 = BZ2_bzopen(pathname, "rb");
2263     handle = (void *)inbz2;
2264     frtype = FR_LIBBZ2;
2265     }
2266     else
2267     #endif
2268    
2269     /* Otherwise use plain fopen(). The label is so that we can come back here if
2270     an attempt to read a .bz2 file indicates that it really is a plain file. */
2271    
2272     #ifdef SUPPORT_LIBBZ2
2273     PLAIN_FILE:
2274     #endif
2275     {
2276 ph10 419 in = fopen(pathname, "rb");
2277 ph10 286 handle = (void *)in;
2278     frtype = FR_PLAIN;
2279     }
2280    
2281     /* All the opening methods return errno when they fail. */
2282    
2283     if (handle == NULL)
2284     {
2285 nigel 77 if (!silent)
2286     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2287     strerror(errno));
2288 nigel 53 return 2;
2289     }
2290    
2291 ph10 286 /* Now grep the file */
2292    
2293 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2294 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2295 nigel 77
2296 ph10 286 /* Close in an appropriate manner. */
2297    
2298     #ifdef SUPPORT_LIBZ
2299     if (frtype == FR_LIBZ)
2300     gzclose(ingz);
2301     else
2302     #endif
2303    
2304 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2305 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
2306     again as a normal file. */
2307    
2308     #ifdef SUPPORT_LIBBZ2
2309     if (frtype == FR_LIBBZ2)
2310     {
2311 ph10 644 if (rc == 3)
2312 ph10 286 {
2313     int errnum;
2314     const char *err = BZ2_bzerror(inbz2, &errnum);
2315     if (errnum == BZ_DATA_ERROR_MAGIC)
2316     {
2317     BZ2_bzclose(inbz2);
2318     goto PLAIN_FILE;
2319     }
2320     else if (!silent)
2321     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2322     pathname, err);
2323 ph10 654 rc = 2; /* The normal "something went wrong" code */
2324 ph10 286 }
2325     BZ2_bzclose(inbz2);
2326     }
2327     else
2328     #endif
2329    
2330     /* Normal file close */
2331    
2332 nigel 53 fclose(in);
2333 ph10 286
2334     /* Pass back the yield from pcregrep(). */
2335    
2336 nigel 53 return rc;
2337     }
2338    
2339    
2340    
2341     /*************************************************
2342 nigel 77 * Handle a single-letter, no data option *
2343 nigel 53 *************************************************/
2344    
2345     static int
2346     handle_option(int letter, int options)
2347     {
2348     switch(letter)
2349     {
2350 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
2351 ph10 561 case N_HELP: help(); pcregrep_exit(0);
2352 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
2353 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
2354 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2355 ph10 947 case 'a': binary_files = BIN_TEXT; break;
2356 nigel 53 case 'c': count_only = TRUE; break;
2357 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
2358     case 'H': filenames = FN_FORCE; break;
2359 ph10 947 case 'I': binary_files = BIN_NOMATCH; break;
2360 nigel 87 case 'h': filenames = FN_NONE; break;
2361 nigel 53 case 'i': options |= PCRE_CASELESS; break;
2362 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2363 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
2364 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2365 nigel 53 case 'n': number = TRUE; break;
2366 ph10 1221
2367 ph10 1039 case 'o':
2368     only_matching_last = add_number(0, only_matching_last);
2369     if (only_matching == NULL) only_matching = only_matching_last;
2370     break;
2371 ph10 1221
2372 nigel 77 case 'q': quiet = TRUE; break;
2373 nigel 87 case 'r': dee_action = dee_RECURSE; break;
2374 nigel 53 case 's': silent = TRUE; break;
2375 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2376 nigel 53 case 'v': invert = TRUE; break;
2377 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
2378     case 'x': process_options |= PO_LINE_MATCH; break;
2379 nigel 53
2380     case 'V':
2381 ph10 1003 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2382 ph10 561 pcregrep_exit(0);
2383 nigel 53 break;
2384    
2385     default:
2386     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2387 ph10 561 pcregrep_exit(usage(2));
2388 nigel 53 }
2389    
2390     return options;
2391     }
2392    
2393    
2394    
2395    
2396     /*************************************************
2397 nigel 87 * Construct printed ordinal *
2398     *************************************************/
2399    
2400     /* This turns a number into "1st", "3rd", etc. */
2401    
static char *
ordin(int n)
{
/* Room for any int: at most 3 decimal digits per byte, plus a sign, the
two-letter suffix, and the terminating zero. The result lives in static
storage, so it is overwritten by the next call. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int lasttwo = n % 100;
char *p;

sprintf(buffer, "%d", n);
p = buffer + strlen(buffer);

/* 11, 12 and 13 (and 111, 212, ...) take "th", not "st"/"nd"/"rd"; every
other number is decided by its final digit. Negative numbers give a negative
remainder and so fall through to "th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(p, suffix);
return buffer;
}
2418    
2419    
2420    
2421     /*************************************************
2422     * Compile a single pattern *
2423     *************************************************/
2424    
2425 ph10 1003 /* Do nothing if the pattern has already been compiled. This is the case for
2426     include/exclude patterns read from a file.
2427 nigel 87
2428 ph10 1003 When the -F option has been used, each "pattern" may be a list of strings,
2429     separated by line breaks. They will be matched literally. We split such a
2430     string and compile the first substring, inserting an additional block into the
2431     pattern chain.
2432    
2433 nigel 87 Arguments:
2434 ph10 1003 p points to the pattern block
2435 nigel 87 options the PCRE options
2436 ph10 1003 popts the processing options
2437     fromfile TRUE if the pattern was read from a file
2438     fromtext file name or identifying text (e.g. "include")
2439 nigel 87 count 0 if this is the only command line pattern, or
2440     number of the command line pattern, or
2441     linenumber for a pattern from a file
2442    
2443     Returns: TRUE on success, FALSE after an error
2444     */
2445    
2446     static BOOL
2447 ph10 1003 compile_pattern(patstr *p, int options, int popts, int fromfile,
2448     const char *fromtext, int count)
2449 nigel 87 {
2450 ph10 644 char buffer[PATBUFSIZE];
2451 nigel 87 const char *error;
2452 ph10 1003 char *ps = p->string;
2453     int patlen = strlen(ps);
2454 nigel 87 int errptr;
2455    
2456 ph10 1003 if (p->compiled != NULL) return TRUE;
2457    
2458     if ((popts & PO_FIXED_STRINGS) != 0)
2459 nigel 87 {
2460 ph10 1003 int ellength;
2461     char *eop = ps + patlen;
2462     char *pe = end_of_line(ps, eop, &ellength);
2463 nigel 87
2464 ph10 1003 if (ellength != 0)
2465     {
2466     if (add_pattern(pe, p) == NULL) return FALSE;
2467     patlen = (int)(pe - ps - ellength);
2468     }
2469 ph10 142 }
2470 nigel 87
2471 ph10 1003 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2472     p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2473     if (p->compiled != NULL) return TRUE;
2474    
2475 nigel 87 /* Handle compile errors */
2476    
2477 ph10 1003 errptr -= (int)strlen(prefix[popts]);
2478     if (errptr > patlen) errptr = patlen;
2479 nigel 87
2480 ph10 1003 if (fromfile)
2481 nigel 87 {
2482 ph10 1003 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2483     "at offset %d: %s\n", count, fromtext, errptr, error);
2484 nigel 87 }
2485     else
2486     {
2487 ph10 1003 if (count == 0)
2488     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2489     fromtext, errptr, error);
2490     else
2491     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2492     ordin(count), fromtext, errptr, error);
2493 nigel 87 }
2494    
2495     return FALSE;
2496     }
2497    
2498    
2499    
2500     /*************************************************
2501 ph10 1003 * Read and compile a file of patterns *
2502 nigel 87 *************************************************/
2503    
2504 ph10 1003 /* This is used for --filelist, --include-from, and --exclude-from.
2505 nigel 87
2506     Arguments:
2507 ph10 1003 name the name of the file; "-" is stdin
2508     patptr pointer to the pattern chain anchor
2509     patlastptr pointer to the last pattern pointer
2510     popts the process options to pass to compile_pattern()
2511 nigel 87
2512 ph10 1003 Returns: TRUE if all went well
2513 nigel 87 */
2514    
2515     static BOOL
2516 ph10 1003 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2517 nigel 87 {
2518 ph10 1003 int linenumber = 0;
2519     FILE *f;
2520     char *filename;
2521     char buffer[PATBUFSIZE];
2522    
2523     if (strcmp(name, "-") == 0)
2524 nigel 87 {
2525 ph10 1003 f = stdin;
2526     filename = stdin_name;
2527     }
2528     else
2529     {
2530     f = fopen(name, "r");
2531     if (f == NULL)
2532     {
2533     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2534     return FALSE;
2535     }
2536     filename = name;
2537     }
2538    
2539     while (fgets(buffer, PATBUFSIZE, f) != NULL)
2540     {
2541     char *s = buffer + (int)strlen(buffer);
2542     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2543     *s = 0;
2544     linenumber++;
2545     if (buffer[0] == 0) continue; /* Skip blank lines */
2546    
2547     /* Note: this call to add_pattern() puts a pointer to the local variable
2548     "buffer" into the pattern chain. However, that pointer is used only when
2549     compiling the pattern, which happens immediately below, so we flatten it
2550     afterwards, as a precaution against any later code trying to use it. */
2551    
2552     *patlastptr = add_pattern(buffer, *patlastptr);
2553 ph10 1502 if (*patlastptr == NULL)
2554 ph10 1492 {
2555     if (f != stdin) fclose(f);
2556     return FALSE;
2557 ph10 1502 }
2558 ph10 1003 if (*patptr == NULL) *patptr = *patlastptr;
2559    
2560     /* This loop is needed because compiling a "pattern" when -F is set may add
2561     on additional literal patterns if the original contains a newline. In the
2562     common case, it never will, because fgets() stops at a newline. However,
2563     the -N option can be used to give pcregrep a different newline setting. */
2564    
2565 nigel 87 for(;;)
2566     {
2567 ph10 1003 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2568     linenumber))
2569 ph10 1502 {
2570 ph10 1492 if (f != stdin) fclose(f);
2571 nigel 87 return FALSE;
2572 ph10 1502 }
2573 ph10 1003 (*patlastptr)->string = NULL; /* Insurance */
2574     if ((*patlastptr)->next == NULL) break;
2575     *patlastptr = (*patlastptr)->next;
2576 nigel 87 }
2577     }
2578 ph10 1003
2579     if (f != stdin) fclose(f);
2580     return TRUE;
2581 nigel 87 }
2582    
2583    
2584    
2585     /*************************************************
2586 nigel 49 * Main program *
2587     *************************************************/
2588    
2589 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2590    
2591 nigel 49 int
2592     main(int argc, char **argv)
2593     {
2594 nigel 53 int i, j;
2595 nigel 49 int rc = 1;
2596 nigel 87 BOOL only_one_at_top;
2597 ph10 1003 patstr *cp;
2598     fnstr *fn;
2599 nigel 87 const char *locale_from = "--locale";
2600 nigel 49 const char *error;
2601    
2602 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2603     pcre_jit_stack *jit_stack = NULL;
2604     #endif
2605    
2606 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2607     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2608 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2609 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2610 ph10 391 rather than escapes such as as '\r'. */
2611 nigel 91
2612     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2613     switch(i)
2614     {
2615 ph10 391 default: newline = (char *)"lf"; break;
2616     case 13: newline = (char *)"cr"; break;
2617     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2618     case -1: newline = (char *)"any"; break;
2619     case -2: newline = (char *)"anycrlf"; break;
2620 nigel 91 }
2621    
2622 nigel 49 /* Process the options */
2623    
2624     for (i = 1; i < argc; i++)
2625     {
2626 nigel 77 option_item *op = NULL;
2627     char *option_data = (char *)""; /* default to keep compiler happy */
2628     BOOL longop;
2629     BOOL longopwasequals = FALSE;
2630    
2631 nigel 49 if (argv[i][0] != '-') break;
2632 nigel 53
2633 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2634 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2635 nigel 63
2636 nigel 77 if (argv[i][1] == 0)
2637     {
2638 ph10 1003 if (pattern_files != NULL || patterns != NULL) break;
2639 ph10 561 else pcregrep_exit(usage(2));
2640 nigel 77 }
2641 nigel 63
2642 nigel 77 /* Handle a long name option, or -- to terminate the options */
2643 nigel 53
2644     if (argv[i][1] == '-')
2645 nigel 49 {
2646 nigel 77 char *arg = argv[i] + 2;
2647     char *argequals = strchr(arg, '=');
2648 nigel 53
2649 nigel 77 if (*arg == 0) /* -- terminates options */
2650 nigel 49 {
2651 nigel 77 i++;
2652     break; /* out of the options-handling loop */
2653 nigel 53 }
2654 nigel 49
2655 nigel 77 longop = TRUE;
2656    
2657     /* Some long options have data that follows after =, for example file=name.
2658     Some options have variations in the long name spelling: specifically, we
2659     allow "regexp" because GNU grep allows it, though I personally go along
2660 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2661 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2662     both these categories. */
2663 nigel 77
2664 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2665     {
2666 nigel 77 char *opbra = strchr(op->long_name, '(');
2667     char *equals = strchr(op->long_name, '=');
2668 ph10 461
2669 ph10 422 /* Handle options with only one spelling of the name */
2670 ph10 461
2671 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2672 nigel 53 {
2673 nigel 77 if (equals == NULL) /* Not thing=data case */
2674     {
2675     if (strcmp(arg, op->long_name) == 0) break;
2676     }
2677     else /* Special case xxx=data */
2678     {
2679 ph10 530 int oplen = (int)(equals - op->long_name);
2680 ph10 535 int arglen = (argequals == NULL)?
2681 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2682 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2683     {
2684     option_data = arg + arglen;
2685     if (*option_data == '=')
2686     {
2687     option_data++;
2688     longopwasequals = TRUE;
2689     }
2690     break;
2691     }
2692     }
2693 nigel 53 }
2694 ph10 461
2695 ph10 422 /* Handle options with an alternate spelling of the name */
2696 ph10 461
2697     else
2698 nigel 77 {
2699     char buff1[24];
2700     char buff2[24];
2701 ph10 461
2702 ph10 530 int baselen = (int)(opbra - op->long_name);
2703     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2704 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2705 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2706 ph10 461
2707 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2708 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2709 ph10 461
2710     if (strncmp(arg, buff1, arglen) == 0 ||
2711 ph10 422 strncmp(arg, buff2, arglen) == 0)
2712     {
2713     if (equals != NULL && argequals != NULL)
2714     {
2715 ph10 461 option_data = argequals;
2716 ph10 422 if (*option_data == '=')
2717     {
2718 ph10 461 option_data++;
2719 ph10 422 longopwasequals = TRUE;
2720 ph10 461 }
2721     }
2722 nigel 77 break;
2723 ph10 461 }
2724 nigel 77 }
2725 nigel 53 }
2726 nigel 77
2727 nigel 53 if (op->one_char == 0)
2728     {
2729     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2730 ph10 561 pcregrep_exit(usage(2));
2731 nigel 53 }
2732     }
2733 nigel 49
2734 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2735     are not in the right form for putting in the option table because they use
2736     only one hyphen, yet are more than one character long. By putting them
2737     separately here, they will not get displayed as part of the help() output,
2738     but I don't think Jeffrey will care about that. */
2739    
2740     #ifdef JFRIEDL_DEBUG
2741     else if (strcmp(argv[i], "-pre") == 0) {
2742     jfriedl_prefix = argv[++i];
2743     continue;
2744     } else if (strcmp(argv[i], "-post") == 0) {
2745     jfriedl_postfix = argv[++i];
2746     continue;
2747     } else if (strcmp(argv[i], "-XT") == 0) {
2748     sscanf(argv[++i], "%d", &jfriedl_XT);
2749     continue;
2750     } else if (strcmp(argv[i], "-XR") == 0) {
2751     sscanf(argv[++i], "%d", &jfriedl_XR);
2752     continue;
2753     }
2754     #endif
2755    
2756    
2757 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2758     continue till we hit the last one or one that needs data. */
2759 nigel 53
2760     else
2761     {
2762     char *s = argv[i] + 1;
2763 nigel 77 longop = FALSE;
2764 ph10 1221
2765 nigel 53 while (*s != 0)
2766     {
2767 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2768 ph10 579 {
2769     if (*s == op->one_char) break;
2770 ph10 565 }
2771 nigel 77 if (op->one_char == 0)
2772 nigel 53 {
2773 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2774     *s, argv[i]);
2775 ph10 561 pcregrep_exit(usage(2));
2776 nigel 77 }
2777 ph10 1221
2778 ph10 1039 option_data = s+1;
2779 ph10 1221
2780     /* Break out if this is the last character in the string; it's handled
2781 ph10 1039 below like a single multi-char option. */
2782 ph10 579
2783 ph10 1221 if (*option_data == 0) break;
2784    
2785 ph10 1039 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2786     are used for ones that either have a numerical number or defaults, i.e.
2787     the data is optional. If a digit follows, there is data; if not, carry on
2788 ph10 565 with other single-character options in the same string. */
2789 ph10 579
2790 ph10 1039 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2791 ph10 579 {
2792     if (isdigit((unsigned char)s[1])) break;
2793 nigel 53 }
2794 ph10 1039 else /* Check for an option with data */
2795 ph10 579 {
2796 ph10 1039 if (op->type != OP_NODATA) break;
2797 ph10 579 }
2798    
2799     /* Handle a single-character option with no data, then loop for the
2800 ph10 565 next character in the string. */
2801 ph10 1221
2802 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2803 nigel 49 }
2804     }
2805 ph10 1221
2806 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2807     is NO_DATA, it means that there is no data, and the option might set
2808     something in the PCRE options. */
2809 nigel 77
2810     if (op->type == OP_NODATA)
2811     {
2812 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2813     continue;
2814     }
2815    
2816 ph10 1039 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2817 nigel 87 either has a value or defaults to something. It cannot have data in a
2818 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2819 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2820 ph10 1221
2821 nigel 87 if (*option_data == 0 &&
2822 ph10 1039 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2823     op->type == OP_OP_NUMBERS))
2824 nigel 87 {
2825     switch (op->one_char)
2826 nigel 77 {
2827 nigel 87 case N_COLOUR:
2828     colour_option = (char *)"auto";
2829     break;
2830 ph10 579
2831 ph10 565 case 'o':
2832 ph10 1039 only_matching_last = add_number(0, only_matching_last);
2833     if (only_matching == NULL) only_matching = only_matching_last;
2834 ph10 579 break;
2835    
2836 nigel 87 #ifdef JFRIEDL_DEBUG
2837     case 'S':
2838     S_arg = 0;
2839     break;
2840     #endif
2841 nigel 77 }
2842 nigel 87 continue;
2843     }
2844 nigel 77
2845 nigel 87 /* Otherwise, find the data string for the option. */
2846    
2847     if (*option_data == 0)
2848     {
2849     if (i >= argc - 1 || longopwasequals)
2850 nigel 77 {
2851 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2852 ph10 561 pcregrep_exit(usage(2));
2853 nigel 87 }
2854     option_data = argv[++i];
2855     }
2856    
2857 ph10 1039 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2858     added to a chain of numbers. */
2859    
2860     if (op->type == OP_OP_NUMBERS)
2861     {
2862     unsigned long int n = decode_number(option_data, op, longop);
2863     omdatastr *omd = (omdatastr *)op->dataptr;
2864     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2865     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2866     }
2867    
2868 ph10 1003 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2869     include/exclude options, which can be called multiple times to create lists
2870     of patterns. */
2871 ph10 975
2872 ph10 1039 else if (op->type == OP_PATLIST)
2873     {
2874     patdatastr *pd = (patdatastr *)op->dataptr;
2875     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2876     if (*(pd->lastptr) == NULL) goto EXIT2;
2877     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2878     }
2879 ph10 1003
2880     /* If the option type is OP_FILELIST, it's one of the options that names a
2881     file. */
2882    
2883     else if (op->type == OP_FILELIST)
2884 nigel 87 {
2885 ph10 1003 fndatastr *fd = (fndatastr *)op->dataptr;
2886     fn = (fnstr *)malloc(sizeof(fnstr));
2887     if (fn == NULL)
2888 nigel 87 {
2889 ph10 1003 fprintf(stderr, "pcregrep: malloc failed\n");
2890     goto EXIT2;
2891 nigel 87 }
2892 ph10 1003 fn->next = NULL;
2893     fn->name = option_data;
2894     if (*(fd->anchor) == NULL)
2895     *(fd->anchor) = fn;
2896     else
2897     (*(fd->lastptr))->next = fn;
2898     *(fd->lastptr) = fn;
2899 nigel 87 }
2900 ph10 975
2901 ph10 947 /* Handle OP_BINARY_FILES */
2902 ph10 975
2903 ph10 947 else if (op->type == OP_BINFILES)
2904     {
2905     if (strcmp(option_data, "binary") == 0)
2906     binary_files = BIN_BINARY;
2907     else if (strcmp(option_data, "without-match") == 0)
2908     binary_files = BIN_NOMATCH;
2909     else if (strcmp(option_data, "text") == 0)
2910     binary_files = BIN_TEXT;
2911     else
2912     {
2913 ph10 975 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2914     option_data);
2915 ph10 947 pcregrep_exit(usage(2));
2916 ph10 975 }
2917     }
2918 nigel 87
2919 ph10 1039 /* Otherwise, deal with a single string or numeric data value. */
2920 nigel 87
2921 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2922     op->type != OP_OP_NUMBER)
2923 nigel 87 {
2924     *((char **)op->dataptr) = option_data;
2925     }
2926     else
2927     {
2928 ph10 1039 unsigned long int n = decode_number(option_data, op, longop);
2929     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2930     else *((int *)op->dataptr) = n;
2931 nigel 77 }
2932 nigel 49 }
2933    
2934 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2935     for -A and -B. */
2936    
2937     if (both_context > 0)
2938     {
2939     if (after_context == 0) after_context = both_context;
2940     if (before_context == 0) before_context = both_context;
2941     }
2942 ph10 286
2943     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2944 ph10 1039 However, all three set show_only_matching because they display, each in their
2945     own way, only the data that has matched. */
2946 nigel 77
2947 ph10 1039 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2948 ph10 286 (file_offsets && line_offsets))
2949 ph10 280 {
2950     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2951     "and/or --line-offsets\n");
2952 ph10 561 pcregrep_exit(usage(2));
2953 ph10 280 }
2954    
2955 ph10 1039 if (only_matching != NULL || file_offsets || line_offsets)
2956     show_only_matching = TRUE;
2957 ph10 286
2958 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2959     LC_ALL environment variable is set, and if so, use it. */
2960 nigel 49
2961 nigel 87 if (locale == NULL)
2962 nigel 53 {
2963 nigel 87 locale = getenv("LC_ALL");
2964     locale_from = "LCC_ALL";
2965 nigel 53 }
2966 nigel 49
2967 nigel 87 if (locale == NULL)
2968     {
2969     locale = getenv("LC_CTYPE");
2970     locale_from = "LC_CTYPE";
2971     }
2972 nigel 49
2973 ph10 1492 /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
2974     pcretables==NULL, which causes the use of default tables. */
2975 nigel 87
2976     if (locale != NULL)
2977 nigel 49 {
2978 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2979 nigel 53 {
2980 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2981     locale, locale_from);
2982 ph10 1492 goto EXIT2;
2983 nigel 53 }
2984 nigel 87 pcretables = pcre_maketables();
2985     }
2986 nigel 77
2987 nigel 87 /* Sort out colouring */
2988    
2989     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2990     {
2991     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2992     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2993     else
2994 nigel 53 {
2995 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2996     colour_option);
2997 ph10 1492 goto EXIT2;
2998 nigel 77 }
2999 nigel 87 if (do_colour)
3000 nigel 77 {
3001 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
3002     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3003     if (cs != NULL) colour_string = cs;
3004 nigel 77 }
3005 nigel 87 }
3006 ph10 535
3007 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
3008    
3009     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3010     {
3011     pcre_options |= PCRE_NEWLINE_CR;
3012 nigel 93 endlinetype = EL_CR;
3013 nigel 91 }
3014     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3015     {
3016     pcre_options |= PCRE_NEWLINE_LF;
3017 nigel 93 endlinetype = EL_LF;
3018 nigel 91 }
3019     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3020     {
3021     pcre_options |= PCRE_NEWLINE_CRLF;
3022 nigel 93 endlinetype = EL_CRLF;
3023 nigel 91 }
3024 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3025     {
3026     pcre_options |= PCRE_NEWLINE_ANY;
3027     endlinetype = EL_ANY;
3028     }
3029 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3030     {
3031     pcre_options |= PCRE_NEWLINE_ANYCRLF;
3032     endlinetype = EL_ANYCRLF;
3033     }
3034 nigel 91 else
3035     {
3036     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3037 ph10 1492 goto EXIT2;
3038 nigel 91 }
3039    
3040 nigel 87 /* Interpret the text values for -d and -D */
3041    
3042     if (dee_option != NULL)
3043     {
3044     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3045     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3046     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3047     else
3048 nigel 77 {
3049 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3050 ph10 1492 goto EXIT2;
3051 nigel 53 }
3052 nigel 49 }
3053    
3054 nigel 87 if (DEE_option != NULL)
3055     {
3056     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3057     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3058     else
3059     {
3060     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3061 ph10 1492 goto EXIT2;
3062 nigel 87 }
3063     }
3064 nigel 49
3065 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
3066 nigel 87
3067     #ifdef JFRIEDL_DEBUG
3068     if (S_arg > 9)
3069 nigel 49 {
3070 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
3071     return 2;
3072     }
3073 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3074     {
3075     if (jfriedl_XT == 0) jfriedl_XT = 1;
3076     if (jfriedl_XR == 0) jfriedl_XR = 1;
3077     }
3078 nigel 87 #endif
3079 nigel 77
3080 ph10 1003 /* Get memory for the main buffer. */
3081 nigel 87
3082 ph10 644 bufsize = 3*bufthird;
3083     main_buffer = (char *)malloc(bufsize);
3084 nigel 87
3085 ph10 1003 if (main_buffer == NULL)
3086 nigel 87 {
3087     fprintf(stderr, "pcregrep: malloc failed\n");
3088 ph10 123 goto EXIT2;
3089 nigel 87 }
3090    
3091 ph10 1003 /* If no patterns were provided by -e, and there are no files provided by -f,
3092 nigel 87 the first argument is the one and only pattern, and it must exist. */
3093    
3094 ph10 1003 if (patterns == NULL && pattern_files == NULL)
3095 nigel 87 {
3096 nigel 63 if (i >= argc) return usage(2);
3097 ph10 1003 patterns = patterns_last = add_pattern(argv[i++], NULL);
3098     if (patterns == NULL) goto EXIT2;
3099 nigel 87 }
3100 nigel 77
3101 nigel 87 /* Compile the patterns that were provided on the command line, either by
3102 ph10 1003 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3103     after all the command-line options are read so that we know which PCRE options
3104     to use. When -F is used, compile_pattern() may add another block into the
3105     chain, so we must not access the next pointer till after the compile. */
3106 nigel 87
3107 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3108 nigel 87 {
3109 ph10 1003 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3110     (j == 1 && patterns->next == NULL)? 0 : j))
3111 ph10 123 goto EXIT2;
3112 nigel 87 }
3113    
3114 ph10 1003 /* Read and compile the regular expressions that are provided in files. */
3115 nigel 87
3116 ph10 1003 for (fn = pattern_files; fn != NULL; fn = fn->next)
3117 nigel 87 {
3118 ph10 1003 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3119     goto EXIT2;
3120 ph10 1004 }
3121 nigel 87
3122 ph10 1039 /* Study the regular expressions, as we will be running them many times. If an
3123 ph10 1035 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3124     returned, even if studying produces no data. */
3125 nigel 53
3126 ph10 1035 if (match_limit > 0 || match_limit_recursion > 0)
3127     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3128    
3129     /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3130    
3131 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3132     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3133     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3134 ph10 691 #endif
3135    
3136 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3137 nigel 53 {
3138 ph10 1003 cp->hint = pcre_study(cp->compiled, study_options, &error);
3139 nigel 53 if (error != NULL)
3140     {
3141     char s[16];
3142 ph10 1003 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3143 nigel 53 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3144 ph10 121 goto EXIT2;
3145 nigel 53 }
3146 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3147 ph10 1003 if (jit_stack != NULL && cp->hint != NULL)
3148     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3149 ph10 685 #endif
3150 nigel 53 }
3151 ph10 579
3152 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3153 ph10 1039 pcre_extra block for each pattern. There will always be an extra block because
3154 ph10 1035 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3155 nigel 53
3156 ph10 1035 for (cp = patterns; cp != NULL; cp = cp->next)
3157 ph10 561 {
3158 ph10 1035 if (match_limit > 0)
3159 ph10 561 {
3160 ph10 1035 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3161     cp->hint->match_limit = match_limit;
3162 ph10 561 }
3163 ph10 1039
3164 ph10 1035 if (match_limit_recursion > 0)
3165     {
3166     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3167     cp->hint->match_limit_recursion = match_limit_recursion;
3168     }
3169 ph10 579 }
3170 ph10 561
3171 ph10 1003 /* If there are include or exclude patterns read from the command line, compile
3172     them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3173     0. */
3174 nigel 77
3175 ph10 1003 for (j = 0; j < 4; j++)
3176 nigel 77 {
3177 ph10 1003 int k;
3178     for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3179 nigel 77 {
3180 ph10 1003 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3181     (k == 1 && cp->next == NULL)? 0 : k))
3182     goto EXIT2;
3183 nigel 77 }
3184     }
3185    
3186 ph10 1003 /* Read and compile include/exclude patterns from files. */
3187    
3188     for (fn = include_from; fn != NULL; fn = fn->next)
3189 nigel 77 {
3190 ph10 1003 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3191 ph10 121 goto EXIT2;
3192 nigel 77 }
3193    
3194 ph10 1003 for (fn = exclude_from; fn != NULL; fn = fn->next)
3195 ph10 325 {
3196 ph10 1003 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3197 ph10 325 goto EXIT2;
3198     }
3199    
3200 ph10 1003 /* If there are no files that contain lists of files to search, and there are
3201     no file arguments, search stdin, and then exit. */
3202    
3203     if (file_lists == NULL && i >= argc)
3204 ph10 325 {
3205 ph10 1003 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3206     (filenames > FN_DEFAULT)? stdin_name : NULL);
3207     goto EXIT;
3208 ph10 325 }
3209 ph10 975
3210 ph10 1003 /* If any files that contain lists of files to search have been specified,
3211     read them line by line and search the given files. */
3212 ph10 325
3213 ph10 1003 for (fn = file_lists; fn != NULL; fn = fn->next)
3214 ph10 944 {
3215     char buffer[PATBUFSIZE];
3216     FILE *fl;
3217 ph10 1003 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3218 ph10 975 {
3219 ph10 1003 fl = fopen(fn->name, "rb");
3220 ph10 944 if (fl == NULL)
3221     {
3222 ph10 1003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3223 ph10 944 strerror(errno));
3224     goto EXIT2;
3225 ph10 975 }
3226     }
3227 ph10 944 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3228     {
3229     int frc;
3230     char *end = buffer + (int)strlen(buffer);
3231     while (end > buffer && isspace(end[-1])) end--;
3232 ph10 975 *end = 0;
3233     if (*buffer != 0)
3234     {
3235     frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3236 ph10 944 if (frc > 1) rc = frc;
3237 ph10 975 else if (frc == 0 && rc == 1) rc = 0;
3238     }
3239     }
3240 ph10 1003 if (fl != stdin) fclose(fl);
3241 ph10 975 }
3242 nigel 49
3243 ph10 1003 /* After handling file-list, work through remaining arguments. Pass in the fact
3244     that there is only one argument at top level - this suppresses the file name if
3245     the argument is not a directory and filenames are not otherwise forced. */
3246 ph10 944
3247 ph10 1003 only_one_at_top = i == argc - 1 && file_lists == NULL;
3248 nigel 49
3249     for (; i < argc; i++)
3250     {
3251 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3252     only_one_at_top);
3253 nigel 77 if (frc > 1) rc = frc;
3254     else if (frc == 0 && rc == 1) rc = 0;
3255 nigel 49 }
3256    
3257 ph10 121 EXIT:
3258 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3259     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3260     #endif
3261 ph10 1003
3262 ph10 1492 free(main_buffer);
3263     free((void *)pcretables);
3264 ph10 1003
3265     free_pattern_chain(patterns);
3266     free_pattern_chain(include_patterns);
3267     free_pattern_chain(include_dir_patterns);
3268     free_pattern_chain(exclude_patterns);
3269     free_pattern_chain(exclude_dir_patterns);
3270    
3271     free_file_chain(exclude_from);
3272     free_file_chain(include_from);
3273     free_file_chain(pattern_files);
3274     free_file_chain(file_lists);
3275    
3276 ph10 1039 while (only_matching != NULL)
3277     {
3278     omstr *this = only_matching;
3279     only_matching = this->next;
3280     free(this);
3281     }
3282    
3283 ph10 561 pcregrep_exit(rc);
3284 ph10 121
3285     EXIT2:
3286     rc = 2;
3287     goto EXIT;
3288 nigel 49 }
3289    
3290 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12