/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1548 - (hide annotations) (download)
Tue Apr 14 17:02:30 2015 UTC (4 days, 11 hours ago) by ph10
File MIME type: text/plain
File size: 96850 byte(s)
Documentation and tidies preparatory to 8.37 release.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 ph10 1354 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 ph10 1404 recurse into directories, and in z/OS it can handle PDS files.
8 nigel 49
9 ph10 1354 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10     additional header is required. That header is not included in the main PCRE
11     distribution because other apparatus is needed to compile pcregrep for z/OS.
12     The header can be found in the special z/OS distribution, which is available
13     from www.zaconsultants.net or from www.cbttape.org.
14 nigel 75
15 ph10 1467 Copyright (c) 1997-2014 University of Cambridge
16 ph10 1354
17 nigel 75 -----------------------------------------------------------------------------
18     Redistribution and use in source and binary forms, with or without
19     modification, are permitted provided that the following conditions are met:
20    
21     * Redistributions of source code must retain the above copyright notice,
22     this list of conditions and the following disclaimer.
23    
24     * Redistributions in binary form must reproduce the above copyright
25     notice, this list of conditions and the following disclaimer in the
26     documentation and/or other materials provided with the distribution.
27    
28     * Neither the name of the University of Cambridge nor the names of its
29     contributors may be used to endorse or promote products derived from
30     this software without specific prior written permission.
31    
32     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42     POSSIBILITY OF SUCH DAMAGE.
43     -----------------------------------------------------------------------------
44     */
45    
46 ph10 97 #ifdef HAVE_CONFIG_H
47 ph10 236 #include "config.h"
48 ph10 97 #endif
49    
50 nigel 53 #include <ctype.h>
51 nigel 87 #include <locale.h>
52 nigel 49 #include <stdio.h>
53     #include <string.h>
54     #include <stdlib.h>
55     #include <errno.h>
56 nigel 77
57     #include <sys/types.h>
58     #include <sys/stat.h>
59 ph10 199
60 ph10 137 #ifdef HAVE_UNISTD_H
61 ph10 199 #include <unistd.h>
62 ph10 137 #endif
63 nigel 77
64 ph10 286 #ifdef SUPPORT_LIBZ
65     #include <zlib.h>
66     #endif
67    
68     #ifdef SUPPORT_LIBBZ2
69     #include <bzlib.h>
70     #endif
71    
72 ph10 236 #include "pcre.h"
73 nigel 49
74     #define FALSE 0
75     #define TRUE 1
76    
77     typedef int BOOL;
78    
79 ph10 378 #define OFFSET_SIZE 99
80 nigel 49
81 nigel 77 #if BUFSIZ > 8192
82 ph10 1003 #define MAXPATLEN BUFSIZ
83 nigel 77 #else
84 ph10 1003 #define MAXPATLEN 8192
85 nigel 77 #endif
86 nigel 49
87 ph10 1003 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
88    
89 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
90     output. The order is important; it is assumed that a file name is wanted for
91     all values greater than FN_DEFAULT. */
92 nigel 77
93 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
94 nigel 87
95 ph10 286 /* File reading styles */
96    
97     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
98    
99 nigel 87 /* Actions for the -d and -D options */
100    
101     enum { dee_READ, dee_SKIP, dee_RECURSE };
102     enum { DEE_READ, DEE_SKIP };
103    
104     /* Actions for special processing options (flag bits) */
105    
106     #define PO_WORD_MATCH 0x0001
107     #define PO_LINE_MATCH 0x0002
108     #define PO_FIXED_STRINGS 0x0004
109    
110 nigel 93 /* Line ending types */
111 nigel 87
112 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
113 nigel 87
114 ph10 947 /* Binary file options */
115    
116     enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
117    
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result in an "if" with an empty body.
Oddly, this does not also seem to apply to fprintf().

The test is wrapped in do { ... } while (0) so that the macro expands to
exactly one statement. With a bare "if (...) {}" expansion, writing

  if (cond) FWRITE(a,b,c,d); else ...

does not compile, because the semicolon ends the outer "if" before the "else"
is seen. Every use of the macro must be followed by a semicolon. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125 nigel 93
126 ph10 515
127    
128 nigel 49 /*************************************************
129     * Global variables *
130     *************************************************/
131    
132 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
133     regular code. */
134    
135     #ifdef JFRIEDL_DEBUG
136     static int S_arg = -1;
137 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139     static const char *jfriedl_prefix = "";
140     static const char *jfriedl_postfix = "";
141 nigel 87 #endif
142    
143 nigel 93 static int endlinetype;
144 nigel 91
145 nigel 87 static char *colour_string = (char *)"1;31";
146     static char *colour_option = NULL;
147     static char *dee_option = NULL;
148     static char *DEE_option = NULL;
149 ph10 1003 static char *locale = NULL;
150 ph10 644 static char *main_buffer = NULL;
151 nigel 91 static char *newline = NULL;
152 ph10 1039 static char *om_separator = (char *)"";
153 nigel 77 static char *stdin_name = (char *)"(standard input)";
154 nigel 87
155     static const unsigned char *pcretables = NULL;
156    
157 nigel 77 static int after_context = 0;
158     static int before_context = 0;
159 ph10 947 static int binary_files = BIN_BINARY;
160 nigel 77 static int both_context = 0;
161 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
162     static int bufsize = 3*PCREGREP_BUFSIZE;
163 ph10 1003
164     #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165     static int dee_action = dee_SKIP;
166     #else
167 nigel 87 static int dee_action = dee_READ;
168 ph10 1003 #endif
169    
170 nigel 87 static int DEE_action = DEE_READ;
171     static int error_count = 0;
172     static int filenames = FN_DEFAULT;
173 ph10 1003 static int pcre_options = 0;
174 nigel 87 static int process_options = 0;
175 ph10 685
176     #ifdef SUPPORT_PCREGREP_JIT
177     static int study_options = PCRE_STUDY_JIT_COMPILE;
178     #else
179 ph10 667 static int study_options = 0;
180 ph10 685 #endif
181 nigel 77
182 ph10 561 static unsigned long int match_limit = 0;
183     static unsigned long int match_limit_recursion = 0;
184    
185 nigel 49 static BOOL count_only = FALSE;
186 nigel 87 static BOOL do_colour = FALSE;
187 ph10 280 static BOOL file_offsets = FALSE;
188 nigel 77 static BOOL hyphenpending = FALSE;
189 nigel 49 static BOOL invert = FALSE;
190 ph10 519 static BOOL line_buffered = FALSE;
191 ph10 280 static BOOL line_offsets = FALSE;
192 nigel 77 static BOOL multiline = FALSE;
193 nigel 49 static BOOL number = FALSE;
194 ph10 420 static BOOL omit_zero_count = FALSE;
195 ph10 561 static BOOL resource_error = FALSE;
196 nigel 77 static BOOL quiet = FALSE;
197 ph10 1039 static BOOL show_only_matching = FALSE;
198 nigel 49 static BOOL silent = FALSE;
199 nigel 93 static BOOL utf8 = FALSE;
200 nigel 49
201 ph10 1039 /* Structure for list of --only-matching capturing numbers. */
202    
203     typedef struct omstr {
204     struct omstr *next;
205     int groupnum;
206     } omstr;
207    
208     static omstr *only_matching = NULL;
209     static omstr *only_matching_last = NULL;
210    
211     /* Structure for holding the two variables that describe a number chain. */
212    
213     typedef struct omdatastr {
214     omstr **anchor;
215     omstr **lastptr;
216     } omdatastr;
217    
218     static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219    
220 ph10 1003 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
221    
222     typedef struct fnstr {
223     struct fnstr *next;
224     char *name;
225     } fnstr;
226    
227     static fnstr *exclude_from = NULL;
228     static fnstr *exclude_from_last = NULL;
229     static fnstr *include_from = NULL;
230     static fnstr *include_from_last = NULL;
231    
232     static fnstr *file_lists = NULL;
233     static fnstr *file_lists_last = NULL;
234     static fnstr *pattern_files = NULL;
235     static fnstr *pattern_files_last = NULL;
236    
237     /* Structure for holding the two variables that describe a file name chain. */
238    
239     typedef struct fndatastr {
240     fnstr **anchor;
241     fnstr **lastptr;
242     } fndatastr;
243    
244     static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
245     static fndatastr include_from_data = { &include_from, &include_from_last };
246     static fndatastr file_lists_data = { &file_lists, &file_lists_last };
247     static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248    
249     /* Structure for pattern and its compiled form; used for matching patterns and
250     also for include/exclude patterns. */
251    
252     typedef struct patstr {
253     struct patstr *next;
254     char *string;
255     pcre *compiled;
256     pcre_extra *hint;
257     } patstr;
258    
259     static patstr *patterns = NULL;
260     static patstr *patterns_last = NULL;
261     static patstr *include_patterns = NULL;
262     static patstr *include_patterns_last = NULL;
263     static patstr *exclude_patterns = NULL;
264     static patstr *exclude_patterns_last = NULL;
265     static patstr *include_dir_patterns = NULL;
266     static patstr *include_dir_patterns_last = NULL;
267     static patstr *exclude_dir_patterns = NULL;
268     static patstr *exclude_dir_patterns_last = NULL;
269    
270     /* Structure holding the two variables that describe a pattern chain. A pointer
271     to such structures is used for each appropriate option. */
272    
273     typedef struct patdatastr {
274     patstr **anchor;
275     patstr **lastptr;
276     } patdatastr;
277    
278     static patdatastr match_patdata = { &patterns, &patterns_last };
279     static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280     static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281     static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282     static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283    
284     static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285     &include_dir_patterns, &exclude_dir_patterns };
286    
287     static const char *incexname[4] = { "--include", "--exclude",
288     "--include-dir", "--exclude-dir" };
289    
290 nigel 53 /* Structure for options and list of them */
291 nigel 49
292 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
293 ph10 1039 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
294 nigel 77
295 nigel 53 typedef struct option_item {
296 nigel 77 int type;
297 nigel 53 int one_char;
298 nigel 77 void *dataptr;
299 nigel 67 const char *long_name;
300     const char *help_text;
301 nigel 53 } option_item;
302 nigel 49
303 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
304     used to identify them. */
305    
306 ph10 325 #define N_COLOUR (-1)
307     #define N_EXCLUDE (-2)
308     #define N_EXCLUDE_DIR (-3)
309     #define N_HELP (-4)
310     #define N_INCLUDE (-5)
311     #define N_INCLUDE_DIR (-6)
312     #define N_LABEL (-7)
313     #define N_LOCALE (-8)
314     #define N_NULL (-9)
315     #define N_LOFFSETS (-10)
316     #define N_FOFFSETS (-11)
317 ph10 519 #define N_LBUFFER (-12)
318 ph10 561 #define N_M_LIMIT (-13)
319     #define N_M_LIMIT_REC (-14)
320 ph10 644 #define N_BUFSIZE (-15)
321 ph10 685 #define N_NOJIT (-16)
322 ph10 944 #define N_FILE_LIST (-17)
323 ph10 947 #define N_BINARY_FILES (-18)
324 ph10 1003 #define N_EXCLUDE_FROM (-19)
325     #define N_INCLUDE_FROM (-20)
326 ph10 1039 #define N_OM_SEPARATOR (-21)
327 nigel 87
328 nigel 53 static option_item optionlist[] = {
329 ph10 947 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 ph10 584 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 ph10 947 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 ph10 584 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 ph10 947 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 ph10 1003 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 ph10 584 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 ph10 1003 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345     { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 ph10 584 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 ph10 947 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 ph10 584 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
352     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
353     #else
354     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
355     #endif
356 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 ph10 1039 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368     { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 ph10 584 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 ph10 1003 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372     { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373     { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375     { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376     { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377 ph10 571
378     /* These two were accidentally implemented with underscores instead of
379     hyphens in the option names. As this was not discovered for several releases,
380     the incorrect versions are left in the table for compatibility. However, the
381     --help function misses out any option that has an underscore in its name. */
382 ph10 579
383 ph10 1003 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385 ph10 571
386 nigel 87 #ifdef JFRIEDL_DEBUG
387     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
388     #endif
389     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395     { OP_NODATA, 0, NULL, NULL, NULL }
396 nigel 53 };
397    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Note also that the PATBUFSIZE macro (MAXPATLEN + 10)
assumes that the longest prefix+suffix is 10 characters; if anything longer is
added to these tables, PATBUFSIZE must be adjusted. */
404 nigel 53
405 nigel 87 static const char *prefix[] = {
406     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
407    
408     static const char *suffix[] = {
409     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
410    
411 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
412 nigel 87
413 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
414 nigel 87
415 nigel 93 const char utf8_table4[] = {
416     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
417     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
418     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
419     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
420    
421    
422    
423 nigel 53 /*************************************************
424 ph10 1039 * Exit from the program *
425     *************************************************/
426    
427     /* If there has been a resource error, give a suitable message.
428    
429     Argument: the return code
430     Returns: does not return
431     */
432    
433     static void
434     pcregrep_exit(int rc)
435     {
436     if (resource_error)
437     {
438     fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440     PCRE_ERROR_JIT_STACKLIMIT);
441     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442     }
443     exit(rc);
444     }
445    
446    
447     /*************************************************
448 ph10 1003 * Add item to chain of patterns *
449     *************************************************/
450    
451     /* Used to add an item onto a chain, or just return an unconnected item if the
452     "after" argument is NULL.
453    
454     Arguments:
455     s pattern string to add
456     after if not NULL points to item to insert after
457    
458 ph10 1492 Returns: new pattern block or NULL on error
459 ph10 1003 */
460    
461     static patstr *
462     add_pattern(char *s, patstr *after)
463     {
464     patstr *p = (patstr *)malloc(sizeof(patstr));
465     if (p == NULL)
466     {
467     fprintf(stderr, "pcregrep: malloc failed\n");
468 ph10 1039 pcregrep_exit(2);
469 ph10 1003 }
470     if (strlen(s) > MAXPATLEN)
471     {
472     fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473     MAXPATLEN);
474 ph10 1502 free(p);
475 ph10 1003 return NULL;
476     }
477     p->next = NULL;
478     p->string = s;
479     p->compiled = NULL;
480     p->hint = NULL;
481    
482     if (after != NULL)
483     {
484     p->next = after->next;
485     after->next = p;
486     }
487     return p;
488     }
489    
490    
491     /*************************************************
492     * Free chain of patterns *
493     *************************************************/
494    
495     /* Used for several chains of patterns.
496    
497     Argument: pointer to start of chain
498     Returns: nothing
499     */
500    
501     static void
502     free_pattern_chain(patstr *pc)
503     {
504     while (pc != NULL)
505     {
506     patstr *p = pc;
507     pc = p->next;
508     if (p->hint != NULL) pcre_free_study(p->hint);
509     if (p->compiled != NULL) pcre_free(p->compiled);
510     free(p);
511     }
512     }
513    
514    
515     /*************************************************
516     * Free chain of file names *
517     *************************************************/
518    
519     /*
520     Argument: pointer to start of chain
521     Returns: nothing
522     */
523    
524     static void
525     free_file_chain(fnstr *fn)
526     {
527     while (fn != NULL)
528     {
529     fnstr *f = fn;
530     fn = f->next;
531     free(f);
532     }
533     }
534    
535    
536     /*************************************************
537 nigel 87 * OS-specific functions *
538 nigel 53 *************************************************/
539    
540 ph10 1354 /* These functions are defined so that they can be made system specific.
541     At present there are versions for Unix-style environments, Windows, native
542     z/OS, and "no support". */
543 nigel 53
544    
545 ph10 1354 /************* Directory scanning Unix-style and z/OS ***********/
546 nigel 53
547 ph10 1354 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548 nigel 53 #include <sys/types.h>
549     #include <sys/stat.h>
550     #include <dirent.h>
551    
552 ph10 1354 #if defined NATIVE_ZOS
553     /************* Directory and PDS/E scanning for z/OS ***********/
554     /************* z/OS looks mostly like Unix with USS ************/
555     /* However, z/OS needs the #include statements in this header */
556     #include "pcrzosfs.h"
557     /* That header is not included in the main PCRE distribution because
558     other apparatus is needed to compile pcregrep for z/OS. The header
559     can be found in the special z/OS distribution, which is available
560     from www.zaconsultants.net or from www.cbttape.org. */
561     #endif
562    
563 nigel 53 typedef DIR directory_type;
564 ph10 1003 #define FILESEP '/'
565 nigel 53
/* Test whether a path names a directory.

The file type is tested with the POSIX S_ISDIR() macro rather than by masking
st_mode with S_IFMT: the S_IFxxx mask values are an XSI extension and are not
visible under strict feature-test settings, whereas S_ISDIR() is part of base
POSIX. The result is normalised to 0 or 1.

Argument:  the path to test
Returns:   1 if stat() succeeds and the path is a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode) != 0;
}
574    
575 nigel 67 static directory_type *
576 nigel 53 opendirectory(char *filename)
577     {
578     return opendir(filename);
579     }
580    
581 nigel 67 static char *
582 nigel 53 readdirectory(directory_type *dir)
583     {
584     for (;;)
585     {
586     struct dirent *dent = readdir(dir);
587     if (dent == NULL) return NULL;
588     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589     return dent->d_name;
590     }
591 ph10 151 /* Control never reaches here */
592 nigel 53 }
593    
594 nigel 67 static void
595 nigel 53 closedirectory(directory_type *dir)
596     {
597     closedir(dir);
598     }
599    
600    
601 ph10 1354 /************* Test for regular file, Unix-style **********/
602 nigel 87
/* Test whether a path names a regular file.

The file type is tested with the POSIX S_ISREG() macro rather than by masking
st_mode with S_IFMT: the S_IFxxx mask values are an XSI extension and are not
visible under strict feature-test settings, whereas S_ISREG() is part of base
POSIX. The result is normalised to 0 or 1.

Argument:  the path to test
Returns:   1 if the path is a regular file, or if stat() fails; otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) != 0;
}
611    
612    
613 ph10 1354 #if defined NATIVE_ZOS
614     /************* Test for a terminal in z/OS **********/
615     /* isatty() does not work in a TSO environment, so always give FALSE.*/
616 nigel 87
617     static BOOL
618     is_stdout_tty(void)
619     {
620 ph10 1354 return FALSE;
621     }
622    
623     static BOOL
624     is_file_tty(FILE *f)
625     {
626     return FALSE;
627     }
628    
629    
630     /************* Test for a terminal, Unix-style **********/
631    
632     #else
633     static BOOL
634     is_stdout_tty(void)
635     {
636 nigel 87 return isatty(fileno(stdout));
637     }
638    
639 ph10 519 static BOOL
640     is_file_tty(FILE *f)
641     {
642     return isatty(fileno(f));
643     }
644 ph10 1354 #endif
645 nigel 87
646 ph10 1354 /* End of Unix-style or native z/OS environment functions. */
647 ph10 519
648 nigel 53
649 ph10 1354 /************* Directory scanning in Windows ***********/
650    
651 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
652 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
654     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656     undefined when it is indeed undefined. */
657 nigel 53
658 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659 nigel 63
660     #ifndef STRICT
661     # define STRICT
662     #endif
663     #ifndef WIN32_LEAN_AND_MEAN
664     # define WIN32_LEAN_AND_MEAN
665     #endif
666 ph10 283
667     #include <windows.h>
668    
669 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
670     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671     #endif
672    
673 nigel 63 typedef struct directory_type
674     {
675     HANDLE handle;
676     BOOL first;
677     WIN32_FIND_DATA data;
678     } directory_type;
679    
680 ph10 1004 #define FILESEP '/'
681 ph10 1003
682 nigel 63 int
683     isdirectory(char *filename)
684     {
685     DWORD attr = GetFileAttributes(filename);
686     if (attr == INVALID_FILE_ATTRIBUTES)
687     return 0;
688 ph10 1003 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689 nigel 63 }
690    
691     directory_type *
692     opendirectory(char *filename)
693     {
694     size_t len;
695     char *pattern;
696     directory_type *dir;
697     DWORD err;
698     len = strlen(filename);
699 ph10 1003 pattern = (char *)malloc(len + 3);
700     dir = (directory_type *)malloc(sizeof(*dir));
701 nigel 63 if ((pattern == NULL) || (dir == NULL))
702     {
703     fprintf(stderr, "pcregrep: malloc failed\n");
704 ph10 561 pcregrep_exit(2);
705 nigel 63 }
706     memcpy(pattern, filename, len);
707     memcpy(&(pattern[len]), "\\*", 3);
708     dir->handle = FindFirstFile(pattern, &(dir->data));
709     if (dir->handle != INVALID_HANDLE_VALUE)
710     {
711     free(pattern);
712     dir->first = TRUE;
713     return dir;
714     }
715     err = GetLastError();
716     free(pattern);
717     free(dir);
718     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719     return NULL;
720     }
721    
722     char *
723     readdirectory(directory_type *dir)
724     {
725     for (;;)
726     {
727     if (!dir->first)
728     {
729     if (!FindNextFile(dir->handle, &(dir->data)))
730     return NULL;
731     }
732     else
733     {
734     dir->first = FALSE;
735     }
736     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737     return dir->data.cFileName;
738     }
739     #ifndef _MSC_VER
740     return NULL; /* Keep compiler happy; never executed */
741     #endif
742     }
743    
744     void
745     closedirectory(directory_type *dir)
746     {
747     FindClose(dir->handle);
748     free(dir);
749     }
750    
751    
752 ph10 1354 /************* Test for regular file in Windows **********/
753 nigel 87
754     /* I don't know how to do this, or if it can be done; assume all paths are
755     regular if they are not directories. */
756    
/* Windows version: anything that isdirectory() does not report as a directory
is taken to be a regular file.

Argument:  the path to test
Returns:   1 if isdirectory() returns zero for the path, otherwise 0
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
761    
762    
763 ph10 1354 /************* Test for a terminal in Windows **********/
764 nigel 87
765     /* I don't know how to do this; assume never */
766    
767     static BOOL
768     is_stdout_tty(void)
769     {
770 ph10 283 return FALSE;
771 nigel 87 }
772    
773 ph10 519 static BOOL
774     is_file_tty(FILE *f)
775     {
776     return FALSE;
777     }
778 nigel 87
779 ph10 1354 /* End of Windows functions */
780 ph10 519
781 ph10 1354
782 nigel 53 /************* Directory scanning when we can't do it ***********/
783    
784     /* The type is void, and apart from isdirectory(), the functions do nothing. */
785    
786 nigel 63 #else
787    
#define FILESEP 0
typedef void directory_type;

/* Stub: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Stub: a directory can never be opened; always returns a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* Stub: there are never any entries; always returns a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Stub: there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
795    
796 nigel 87
797 ph10 1354 /************* Test for regular file when we can't do it **********/
798 nigel 87
799     /* Assume all files are regular. */
800    
int
isregfile(char *filename)
{
(void)filename;     /* Every path is taken to be a regular file */
return 1;
}
802    
803    
804 ph10 519 /************* Test for a terminal when we can't do it **********/
805 nigel 87
806     static BOOL
807     is_stdout_tty(void)
808     {
809     return FALSE;
810     }
811    
812 ph10 519 static BOOL
813     is_file_tty(FILE *f)
814     {
815     return FALSE;
816     }
817 nigel 87
818 ph10 1354 #endif /* End of system-specific functions */
819 nigel 53
820    
821    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems (SunOS4 is one example) have no strerror() in
their libraries, but they do export the table of messages and its size, which
is all that is needed to build a replacement.

Argument:   n    an error number
Returns:    the message from sys_errlist[] when n indexes the table,
              otherwise a fixed "unknown" message
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
841    
842    
843    
844     /*************************************************
845 ph10 1039 * Usage function *
846     *************************************************/
847    
848     static int
849     usage(int rc)
850     {
851     option_item *op;
852     fprintf(stderr, "Usage: pcregrep [-");
853     for (op = optionlist; op->one_char != 0; op++)
854     {
855     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856     }
857     fprintf(stderr, "] [long options] [pattern] [files]\n");
858     fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859     "options.\n");
860     return rc;
861     }
862    
863    
864    
865     /*************************************************
866     * Help function *
867     *************************************************/
868    
869     static void
870     help(void)
871     {
872     option_item *op;
873    
874     printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875     printf("Search for PATTERN in each FILE or standard input.\n");
876     printf("PATTERN must be present if neither -e nor -f is used.\n");
877     printf("\"-\" can be used as a file name to mean STDIN.\n");
878    
879     #ifdef SUPPORT_LIBZ
880     printf("Files whose names end in .gz are read using zlib.\n");
881     #endif
882    
883     #ifdef SUPPORT_LIBBZ2
884     printf("Files whose names end in .bz2 are read using bzlib2.\n");
885     #endif
886    
887     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888     printf("Other files and the standard input are read as plain files.\n\n");
889     #else
890     printf("All files are read as plain files, without any interpretation.\n\n");
891     #endif
892    
893     printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894     printf("Options:\n");
895    
896     for (op = optionlist; op->one_char != 0; op++)
897     {
898     int n;
899     char s[4];
900    
901     /* Two options were accidentally implemented and documented with underscores
902     instead of hyphens in their names, something that was not noticed for quite a
903     few releases. When fixing this, I left the underscored versions in the list
904     in case people were using them. However, we don't want to display them in the
905     help data. There are no other options that contain underscores, and we do not
906     expect ever to implement such options. Therefore, just omit any option that
907     contains an underscore. */
908    
909     if (strchr(op->long_name, '_') != NULL) continue;
910    
911     if (op->one_char > 0 && (op->long_name)[0] == 0)
912     n = 31 - printf(" -%c", op->one_char);
913     else
914     {
915     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916     else strcpy(s, " ");
917     n = 31 - printf(" %s --%s", s, op->long_name);
918     }
919    
920     if (n < 1) n = 1;
921     printf("%.*s%s\n", n, " ", op->help_text);
922     }
923    
924     printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925     printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926     printf("When reading patterns or file names from a file, trailing white\n");
927     printf("space is removed and blank lines are ignored.\n");
928     printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929    
930     printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932     }
933    
934    
935    
936     /*************************************************
937 ph10 1003 * Test exclude/includes *
938     *************************************************/
939    
940     /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941     there are no includes, the path must match an include pattern.
942    
943     Arguments:
944     path the path to be matched
945     ip the chain of include patterns
946     ep the chain of exclude patterns
947    
948     Returns: TRUE if the path is not excluded
949     */
950    
951     static BOOL
952     test_incexc(char *path, patstr *ip, patstr *ep)
953     {
954     int plen = strlen(path);
955    
956     for (; ep != NULL; ep = ep->next)
957     {
958     if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959     return FALSE;
960     }
961    
962     if (ip == NULL) return TRUE;
963    
964     for (; ip != NULL; ip = ip->next)
965     {
966     if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967     return TRUE;
968     }
969    
970     return FALSE;
971     }
972    
973    
974    
975     /*************************************************
976 ph10 1039 * Decode integer argument value *
977     *************************************************/
978    
979     /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980     because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981     just keep it simple.
982    
983     Arguments:
984     option_data the option data string
985     op the option item (for error messages)
986     longop TRUE if option given in long form
987    
988     Returns: a long integer
989     */
990    
991     static long int
992     decode_number(char *option_data, option_item *op, BOOL longop)
993     {
994     unsigned long int n = 0;
995     char *endptr = option_data;
996     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997     while (isdigit((unsigned char)(*endptr)))
998     n = n * 10 + (int)(*endptr++ - '0');
999     if (toupper(*endptr) == 'K')
1000     {
1001     n *= 1024;
1002     endptr++;
1003     }
1004     else if (toupper(*endptr) == 'M')
1005     {
1006     n *= 1024*1024;
1007     endptr++;
1008     }
1009    
1010     if (*endptr != 0) /* Error */
1011     {
1012     if (longop)
1013     {
1014     char *equals = strchr(op->long_name, '=');
1015     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016     (int)(equals - op->long_name);
1017     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018     option_data, nlen, op->long_name);
1019     }
1020     else
1021     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022     option_data, op->one_char);
1023     pcregrep_exit(usage(2));
1024     }
1025    
1026     return n;
1027     }
1028    
1029    
1030    
1031     /*************************************************
1032     * Add item to a chain of numbers *
1033     *************************************************/
1034    
1035     /* Used to add an item onto a chain, or just return an unconnected item if the
1036     "after" argument is NULL.
1037    
1038     Arguments:
1039     n the number to add
1040     after if not NULL points to item to insert after
1041    
1042     Returns: new number block
1043     */
1044    
1045     static omstr *
1046     add_number(int n, omstr *after)
1047     {
1048     omstr *om = (omstr *)malloc(sizeof(omstr));
1049    
1050     if (om == NULL)
1051     {
1052     fprintf(stderr, "pcregrep: malloc failed\n");
1053     pcregrep_exit(2);
1054     }
1055     om->next = NULL;
1056     om->groupnum = n;
1057    
1058     if (after != NULL)
1059     {
1060     om->next = after->next;
1061     after->next = om;
1062     }
1063     return om;
1064     }
1065    
1066    
1067    
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file. The space remaining is checked before each
character is fetched, so if length is zero or negative nothing is read from the
file and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
1099    
1100    
1101    
1102     /*************************************************
1103 nigel 93 * Find end of line *
1104     *************************************************/
1105    
1106     /* The length of the endline sequence that is found is set via lenptr. This may
1107     be zero at the very end of the file if there is no line-ending sequence there.
1108    
1109     Arguments:
1110     p current position in line
1111     endptr end of available data
1112     lenptr where to put the length of the eol sequence
1113    
1114 ph10 654 Returns: pointer after the last byte of the line,
1115 ph10 644 including the newline byte(s)
1116 nigel 93 */
1117    
1118     static char *
1119     end_of_line(char *p, char *endptr, int *lenptr)
1120     {
1121     switch(endlinetype)
1122     {
1123     default: /* Just in case */
1124     case EL_LF:
1125     while (p < endptr && *p != '\n') p++;
1126     if (p < endptr)
1127     {
1128     *lenptr = 1;
1129     return p + 1;
1130     }
1131     *lenptr = 0;
1132     return endptr;
1133    
1134     case EL_CR:
1135     while (p < endptr && *p != '\r') p++;
1136     if (p < endptr)
1137     {
1138     *lenptr = 1;
1139     return p + 1;
1140     }
1141     *lenptr = 0;
1142     return endptr;
1143    
1144     case EL_CRLF:
1145     for (;;)
1146     {
1147     while (p < endptr && *p != '\r') p++;
1148     if (++p >= endptr)
1149     {
1150     *lenptr = 0;
1151     return endptr;
1152     }
1153     if (*p == '\n')
1154     {
1155     *lenptr = 2;
1156     return p + 1;
1157     }
1158     }
1159     break;
1160    
1161 ph10 149 case EL_ANYCRLF:
1162     while (p < endptr)
1163     {
1164     int extra = 0;
1165     register int c = *((unsigned char *)p);
1166    
1167     if (utf8 && c >= 0xc0)
1168     {
1169     int gcii, gcss;
1170     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1171     gcss = 6*extra;
1172     c = (c & utf8_table3[extra]) << gcss;
1173     for (gcii = 1; gcii <= extra; gcii++)
1174     {
1175     gcss -= 6;
1176     c |= (p[gcii] & 0x3f) << gcss;
1177     }
1178     }
1179    
1180     p += 1 + extra;
1181    
1182     switch (c)
1183     {
1184 ph10 1033 case '\n':
1185 ph10 149 *lenptr = 1;
1186     return p;
1187    
1188 ph10 1033 case '\r':
1189     if (p < endptr && *p == '\n')
1190 ph10 149 {
1191     *lenptr = 2;
1192     p++;
1193     }
1194     else *lenptr = 1;
1195     return p;
1196 ph10 150
1197 ph10 149 default:
1198     break;
1199     }
1200     } /* End of loop for ANYCRLF case */
1201 ph10 150
1202 ph10 149 *lenptr = 0; /* Must have hit the end */
1203     return endptr;
1204    
1205 nigel 93 case EL_ANY:
1206     while (p < endptr)
1207     {
1208     int extra = 0;
1209     register int c = *((unsigned char *)p);
1210    
1211     if (utf8 && c >= 0xc0)
1212     {
1213     int gcii, gcss;
1214     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1215     gcss = 6*extra;
1216     c = (c & utf8_table3[extra]) << gcss;
1217     for (gcii = 1; gcii <= extra; gcii++)
1218     {
1219     gcss -= 6;
1220     c |= (p[gcii] & 0x3f) << gcss;
1221     }
1222     }
1223    
1224     p += 1 + extra;
1225    
1226     switch (c)
1227     {
1228 ph10 1033 case '\n': /* LF */
1229     case '\v': /* VT */
1230     case '\f': /* FF */
1231 nigel 93 *lenptr = 1;
1232     return p;
1233    
1234 ph10 1033 case '\r': /* CR */
1235     if (p < endptr && *p == '\n')
1236 nigel 93 {
1237     *lenptr = 2;
1238     p++;
1239     }
1240     else *lenptr = 1;
1241     return p;
1242    
1243 ph10 1033 #ifndef EBCDIC
1244     case 0x85: /* Unicode NEL */
1245 nigel 93 *lenptr = utf8? 2 : 1;
1246     return p;
1247    
1248 ph10 1033 case 0x2028: /* Unicode LS */
1249     case 0x2029: /* Unicode PS */
1250 nigel 93 *lenptr = 3;
1251     return p;
1252 ph10 1039 #endif /* Not EBCDIC */
1253 nigel 93
1254     default:
1255     break;
1256     }
1257     } /* End of loop for ANY case */
1258    
1259     *lenptr = 0; /* Must have hit the end */
1260     return endptr;
1261     } /* End of overall switch */
1262     }
1263    
1264    
1265    
1266     /*************************************************
1267     * Find start of previous line *
1268     *************************************************/
1269    
1270     /* This is called when looking back for before lines to print.
1271    
1272     Arguments:
1273     p start of the subsequent line
1274     startptr start of available data
1275    
1276     Returns: pointer to the start of the previous line
1277     */
1278    
1279     static char *
1280     previous_line(char *p, char *startptr)
1281     {
1282     switch(endlinetype)
1283     {
1284     default: /* Just in case */
1285     case EL_LF:
1286     p--;
1287     while (p > startptr && p[-1] != '\n') p--;
1288     return p;
1289    
1290     case EL_CR:
1291     p--;
1292     while (p > startptr && p[-1] != '\n') p--;
1293     return p;
1294    
1295     case EL_CRLF:
1296     for (;;)
1297     {
1298     p -= 2;
1299     while (p > startptr && p[-1] != '\n') p--;
1300     if (p <= startptr + 1 || p[-2] == '\r') return p;
1301     }
1302 ph10 1467 /* Control can never get here */
1303 nigel 93
1304     case EL_ANY:
1305 ph10 150 case EL_ANYCRLF:
1306 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307     if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308    
1309     while (p > startptr)
1310     {
1311 chpe 1096 register unsigned int c;
1312 nigel 93 char *pp = p - 1;
1313    
1314     if (utf8)
1315     {
1316     int extra = 0;
1317     while ((*pp & 0xc0) == 0x80) pp--;
1318     c = *((unsigned char *)pp);
1319     if (c >= 0xc0)
1320     {
1321     int gcii, gcss;
1322     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1323     gcss = 6*extra;
1324     c = (c & utf8_table3[extra]) << gcss;
1325     for (gcii = 1; gcii <= extra; gcii++)
1326     {
1327     gcss -= 6;
1328     c |= (pp[gcii] & 0x3f) << gcss;
1329     }
1330     }
1331     }
1332     else c = *((unsigned char *)pp);
1333    
1334 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
1335 nigel 93 {
1336 ph10 1033 case '\n': /* LF */
1337     case '\r': /* CR */
1338 ph10 149 return p;
1339 ph10 150
1340 ph10 149 default:
1341     break;
1342 ph10 150 }
1343 ph10 149
1344     else switch (c)
1345     {
1346 ph10 1033 case '\n': /* LF */
1347     case '\v': /* VT */
1348     case '\f': /* FF */
1349     case '\r': /* CR */
1350 ph10 1039 #ifndef EBCDIE
1351 ph10 1033 case 0x85: /* Unicode NEL */
1352     case 0x2028: /* Unicode LS */
1353     case 0x2029: /* Unicode PS */
1354 ph10 1039 #endif /* Not EBCDIC */
1355 nigel 93 return p;
1356    
1357     default:
1358     break;
1359     }
1360    
1361     p = pp; /* Back one character */
1362     } /* End of loop for ANY case */
1363    
1364     return startptr; /* Hit start of data */
1365     } /* End of overall switch */
1366     }
1367    
1368    
1369    
1370    
1371    
1372     /*************************************************
1373 nigel 77 * Print the previous "after" lines *
1374 nigel 49 *************************************************/
1375    
1376 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
1377 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
1378     that a binary zero does not terminate it.
1379 nigel 77
1380     Arguments:
1381     lastmatchnumber the number of the last matching line, plus one
1382     lastmatchrestart where we restarted after the last match
1383     endptr end of available data
1384     printname filename for printing
1385    
1386     Returns: nothing
1387     */
1388    
1389 ph10 1003 static void
1390     do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1391     char *printname)
1392 nigel 77 {
1393     if (after_context > 0 && lastmatchnumber > 0)
1394     {
1395     int count = 0;
1396     while (lastmatchrestart < endptr && count++ < after_context)
1397     {
1398 nigel 93 int ellength;
1399 nigel 77 char *pp = lastmatchrestart;
1400     if (printname != NULL) fprintf(stdout, "%s-", printname);
1401     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1402 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1403 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404 nigel 93 lastmatchrestart = pp;
1405 nigel 77 }
1406     hyphenpending = TRUE;
1407     }
1408     }
1409    
1410    
1411    
1412     /*************************************************
1413 ph10 378 * Apply patterns to subject till one matches *
1414     *************************************************/
1415    
1416 ph10 392 /* This function is called to run through all patterns, looking for a match. It
1417     is used multiple times for the same subject when colouring is enabled, in order
1418 ph10 378 to find all possible matches.
1419    
1420     Arguments:
1421 ph10 632 matchptr the start of the subject
1422     length the length of the subject to match
1423 ph10 1335 options options for pcre_exec
1424 ph10 632 startoffset where to start matching
1425     offsets the offets vector to fill in
1426     mrc address of where to put the result of pcre_exec()
1427 ph10 392
1428     Returns: TRUE if there was a match
1429 ph10 378 FALSE if there was no match
1430     invert if there was a non-fatal error
1431 ph10 392 */
1432 ph10 378
1433     static BOOL
1434 ph10 1335 match_patterns(char *matchptr, size_t length, unsigned int options,
1435 ph10 1324 int startoffset, int *offsets, int *mrc)
1436 ph10 378 {
1437     int i;
1438 ph10 561 size_t slen = length;
1439 ph10 1003 patstr *p = patterns;
1440 ph10 561 const char *msg = "this text:\n\n";
1441 ph10 1003
1442 ph10 561 if (slen > 200)
1443     {
1444     slen = 200;
1445     msg = "text that starts:\n\n";
1446 ph10 579 }
1447 ph10 1003 for (i = 1; p != NULL; p = p->next, i++)
1448 ph10 378 {
1449 ph10 1003 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450 ph10 1324 startoffset, options, offsets, OFFSET_SIZE);
1451 ph10 378 if (*mrc >= 0) return TRUE;
1452     if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454 ph10 1003 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455 ph10 561 fprintf(stderr, "%s", msg);
1456     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1457     fprintf(stderr, "\n\n");
1458 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460 ph10 561 resource_error = TRUE;
1461 ph10 378 if (error_count++ > 20)
1462     {
1463 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464     pcregrep_exit(2);
1465 ph10 378 }
1466     return invert; /* No more matching; don't show the line again */
1467     }
1468    
1469     return FALSE; /* No match, no errors */
1470     }
1471    
1472    
1473    
1474     /*************************************************
1475 nigel 77 * Grep an individual file *
1476     *************************************************/
1477    
1478     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1480 nigel 77 the top third of the buffer, thus keeping more of the file available for
1481     context printing or for multiline scanning. For large files, the pointer will
1482     be in the middle third most of the time, so the bottom third is available for
1483     "before" context printing.
1484    
1485     Arguments:
1486 ph10 286 handle the fopened FILE stream for a normal file
1487     the gzFile pointer when reading is via libz
1488     the BZFILE pointer when reading is via libbz2
1489     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490 ph10 644 filename the file name or NULL (for errors)
1491 nigel 77 printname the file name if it is to be printed for each match
1492     or NULL if the file name is not to be printed
1493     it cannot be NULL if filenames[_nomatch]_only is set
1494    
1495     Returns: 0 if there was at least one match
1496     1 otherwise (no matches)
1497 ph10 654 2 if an overlong line is encountered
1498 ph10 644 3 if there is a read error on a .bz2 file
1499 nigel 77 */
1500    
1501 nigel 49 static int
1502 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1503 nigel 49 {
1504     int rc = 1;
1505 nigel 77 int linenumber = 1;
1506     int lastmatchnumber = 0;
1507 nigel 49 int count = 0;
1508 ph10 280 int filepos = 0;
1509 ph10 378 int offsets[OFFSET_SIZE];
1510 nigel 77 char *lastmatchrestart = NULL;
1511 ph10 644 char *ptr = main_buffer;
1512 nigel 77 char *endptr;
1513     size_t bufflength;
1514 ph10 947 BOOL binary = FALSE;
1515 nigel 77 BOOL endhyphenpending = FALSE;
1516 ph10 519 BOOL input_line_buffered = line_buffered;
1517 ph10 286 FILE *in = NULL; /* Ensure initialized */
1518 nigel 49
1519 ph10 286 #ifdef SUPPORT_LIBZ
1520     gzFile ingz = NULL;
1521     #endif
1522 nigel 77
1523 ph10 286 #ifdef SUPPORT_LIBBZ2
1524     BZFILE *inbz2 = NULL;
1525     #endif
1526    
1527    
1528     /* Do the first read into the start of the buffer and set up the pointer to end
1529     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531     fail. */
1532    
1533 chpe 1136 (void)frtype;
1534    
1535 ph10 286 #ifdef SUPPORT_LIBZ
1536     if (frtype == FR_LIBZ)
1537     {
1538     ingz = (gzFile)handle;
1539 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1540 ph10 286 }
1541     else
1542     #endif
1543    
1544     #ifdef SUPPORT_LIBBZ2
1545     if (frtype == FR_LIBBZ2)
1546     {
1547     inbz2 = (BZFILE *)handle;
1548 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1550     } /* without the cast it is unsigned. */
1551     else
1552     #endif
1553    
1554     {
1555     in = (FILE *)handle;
1556 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1557 ph10 535 bufflength = input_line_buffered?
1558 ph10 644 read_one_line(main_buffer, bufsize, in) :
1559     fread(main_buffer, 1, bufsize, in);
1560 ph10 286 }
1561 ph10 535
1562 ph10 644 endptr = main_buffer + bufflength;
1563 nigel 77
1564 ph10 947 /* Unless binary-files=text, see if we have a binary file. This uses the same
1565 ph10 975 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566 ph10 947 file. */
1567    
1568     if (binary_files != BIN_TEXT)
1569     {
1570 ph10 975 binary =
1571 ph10 947 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572     if (binary && binary_files == BIN_NOMATCH) return 1;
1573 ph10 975 }
1574 ph10 947
1575 nigel 77 /* Loop while the current pointer is not at the end of the file. For large
1576     files, endptr will be at the end of the buffer when we are in the middle of the
1577     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578     way, the buffer is shifted left and re-filled. */
1579    
1580     while (ptr < endptr)
1581 nigel 49 {
1582 ph10 378 int endlinelength;
1583 nigel 87 int mrc = 0;
1584 ph10 654 int startoffset = 0;
1585 ph10 1548 int prevoffsets[2];
1586 ph10 1335 unsigned int options = 0;
1587 ph10 378 BOOL match;
1588 ph10 286 char *matchptr = ptr;
1589 nigel 77 char *t = ptr;
1590     size_t length, linelength;
1591 nigel 49
1592 ph10 1548 prevoffsets[0] = prevoffsets[1] = -1;
1593    
1594 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1595     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1596     length remainder of the data in the buffer. Otherwise, it is the length of
1597 ph10 378 the next line, excluding the terminating newline. After matching, we always
1598     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1599     option is used for compiling, so that any match is constrained to be in the
1600     first line. */
1601 nigel 77
1602 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1603     linelength = t - ptr - endlinelength;
1604 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1605 ph10 654
1606     /* Check to see if the line we are looking at extends right to the very end
1607     of the buffer without a line terminator. This means the line is too long to
1608 ph10 644 handle. */
1609 ph10 654
1610 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1611     {
1612     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1613 ph10 646 "pcregrep: check the --buffer-size option\n",
1614 ph10 654 linenumber,
1615 ph10 644 (filename == NULL)? "" : " of file ",
1616     (filename == NULL)? "" : filename);
1617     return 2;
1618 ph10 654 }
1619 nigel 77
1620 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1621    
1622     #ifdef JFRIEDL_DEBUG
1623     if (jfriedl_XT || jfriedl_XR)
1624     {
1625 zherczeg 1216 # include <sys/time.h>
1626     # include <time.h>
1627 nigel 89 struct timeval start_time, end_time;
1628     struct timezone dummy;
1629 ph10 392 int i;
1630 nigel 89
1631     if (jfriedl_XT)
1632     {
1633     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1634     const char *orig = ptr;
1635     ptr = malloc(newlen + 1);
1636     if (!ptr) {
1637     printf("out of memory");
1638 ph10 561 pcregrep_exit(2);
1639 nigel 89 }
1640     endptr = ptr;
1641     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1642     for (i = 0; i < jfriedl_XT; i++) {
1643     strncpy(endptr, orig, length);
1644     endptr += length;
1645     }
1646     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1647     length = newlen;
1648     }
1649    
1650     if (gettimeofday(&start_time, &dummy) != 0)
1651     perror("bad gettimeofday");
1652    
1653    
1654     for (i = 0; i < jfriedl_XR; i++)
1655 ph10 1003 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1656 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1657 nigel 89
1658     if (gettimeofday(&end_time, &dummy) != 0)
1659     perror("bad gettimeofday");
1660    
1661     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1662     -
1663     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1664    
1665     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1666     return 0;
1667     }
1668     #endif
1669    
1670 ph10 1039 /* We come back here after a match when show_only_matching is set, in order
1671     to find any further matches in the same line. This applies to
1672     --only-matching, --file-offsets, and --line-offsets. */
1673 nigel 89
1674 ph10 286 ONLY_MATCHING_RESTART:
1675    
1676 ph10 392 /* Run through all the patterns until one matches or there is an error other
1677 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1678 ph10 1335 finding subsequent matches when colouring matched lines. After finding one
1679     match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1680 ph10 1324 this line. */
1681 ph10 392
1682 ph10 1324 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1683     options = PCRE_NOTEMPTY;
1684 nigel 77
1685 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1686 nigel 77
1687 nigel 49 if (match != invert)
1688     {
1689 nigel 77 BOOL hyphenprinted = FALSE;
1690    
1691 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1692 nigel 77
1693 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1694    
1695     /* Just count if just counting is wanted. */
1696    
1697 nigel 49 if (count_only) count++;
1698 ph10 975
1699     /* When handling a binary file and binary-files==binary, the "binary"
1700     variable will be set true (it's false in all other cases). In this
1701 ph10 947 situation we just want to output the file name. No need to scan further. */
1702 ph10 975
1703 ph10 947 else if (binary)
1704     {
1705     fprintf(stdout, "Binary file %s matches\n", filename);
1706 ph10 975 return 0;
1707     }
1708 nigel 49
1709 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1710     in the file. */
1711    
1712 ph10 420 else if (filenames == FN_MATCH_ONLY)
1713 nigel 49 {
1714 nigel 77 fprintf(stdout, "%s\n", printname);
1715 nigel 49 return 0;
1716     }
1717    
1718 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1719    
1720 nigel 77 else if (quiet) return 0;
1721 nigel 49
1722 ph10 1039 /* The --only-matching option prints just the substring that matched,
1723     and/or one or more captured portions of it, as long as these strings are
1724     not empty. The --file-offsets and --line-offsets options output offsets for
1725     the matching substring (all three set show_only_matching). None of these
1726     mutually exclusive options prints any context. Afterwards, adjust the start
1727     and then jump back to look for further matches in the same line. If we are
1728     in invert mode, however, nothing is printed and we do not restart - this
1729     could still be useful because the return code is set. */
1730 nigel 87
1731 ph10 1039 else if (show_only_matching)
1732 nigel 87 {
1733 ph10 279 if (!invert)
1734 ph10 286 {
1735 ph10 1543 int oldstartoffset = startoffset;
1736 ph10 1548
1737     /* It is possible, when a lookbehind assertion contains \K, for the
1738     same string to be found again. The code below advances startoffset, but
1739 ph10 1543 until it is past the "bumpalong" offset that gave the match, the same
1740     substring will be returned. The PCRE1 library does not return the
1741     bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
1742     does this better.) */
1743 ph10 1548
1744 ph10 1543 if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
1745 ph10 377 {
1746 ph10 1543 prevoffsets[0] = offsets[0];
1747 ph10 1548 prevoffsets[1] = offsets[1];
1748    
1749 ph10 1543 if (printname != NULL) fprintf(stdout, "%s:", printname);
1750     if (number) fprintf(stdout, "%d:", linenumber);
1751 ph10 1548
1752 ph10 1543 /* Handle --line-offsets */
1753 ph10 1548
1754 ph10 1543 if (line_offsets)
1755     fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1756     offsets[1] - offsets[0]);
1757 ph10 1548
1758 ph10 1543 /* Handle --file-offsets */
1759 ph10 1548
1760 ph10 1543 else if (file_offsets)
1761     fprintf(stdout, "%d,%d\n",
1762     (int)(filepos + matchptr + offsets[0] - ptr),
1763     offsets[1] - offsets[0]);
1764 ph10 1548
1765 ph10 1543 /* Handle --only-matching, which may occur many times */
1766 ph10 1548
1767 ph10 1543 else
1768 ph10 579 {
1769 ph10 1543 BOOL printed = FALSE;
1770     omstr *om;
1771 ph10 1548
1772 ph10 1543 for (om = only_matching; om != NULL; om = om->next)
1773 ph10 1039 {
1774 ph10 1543 int n = om->groupnum;
1775     if (n < mrc)
1776 ph10 1039 {
1777 ph10 1543 int plen = offsets[2*n + 1] - offsets[2*n];
1778     if (plen > 0)
1779     {
1780     if (printed) fprintf(stdout, "%s", om_separator);
1781     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1782     FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1783     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1784     printed = TRUE;
1785     }
1786 ph10 1039 }
1787     }
1788 ph10 1548
1789 ph10 1543 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1790 ph10 579 }
1791 ph10 1548 }
1792 ph10 1221
1793 ph10 1548 /* Prepare to repeat to find the next match. If the pattern contained
1794     a lookbehind that included \K, it is possible that the end of the match
1795     might be at or before the actual starting offset we have just used. We
1796     need to start one character further on. Unfortunately, for unanchored
1797     patterns, the actual start offset can be greater than the one that was
1798     set as a result of "bumpalong". PCRE1 does not return the actual start
1799     offset, so we have to check against the original start offset. This may
1800     lead to duplicates - we need the fudge above to avoid printing them.
1801 ph10 1543 (PCRE2 does this better.) */
1802 ph10 1039
1803 ph10 286 match = FALSE;
1804 ph10 564 if (line_buffered) fflush(stdout);
1805 ph10 636 rc = 0; /* Had some success */
1806     startoffset = offsets[1]; /* Restart after the match */
1807 ph10 1543 if (startoffset <= oldstartoffset)
1808     {
1809 ph10 1548 if ((size_t)startoffset >= length)
1810 ph10 1543 goto END_ONE_MATCH; /* We were at the end */
1811     startoffset = oldstartoffset + 1;
1812     if (utf8)
1813 ph10 1548 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
1814     }
1815 ph10 286 goto ONLY_MATCHING_RESTART;
1816     }
1817 nigel 87 }
1818    
1819     /* This is the default case when none of the above options is set. We print
1820     the matching line(s), possibly preceded and/or followed by other lines of
1821     context. */
1822    
1823 nigel 49 else
1824     {
1825 nigel 77 /* See if there is a requirement to print some "after" lines from a
1826     previous match. We never print any overlaps. */
1827    
1828     if (after_context > 0 && lastmatchnumber > 0)
1829     {
1830 nigel 93 int ellength;
1831 nigel 77 int linecount = 0;
1832     char *p = lastmatchrestart;
1833    
1834     while (p < ptr && linecount < after_context)
1835     {
1836 nigel 93 p = end_of_line(p, ptr, &ellength);
1837 nigel 77 linecount++;
1838     }
1839    
1840     /* It is important to advance lastmatchrestart during this printing so
1841 nigel 87 that it interacts correctly with any "before" printing below. Print
1842     each line's data using fwrite() in case there are binary zeroes. */
1843 nigel 77
1844     while (lastmatchrestart < p)
1845     {
1846     char *pp = lastmatchrestart;
1847     if (printname != NULL) fprintf(stdout, "%s-", printname);
1848     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1849 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1850 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1851 nigel 93 lastmatchrestart = pp;
1852 nigel 77 }
1853     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1854     }
1855    
1856     /* If there were non-contiguous lines printed above, insert hyphens. */
1857    
1858     if (hyphenpending)
1859     {
1860     fprintf(stdout, "--\n");
1861     hyphenpending = FALSE;
1862     hyphenprinted = TRUE;
1863     }
1864    
1865     /* See if there is a requirement to print some "before" lines for this
1866     match. Again, don't print overlaps. */
1867    
1868     if (before_context > 0)
1869     {
1870     int linecount = 0;
1871     char *p = ptr;
1872    
1873 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1874 nigel 87 linecount < before_context)
1875 nigel 77 {
1876 nigel 87 linecount++;
1877 ph10 644 p = previous_line(p, main_buffer);
1878 nigel 77 }
1879    
1880     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1881     fprintf(stdout, "--\n");
1882    
1883     while (p < ptr)
1884     {
1885 nigel 93 int ellength;
1886 nigel 77 char *pp = p;
1887     if (printname != NULL) fprintf(stdout, "%s-", printname);
1888     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1889 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1890 ph10 515 FWRITE(p, 1, pp - p, stdout);
1891 nigel 93 p = pp;
1892 nigel 77 }
1893     }
1894    
1895     /* Now print the matching line(s); ensure we set hyphenpending at the end
1896 nigel 85 of the file if any context lines are being output. */
1897 nigel 77
1898 nigel 85 if (after_context > 0 || before_context > 0)
1899     endhyphenpending = TRUE;
1900    
1901 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1902 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1903 nigel 77
1904     /* In multiline mode, we want to print to the end of the line in which
1905     the end of the matched string is found, so we adjust linelength and the
1906 ph10 222 line number appropriately, but only when there actually was a match
1907     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1908     the match will always be before the first newline sequence. */
1909 nigel 77
1910 ph10 587 if (multiline & !invert)
1911 nigel 77 {
1912 ph10 587 char *endmatch = ptr + offsets[1];
1913     t = ptr;
1914 ph10 1353 while (t <= endmatch)
1915 nigel 93 {
1916 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1917     if (t < endmatch) linenumber++; else break;
1918 nigel 93 }
1919 ph10 587 linelength = t - ptr - endlinelength;
1920 nigel 77 }
1921    
1922 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1923     zeroes are treated as just another data character. */
1924    
1925     /* This extra option, for Jeffrey Friedl's debugging requirements,
1926     replaces the matched string, or a specific captured string if it exists,
1927     with X. When this happens, colouring is ignored. */
1928    
1929     #ifdef JFRIEDL_DEBUG
1930     if (S_arg >= 0 && S_arg < mrc)
1931     {
1932     int first = S_arg * 2;
1933     int last = first + 1;
1934 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1935 nigel 87 fprintf(stdout, "X");
1936 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1937 nigel 87 }
1938     else
1939     #endif
1940    
1941 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1942 ph10 585 matches, but not of course if the line is a non-match. */
1943 ph10 589
1944 ph10 585 if (do_colour && !invert)
1945 nigel 87 {
1946 ph10 589 int plength;
1947 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1948 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1949 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1950 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1951 ph10 378 for (;;)
1952     {
1953 ph10 632 startoffset = offsets[1];
1954 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1955 ph10 1324 !match_patterns(matchptr, length, options, startoffset, offsets,
1956     &mrc))
1957 ph10 632 break;
1958     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1959 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1960 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1961 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1962     }
1963 ph10 587
1964     /* In multiline mode, we may have already printed the complete line
1965 ph10 589 and its line-ending characters (if they matched the pattern), so there
1966 ph10 587 may be no more to print. */
1967 ph10 589
1968 ph10 836 plength = (int)((linelength + endlinelength) - startoffset);
1969 ph10 636 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1970 nigel 87 }
1971 ph10 392
1972 ph10 378 /* Not colouring; no need to search for further matches */
1973 ph10 392
1974 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1975 nigel 49 }
1976    
1977 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1978     given, flush the output. */
1979 nigel 87
1980 ph10 519 if (line_buffered) fflush(stdout);
1981 nigel 77 rc = 0; /* Had some success */
1982    
1983     /* Remember where the last match happened for after_context. We remember
1984     where we are about to restart, and that line's number. */
1985    
1986 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1987 nigel 77 lastmatchnumber = linenumber + 1;
1988 nigel 49 }
1989 nigel 77
1990 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1991     anything to be printed), we have to move on to the end of the match before
1992     proceeding. */
1993    
1994     if (multiline && invert && match)
1995     {
1996     int ellength;
1997     char *endmatch = ptr + offsets[1];
1998     t = ptr;
1999     while (t < endmatch)
2000     {
2001     t = end_of_line(t, endptr, &ellength);
2002     if (t <= endmatch) linenumber++; else break;
2003     }
2004     endmatch = end_of_line(endmatch, endptr, &ellength);
2005     linelength = endmatch - ptr - ellength;
2006     }
2007    
2008 ph10 286 /* Advance to after the newline and increment the line number. The file
2009 ph10 280 offset to the current line is maintained in filepos. */
2010 nigel 77
2011 ph10 1543 END_ONE_MATCH:
2012 nigel 93 ptr += linelength + endlinelength;
2013 ph10 530 filepos += (int)(linelength + endlinelength);
2014 nigel 77 linenumber++;
2015 ph10 535
2016     /* If input is line buffered, and the buffer is not yet full, read another
2017 ph10 519 line and add it into the buffer. */
2018 ph10 535
2019 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
2020 ph10 519 {
2021 ph10 836 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2022 ph10 519 bufflength += add;
2023 ph10 535 endptr += add;
2024     }
2025 nigel 77
2026     /* If we haven't yet reached the end of the file (the buffer is full), and
2027     the current point is in the top 1/3 of the buffer, slide the buffer down by
2028     1/3 and refill it. Before we do this, if some unprinted "after" lines are
2029     about to be lost, print them. */
2030    
2031 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2032 nigel 77 {
2033     if (after_context > 0 &&
2034     lastmatchnumber > 0 &&
2035 ph10 644 lastmatchrestart < main_buffer + bufthird)
2036 nigel 77 {
2037     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2038     lastmatchnumber = 0;
2039     }
2040    
2041     /* Now do the shuffle */
2042    
2043 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2044     ptr -= bufthird;
2045 ph10 286
2046     #ifdef SUPPORT_LIBZ
2047     if (frtype == FR_LIBZ)
2048 ph10 644 bufflength = 2*bufthird +
2049     gzread (ingz, main_buffer + 2*bufthird, bufthird);
2050 ph10 286 else
2051     #endif
2052    
2053     #ifdef SUPPORT_LIBBZ2
2054     if (frtype == FR_LIBBZ2)
2055 ph10 644 bufflength = 2*bufthird +
2056     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2057 ph10 286 else
2058     #endif
2059    
2060 ph10 644 bufflength = 2*bufthird +
2061 ph10 535 (input_line_buffered?
2062 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2063     fread(main_buffer + 2*bufthird, 1, bufthird, in));
2064     endptr = main_buffer + bufflength;
2065 nigel 77
2066     /* Adjust any last match point */
2067    
2068 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2069 nigel 77 }
2070     } /* Loop through the whole file */
2071    
2072     /* End of file; print final "after" lines if wanted; do_after_lines sets
2073     hyphenpending if it prints something. */
2074    
2075 ph10 1039 if (!show_only_matching && !count_only)
2076 nigel 87 {
2077     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2078     hyphenpending |= endhyphenpending;
2079     }
2080 nigel 77
2081     /* Print the file name if we are looking for those without matches and there
2082     were none. If we found a match, we won't have got this far. */
2083    
2084 nigel 87 if (filenames == FN_NOMATCH_ONLY)
2085 nigel 77 {
2086     fprintf(stdout, "%s\n", printname);
2087     return 0;
2088 nigel 49 }
2089    
2090 nigel 77 /* Print the match count if wanted */
2091    
2092 nigel 49 if (count_only)
2093     {
2094 ph10 420 if (count > 0 || !omit_zero_count)
2095 ph10 461 {
2096     if (printname != NULL && filenames != FN_NONE)
2097 ph10 420 fprintf(stdout, "%s:", printname);
2098     fprintf(stdout, "%d\n", count);
2099 ph10 461 }
2100 nigel 49 }
2101    
2102     return rc;
2103     }
2104    
2105    
2106    
2107     /*************************************************
2108 nigel 53 * Grep a file or recurse into a directory *
2109     *************************************************/
2110    
2111 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
2112     recursing; if it's a file, grep it.
2113    
2114     Arguments:
2115     pathname the path to investigate
2116 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2117 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
2118    
2119 ph10 1003 Returns: -1 the file/directory was skipped
2120     0 if there was at least one match
2121 nigel 77 1 if there were no matches
2122     2 there was some kind of error
2123    
2124     However, file opening failures are suppressed if "silent" is set.
2125     */
2126    
2127 nigel 53 static int
2128 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2129 nigel 53 {
2130     int rc = 1;
2131 ph10 286 int frtype;
2132     void *handle;
2133 ph10 1003 char *lastcomp;
2134 ph10 286 FILE *in = NULL; /* Ensure initialized */
2135 nigel 53
2136 ph10 286 #ifdef SUPPORT_LIBZ
2137     gzFile ingz = NULL;
2138     #endif
2139    
2140     #ifdef SUPPORT_LIBBZ2
2141     BZFILE *inbz2 = NULL;
2142     #endif
2143    
2144 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2145 ph10 879 int pathlen;
2146     #endif
2147    
2148 ph10 1354 #if defined NATIVE_ZOS
2149     int zos_type;
2150     FILE *zos_test_file;
2151     #endif
2152    
2153 nigel 77 /* If the file name is "-" we scan stdin */
2154 nigel 53
2155 nigel 77 if (strcmp(pathname, "-") == 0)
2156 nigel 53 {
2157 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
2158 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2159 nigel 77 stdin_name : NULL);
2160     }
2161    
2162 ph10 1003 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2163     directories, whereas --include and --exclude apply to everything else. The test
2164     is against the final component of the path. */
2165 nigel 87
2166 ph10 1003 lastcomp = strrchr(pathname, FILESEP);
2167     lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2168    
2169     /* If the file is a directory, skip if not recursing or if explicitly excluded.
2170     Otherwise, scan the directory and recurse for each path within it. The scanning
2171     code is localized so it can be made system-specific. */
2172    
2173 ph10 1354
2174     /* For z/OS, determine the file type. */
2175    
2176     #if defined NATIVE_ZOS
2177     zos_test_file = fopen(pathname,"rb");
2178    
2179     if (zos_test_file == NULL)
2180     {
2181 ph10 1404 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2182 ph10 1354 pathname, strerror(errno));
2183     return -1;
2184     }
2185     zos_type = identifyzosfiletype (zos_test_file);
2186     fclose (zos_test_file);
2187    
2188     /* Handle a PDS in separate code */
2189    
2190     if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2191     {
2192 ph10 1355 return travelonpdsdir (pathname, only_one_at_top);
2193 ph10 1354 }
2194    
2195     /* Deal with regular files in the normal way below. These types are:
2196     zos_type == __ZOS_PDS_MEMBER
2197     zos_type == __ZOS_PS
2198     zos_type == __ZOS_VSAM_KSDS
2199     zos_type == __ZOS_VSAM_ESDS
2200     zos_type == __ZOS_VSAM_RRDS
2201     */
2202    
2203     /* Handle a z/OS directory using common code. */
2204    
2205     else if (zos_type == __ZOS_HFS)
2206     {
2207     #endif /* NATIVE_ZOS */
2208    
2209    
2210     /* Handle directories: common code for all OS */
2211    
2212 ph10 1003 if (isdirectory(pathname))
2213 nigel 77 {
2214 ph10 1003 if (dee_action == dee_SKIP ||
2215     !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2216     return -1;
2217    
2218 nigel 87 if (dee_action == dee_RECURSE)
2219 nigel 53 {
2220 nigel 87 char buffer[1024];
2221     char *nextfile;
2222     directory_type *dir = opendirectory(pathname);
2223 nigel 53
2224 nigel 87 if (dir == NULL)
2225     {
2226     if (!silent)
2227     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2228     strerror(errno));
2229     return 2;
2230     }
2231 nigel 77
2232 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
2233     {
2234 ph10 1003 int frc;
2235     sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2236 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2237     if (frc > 1) rc = frc;
2238     else if (frc == 0 && rc == 1) rc = 0;
2239     }
2240    
2241     closedirectory(dir);
2242     return rc;
2243 nigel 53 }
2244     }
2245    
2246 ph10 1354 #if defined NATIVE_ZOS
2247     }
2248     #endif
2249 nigel 53
2250 ph10 1354 /* If the file is not a directory, check for a regular file, and if it is not,
2251     skip it if that's been requested. Otherwise, check for an explicit inclusion or
2252     exclusion. */
2253 nigel 87
2254 ph10 1354 else if (
2255     #if defined NATIVE_ZOS
2256     (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2257     #else /* all other OS */
2258     (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2259     #endif
2260     !test_incexc(lastcomp, include_patterns, exclude_patterns))
2261     return -1; /* File skipped */
2262    
2263 nigel 87 /* Control reaches here if we have a regular file, or if we have a directory
2264     and recursion or skipping was not requested, or if we have anything else and
2265     skipping was not requested. The scan proceeds. If this is the first and only
2266     argument at top level, we don't show the file name, unless we are only showing
2267     the file name, or the filename was forced (-H). */
2268    
2269 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2270 ph10 530 pathlen = (int)(strlen(pathname));
2271 ph10 879 #endif
2272 ph10 286
2273     /* Open using zlib if it is supported and the file name ends with .gz. */
2274    
2275     #ifdef SUPPORT_LIBZ
2276     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2277 nigel 53 {
2278 ph10 286 ingz = gzopen(pathname, "rb");
2279     if (ingz == NULL)
2280     {
2281     if (!silent)
2282     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2283     strerror(errno));
2284     return 2;
2285     }
2286     handle = (void *)ingz;
2287     frtype = FR_LIBZ;
2288     }
2289     else
2290     #endif
2291    
2292     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2293    
2294     #ifdef SUPPORT_LIBBZ2
2295     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2296     {
2297     inbz2 = BZ2_bzopen(pathname, "rb");
2298     handle = (void *)inbz2;
2299     frtype = FR_LIBBZ2;
2300     }
2301     else
2302     #endif
2303    
2304     /* Otherwise use plain fopen(). The label is so that we can come back here if
2305     an attempt to read a .bz2 file indicates that it really is a plain file. */
2306    
2307     #ifdef SUPPORT_LIBBZ2
2308     PLAIN_FILE:
2309     #endif
2310     {
2311 ph10 419 in = fopen(pathname, "rb");
2312 ph10 286 handle = (void *)in;
2313     frtype = FR_PLAIN;
2314     }
2315    
2316     /* All the opening methods return errno when they fail. */
2317    
2318     if (handle == NULL)
2319     {
2320 nigel 77 if (!silent)
2321     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2322     strerror(errno));
2323 nigel 53 return 2;
2324     }
2325    
2326 ph10 286 /* Now grep the file */
2327    
2328 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2329 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2330 nigel 77
2331 ph10 286 /* Close in an appropriate manner. */
2332    
2333     #ifdef SUPPORT_LIBZ
2334     if (frtype == FR_LIBZ)
2335     gzclose(ingz);
2336     else
2337     #endif
2338    
2339 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2340 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
2341     again as a normal file. */
2342    
2343     #ifdef SUPPORT_LIBBZ2
2344     if (frtype == FR_LIBBZ2)
2345     {
2346 ph10 644 if (rc == 3)
2347 ph10 286 {
2348     int errnum;
2349     const char *err = BZ2_bzerror(inbz2, &errnum);
2350     if (errnum == BZ_DATA_ERROR_MAGIC)
2351     {
2352     BZ2_bzclose(inbz2);
2353     goto PLAIN_FILE;
2354     }
2355     else if (!silent)
2356     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2357     pathname, err);
2358 ph10 654 rc = 2; /* The normal "something went wrong" code */
2359 ph10 286 }
2360     BZ2_bzclose(inbz2);
2361     }
2362     else
2363     #endif
2364    
2365     /* Normal file close */
2366    
2367 nigel 53 fclose(in);
2368 ph10 286
2369     /* Pass back the yield from pcregrep(). */
2370    
2371 nigel 53 return rc;
2372     }
2373    
2374    
2375    
2376     /*************************************************
2377 nigel 77 * Handle a single-letter, no data option *
2378 nigel 53 *************************************************/
2379    
2380     static int
2381     handle_option(int letter, int options)
2382     {
2383     switch(letter)
2384     {
2385 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
2386 ph10 561 case N_HELP: help(); pcregrep_exit(0);
2387 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
2388 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
2389 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2390 ph10 947 case 'a': binary_files = BIN_TEXT; break;
2391 nigel 53 case 'c': count_only = TRUE; break;
2392 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
2393     case 'H': filenames = FN_FORCE; break;
2394 ph10 947 case 'I': binary_files = BIN_NOMATCH; break;
2395 nigel 87 case 'h': filenames = FN_NONE; break;
2396 nigel 53 case 'i': options |= PCRE_CASELESS; break;
2397 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2398 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
2399 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2400 nigel 53 case 'n': number = TRUE; break;
2401 ph10 1221
2402 ph10 1039 case 'o':
2403     only_matching_last = add_number(0, only_matching_last);
2404     if (only_matching == NULL) only_matching = only_matching_last;
2405     break;
2406 ph10 1221
2407 nigel 77 case 'q': quiet = TRUE; break;
2408 nigel 87 case 'r': dee_action = dee_RECURSE; break;
2409 nigel 53 case 's': silent = TRUE; break;
2410 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2411 nigel 53 case 'v': invert = TRUE; break;
2412 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
2413     case 'x': process_options |= PO_LINE_MATCH; break;
2414 nigel 53
2415     case 'V':
2416 ph10 1003 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2417 ph10 561 pcregrep_exit(0);
2418 nigel 53 break;
2419    
2420     default:
2421     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2422 ph10 561 pcregrep_exit(usage(2));
2423 nigel 53 }
2424    
2425     return options;
2426     }
2427    
2428    
2429    
2430    
2431     /*************************************************
2432 nigel 87 * Construct printed ordinal *
2433     *************************************************/
2434    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12 or 13 take "th" ("11th", "112th"), as do negative numbers.

Argument:   n   the number
Returns:        pointer to a static buffer holding the text; the contents are
                overwritten by the next call
*/

static char *
ordin(int n)
{
/* Large enough for the digits of any int up to 64 bits wide, a minus sign,
the two-letter ending, and the terminating zero. */

static char buffer[24];
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

p += sprintf(p, "%d", n);

/* The "teens" are irregular: 11th, 12th, 13th rather than 11st, 12nd, 13rd. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
2453    
2454    
2455    
2456     /*************************************************
2457     * Compile a single pattern *
2458     *************************************************/
2459    
2460 ph10 1003 /* Do nothing if the pattern has already been compiled. This is the case for
2461     include/exclude patterns read from a file.
2462 nigel 87
2463 ph10 1003 When the -F option has been used, each "pattern" may be a list of strings,
2464     separated by line breaks. They will be matched literally. We split such a
2465     string and compile the first substring, inserting an additional block into the
2466     pattern chain.
2467    
2468 nigel 87 Arguments:
2469 ph10 1003 p points to the pattern block
2470 nigel 87 options the PCRE options
2471 ph10 1003 popts the processing options
2472     fromfile TRUE if the pattern was read from a file
2473     fromtext file name or identifying text (e.g. "include")
2474 nigel 87 count 0 if this is the only command line pattern, or
2475     number of the command line pattern, or
2476     linenumber for a pattern from a file
2477    
2478     Returns: TRUE on success, FALSE after an error
2479     */
2480    
2481     static BOOL
2482 ph10 1003 compile_pattern(patstr *p, int options, int popts, int fromfile,
2483     const char *fromtext, int count)
2484 nigel 87 {
2485 ph10 644 char buffer[PATBUFSIZE];
2486 nigel 87 const char *error;
2487 ph10 1003 char *ps = p->string;
2488     int patlen = strlen(ps);
2489 nigel 87 int errptr;
2490    
2491 ph10 1003 if (p->compiled != NULL) return TRUE;
2492    
2493     if ((popts & PO_FIXED_STRINGS) != 0)
2494 nigel 87 {
2495 ph10 1003 int ellength;
2496     char *eop = ps + patlen;
2497     char *pe = end_of_line(ps, eop, &ellength);
2498 nigel 87
2499 ph10 1003 if (ellength != 0)
2500     {
2501     if (add_pattern(pe, p) == NULL) return FALSE;
2502     patlen = (int)(pe - ps - ellength);
2503     }
2504 ph10 142 }
2505 nigel 87
2506 ph10 1003 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2507     p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2508     if (p->compiled != NULL) return TRUE;
2509    
2510 nigel 87 /* Handle compile errors */
2511    
2512 ph10 1003 errptr -= (int)strlen(prefix[popts]);
2513     if (errptr > patlen) errptr = patlen;
2514 nigel 87
2515 ph10 1003 if (fromfile)
2516 nigel 87 {
2517 ph10 1003 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2518     "at offset %d: %s\n", count, fromtext, errptr, error);
2519 nigel 87 }
2520     else
2521     {
2522 ph10 1003 if (count == 0)
2523     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2524     fromtext, errptr, error);
2525     else
2526     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2527     ordin(count), fromtext, errptr, error);
2528 nigel 87 }
2529    
2530     return FALSE;
2531     }
2532    
2533    
2534    
2535     /*************************************************
2536 ph10 1003 * Read and compile a file of patterns *
2537 nigel 87 *************************************************/
2538    
2539 ph10 1003 /* This is used for --filelist, --include-from, and --exclude-from.
2540 nigel 87
2541     Arguments:
2542 ph10 1003 name the name of the file; "-" is stdin
2543     patptr pointer to the pattern chain anchor
2544     patlastptr pointer to the last pattern pointer
2545     popts the process options to pass to pattern_compile()
2546 nigel 87
2547 ph10 1003 Returns: TRUE if all went well
2548 nigel 87 */
2549    
2550     static BOOL
2551 ph10 1003 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2552 nigel 87 {
2553 ph10 1003 int linenumber = 0;
2554     FILE *f;
2555     char *filename;
2556     char buffer[PATBUFSIZE];
2557    
2558     if (strcmp(name, "-") == 0)
2559 nigel 87 {
2560 ph10 1003 f = stdin;
2561     filename = stdin_name;
2562     }
2563     else
2564     {
2565     f = fopen(name, "r");
2566     if (f == NULL)
2567     {
2568     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2569     return FALSE;
2570     }
2571     filename = name;
2572     }
2573    
2574     while (fgets(buffer, PATBUFSIZE, f) != NULL)
2575     {
2576     char *s = buffer + (int)strlen(buffer);
2577     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2578     *s = 0;
2579     linenumber++;
2580     if (buffer[0] == 0) continue; /* Skip blank lines */
2581    
2582     /* Note: this call to add_pattern() puts a pointer to the local variable
2583     "buffer" into the pattern chain. However, that pointer is used only when
2584     compiling the pattern, which happens immediately below, so we flatten it
2585     afterwards, as a precaution against any later code trying to use it. */
2586    
2587     *patlastptr = add_pattern(buffer, *patlastptr);
2588 ph10 1502 if (*patlastptr == NULL)
2589 ph10 1492 {
2590     if (f != stdin) fclose(f);
2591     return FALSE;
2592 ph10 1502 }
2593 ph10 1003 if (*patptr == NULL) *patptr = *patlastptr;
2594    
2595     /* This loop is needed because compiling a "pattern" when -F is set may add
2596     on additional literal patterns if the original contains a newline. In the
2597     common case, it never will, because fgets() stops at a newline. However,
2598     the -N option can be used to give pcregrep a different newline setting. */
2599    
2600 nigel 87 for(;;)
2601     {
2602 ph10 1003 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2603     linenumber))
2604 ph10 1502 {
2605 ph10 1492 if (f != stdin) fclose(f);
2606 nigel 87 return FALSE;
2607 ph10 1502 }
2608 ph10 1003 (*patlastptr)->string = NULL; /* Insurance */
2609     if ((*patlastptr)->next == NULL) break;
2610     *patlastptr = (*patlastptr)->next;
2611 nigel 87 }
2612     }
2613 ph10 1003
2614     if (f != stdin) fclose(f);
2615     return TRUE;
2616 nigel 87 }
2617    
2618    
2619    
2620     /*************************************************
2621 nigel 49 * Main program *
2622     *************************************************/
2623    
2624 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2625    
2626 nigel 49 int
2627     main(int argc, char **argv)
2628     {
2629 nigel 53 int i, j;
2630 nigel 49 int rc = 1;
2631 nigel 87 BOOL only_one_at_top;
2632 ph10 1003 patstr *cp;
2633     fnstr *fn;
2634 nigel 87 const char *locale_from = "--locale";
2635 nigel 49 const char *error;
2636    
2637 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2638     pcre_jit_stack *jit_stack = NULL;
2639     #endif
2640    
2641 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2642     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2643 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2644 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2645 ph10 391 rather than escapes such as as '\r'. */
2646 nigel 91
2647     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2648     switch(i)
2649     {
2650 ph10 391 default: newline = (char *)"lf"; break;
2651     case 13: newline = (char *)"cr"; break;
2652     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2653     case -1: newline = (char *)"any"; break;
2654     case -2: newline = (char *)"anycrlf"; break;
2655 nigel 91 }
2656    
2657 nigel 49 /* Process the options */
2658    
2659     for (i = 1; i < argc; i++)
2660     {
2661 nigel 77 option_item *op = NULL;
2662     char *option_data = (char *)""; /* default to keep compiler happy */
2663     BOOL longop;
2664     BOOL longopwasequals = FALSE;
2665    
2666 nigel 49 if (argv[i][0] != '-') break;
2667 nigel 53
2668 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2669 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2670 nigel 63
2671 nigel 77 if (argv[i][1] == 0)
2672     {
2673 ph10 1003 if (pattern_files != NULL || patterns != NULL) break;
2674 ph10 561 else pcregrep_exit(usage(2));
2675 nigel 77 }
2676 nigel 63
2677 nigel 77 /* Handle a long name option, or -- to terminate the options */
2678 nigel 53
2679     if (argv[i][1] == '-')
2680 nigel 49 {
2681 nigel 77 char *arg = argv[i] + 2;
2682     char *argequals = strchr(arg, '=');
2683 nigel 53
2684 nigel 77 if (*arg == 0) /* -- terminates options */
2685 nigel 49 {
2686 nigel 77 i++;
2687     break; /* out of the options-handling loop */
2688 nigel 53 }
2689 nigel 49
2690 nigel 77 longop = TRUE;
2691    
2692     /* Some long options have data that follows after =, for example file=name.
2693     Some options have variations in the long name spelling: specifically, we
2694     allow "regexp" because GNU grep allows it, though I personally go along
2695 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2696 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2697     both these categories. */
2698 nigel 77
2699 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2700     {
2701 nigel 77 char *opbra = strchr(op->long_name, '(');
2702     char *equals = strchr(op->long_name, '=');
2703 ph10 461
2704 ph10 422 /* Handle options with only one spelling of the name */
2705 ph10 461
2706 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2707 nigel 53 {
2708 nigel 77 if (equals == NULL) /* Not thing=data case */
2709     {
2710     if (strcmp(arg, op->long_name) == 0) break;
2711     }
2712     else /* Special case xxx=data */
2713     {
2714 ph10 530 int oplen = (int)(equals - op->long_name);
2715 ph10 535 int arglen = (argequals == NULL)?
2716 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2717 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2718     {
2719     option_data = arg + arglen;
2720     if (*option_data == '=')
2721     {
2722     option_data++;
2723     longopwasequals = TRUE;
2724     }
2725     break;
2726     }
2727     }
2728 nigel 53 }
2729 ph10 461
2730 ph10 422 /* Handle options with an alternate spelling of the name */
2731 ph10 461
2732     else
2733 nigel 77 {
2734     char buff1[24];
2735     char buff2[24];
2736 ph10 461
2737 ph10 530 int baselen = (int)(opbra - op->long_name);
2738     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2739 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2740 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2741 ph10 461
2742 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2743 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2744 ph10 461
2745     if (strncmp(arg, buff1, arglen) == 0 ||
2746 ph10 422 strncmp(arg, buff2, arglen) == 0)
2747     {
2748     if (equals != NULL && argequals != NULL)
2749     {
2750 ph10 461 option_data = argequals;
2751 ph10 422 if (*option_data == '=')
2752     {
2753 ph10 461 option_data++;
2754 ph10 422 longopwasequals = TRUE;
2755 ph10 461 }
2756     }
2757 nigel 77 break;
2758 ph10 461 }
2759 nigel 77 }
2760 nigel 53 }
2761 nigel 77
2762 nigel 53 if (op->one_char == 0)
2763     {
2764     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2765 ph10 561 pcregrep_exit(usage(2));
2766 nigel 53 }
2767     }
2768 nigel 49
2769 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2770     are not in the right form for putting in the option table because they use
2771     only one hyphen, yet are more than one character long. By putting them
2772     separately here, they will not get displayed as part of the help() output,
2773     but I don't think Jeffrey will care about that. */
2774    
2775     #ifdef JFRIEDL_DEBUG
2776     else if (strcmp(argv[i], "-pre") == 0) {
2777     jfriedl_prefix = argv[++i];
2778     continue;
2779     } else if (strcmp(argv[i], "-post") == 0) {
2780     jfriedl_postfix = argv[++i];
2781     continue;
2782     } else if (strcmp(argv[i], "-XT") == 0) {
2783     sscanf(argv[++i], "%d", &jfriedl_XT);
2784     continue;
2785     } else if (strcmp(argv[i], "-XR") == 0) {
2786     sscanf(argv[++i], "%d", &jfriedl_XR);
2787     continue;
2788     }
2789     #endif
2790    
2791    
2792 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2793     continue till we hit the last one or one that needs data. */
2794 nigel 53
2795     else
2796     {
2797     char *s = argv[i] + 1;
2798 nigel 77 longop = FALSE;
2799 ph10 1221
2800 nigel 53 while (*s != 0)
2801     {
2802 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2803 ph10 579 {
2804     if (*s == op->one_char) break;
2805 ph10 565 }
2806 nigel 77 if (op->one_char == 0)
2807 nigel 53 {
2808 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2809     *s, argv[i]);
2810 ph10 561 pcregrep_exit(usage(2));
2811 nigel 77 }
2812 ph10 1221
2813 ph10 1039 option_data = s+1;
2814 ph10 1221
2815     /* Break out if this is the last character in the string; it's handled
2816 ph10 1039 below like a single multi-char option. */
2817 ph10 579
2818 ph10 1221 if (*option_data == 0) break;
2819    
2820 ph10 1039 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2821     are used for ones that either have a numerical number or defaults, i.e.
2822     the data is optional. If a digit follows, there is data; if not, carry on
2823 ph10 565 with other single-character options in the same string. */
2824 ph10 579
2825 ph10 1039 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2826 ph10 579 {
2827     if (isdigit((unsigned char)s[1])) break;
2828 nigel 53 }
2829 ph10 1039 else /* Check for an option with data */
2830 ph10 579 {
2831 ph10 1039 if (op->type != OP_NODATA) break;
2832 ph10 579 }
2833    
2834     /* Handle a single-character option with no data, then loop for the
2835 ph10 565 next character in the string. */
2836 ph10 1221
2837 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2838 nigel 49 }
2839     }
2840 ph10 1221
2841 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2842     is NO_DATA, it means that there is no data, and the option might set
2843     something in the PCRE options. */
2844 nigel 77
2845     if (op->type == OP_NODATA)
2846     {
2847 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2848     continue;
2849     }
2850    
2851 ph10 1039 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2852 nigel 87 either has a value or defaults to something. It cannot have data in a
2853 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2854 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2855 ph10 1221
2856 nigel 87 if (*option_data == 0 &&
2857 ph10 1039 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2858     op->type == OP_OP_NUMBERS))
2859 nigel 87 {
2860     switch (op->one_char)
2861 nigel 77 {
2862 nigel 87 case N_COLOUR:
2863     colour_option = (char *)"auto";
2864     break;
2865 ph10 579
2866 ph10 565 case 'o':
2867 ph10 1039 only_matching_last = add_number(0, only_matching_last);
2868     if (only_matching == NULL) only_matching = only_matching_last;
2869 ph10 579 break;
2870    
2871 nigel 87 #ifdef JFRIEDL_DEBUG
2872     case 'S':
2873     S_arg = 0;
2874     break;
2875     #endif
2876 nigel 77 }
2877 nigel 87 continue;
2878     }
2879 nigel 77
2880 nigel 87 /* Otherwise, find the data string for the option. */
2881    
2882     if (*option_data == 0)
2883     {
2884     if (i >= argc - 1 || longopwasequals)
2885 nigel 77 {
2886 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2887 ph10 561 pcregrep_exit(usage(2));
2888 nigel 87 }
2889     option_data = argv[++i];
2890     }
2891    
2892 ph10 1039 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2893     added to a chain of numbers. */
2894    
2895     if (op->type == OP_OP_NUMBERS)
2896     {
2897     unsigned long int n = decode_number(option_data, op, longop);
2898     omdatastr *omd = (omdatastr *)op->dataptr;
2899     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2900     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2901     }
2902    
2903 ph10 1003 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2904     include/exclude options, which can be called multiple times to create lists
2905     of patterns. */
2906 ph10 975
2907 ph10 1039 else if (op->type == OP_PATLIST)
2908     {
2909     patdatastr *pd = (patdatastr *)op->dataptr;
2910     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2911     if (*(pd->lastptr) == NULL) goto EXIT2;
2912     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2913     }
2914 ph10 1003
2915     /* If the option type is OP_FILELIST, it's one of the options that names a
2916     file. */
2917    
2918     else if (op->type == OP_FILELIST)
2919 nigel 87 {
2920 ph10 1003 fndatastr *fd = (fndatastr *)op->dataptr;
2921     fn = (fnstr *)malloc(sizeof(fnstr));
2922     if (fn == NULL)
2923 nigel 87 {
2924 ph10 1003 fprintf(stderr, "pcregrep: malloc failed\n");
2925     goto EXIT2;
2926 nigel 87 }
2927 ph10 1003 fn->next = NULL;
2928     fn->name = option_data;
2929     if (*(fd->anchor) == NULL)
2930     *(fd->anchor) = fn;
2931     else
2932     (*(fd->lastptr))->next = fn;
2933     *(fd->lastptr) = fn;
2934 nigel 87 }
2935 ph10 975
2936 ph10 947 /* Handle OP_BINARY_FILES */
2937 ph10 975
2938 ph10 947 else if (op->type == OP_BINFILES)
2939     {
2940     if (strcmp(option_data, "binary") == 0)
2941     binary_files = BIN_BINARY;
2942     else if (strcmp(option_data, "without-match") == 0)
2943     binary_files = BIN_NOMATCH;
2944     else if (strcmp(option_data, "text") == 0)
2945     binary_files = BIN_TEXT;
2946     else
2947     {
2948 ph10 975 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2949     option_data);
2950 ph10 947 pcregrep_exit(usage(2));
2951 ph10 975 }
2952     }
2953 nigel 87
2954 ph10 1039 /* Otherwise, deal with a single string or numeric data value. */
2955 nigel 87
2956 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2957     op->type != OP_OP_NUMBER)
2958 nigel 87 {
2959     *((char **)op->dataptr) = option_data;
2960     }
2961     else
2962     {
2963 ph10 1039 unsigned long int n = decode_number(option_data, op, longop);
2964     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2965     else *((int *)op->dataptr) = n;
2966 nigel 77 }
2967 nigel 49 }
2968    
2969 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2970     for -A and -B. */
2971    
2972     if (both_context > 0)
2973     {
2974     if (after_context == 0) after_context = both_context;
2975     if (before_context == 0) before_context = both_context;
2976     }
2977 ph10 286
2978     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2979 ph10 1039 However, all three set show_only_matching because they display, each in their
2980     own way, only the data that has matched. */
2981 nigel 77
2982 ph10 1039 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2983 ph10 286 (file_offsets && line_offsets))
2984 ph10 280 {
2985     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2986     "and/or --line-offsets\n");
2987 ph10 561 pcregrep_exit(usage(2));
2988 ph10 280 }
2989    
2990 ph10 1039 if (only_matching != NULL || file_offsets || line_offsets)
2991     show_only_matching = TRUE;
2992 ph10 286
2993 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2994     LC_ALL environment variable is set, and if so, use it. */
2995 nigel 49
2996 nigel 87 if (locale == NULL)
2997 nigel 53 {
2998 nigel 87 locale = getenv("LC_ALL");
2999     locale_from = "LCC_ALL";
3000 nigel 53 }
3001 nigel 49
3002 nigel 87 if (locale == NULL)
3003     {
3004     locale = getenv("LC_CTYPE");
3005     locale_from = "LC_CTYPE";
3006     }
3007 nigel 49
3008 ph10 1492 /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
3009     pcretables==NULL, which causes the use of default tables. */
3010 nigel 87
3011     if (locale != NULL)
3012 nigel 49 {
3013 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
3014 nigel 53 {
3015 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
3016     locale, locale_from);
3017 ph10 1492 goto EXIT2;
3018 nigel 53 }
3019 nigel 87 pcretables = pcre_maketables();
3020     }
3021 nigel 77
3022 nigel 87 /* Sort out colouring */
3023    
3024     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3025     {
3026     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3027     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3028     else
3029 nigel 53 {
3030 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
3031     colour_option);
3032 ph10 1492 goto EXIT2;
3033 nigel 77 }
3034 nigel 87 if (do_colour)
3035 nigel 77 {
3036 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
3037     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3038     if (cs != NULL) colour_string = cs;
3039 nigel 77 }
3040 nigel 87 }
3041 ph10 535
3042 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
3043    
3044     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3045     {
3046     pcre_options |= PCRE_NEWLINE_CR;
3047 nigel 93 endlinetype = EL_CR;
3048 nigel 91 }
3049     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3050     {
3051     pcre_options |= PCRE_NEWLINE_LF;
3052 nigel 93 endlinetype = EL_LF;
3053 nigel 91 }
3054     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3055     {
3056     pcre_options |= PCRE_NEWLINE_CRLF;
3057 nigel 93 endlinetype = EL_CRLF;
3058 nigel 91 }
3059 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3060     {
3061     pcre_options |= PCRE_NEWLINE_ANY;
3062     endlinetype = EL_ANY;
3063     }
3064 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3065     {
3066     pcre_options |= PCRE_NEWLINE_ANYCRLF;
3067     endlinetype = EL_ANYCRLF;
3068     }
3069 nigel 91 else
3070     {
3071     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3072 ph10 1492 goto EXIT2;
3073 nigel 91 }
3074    
3075 nigel 87 /* Interpret the text values for -d and -D */
3076    
3077     if (dee_option != NULL)
3078     {
3079     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3080     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3081     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3082     else
3083 nigel 77 {
3084 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3085 ph10 1492 goto EXIT2;
3086 nigel 53 }
3087 nigel 49 }
3088    
3089 nigel 87 if (DEE_option != NULL)
3090     {
3091     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3092     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3093     else
3094     {
3095     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3096 ph10 1492 goto EXIT2;
3097 nigel 87 }
3098     }
3099 nigel 49
3100 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
3101 nigel 87
3102     #ifdef JFRIEDL_DEBUG
3103     if (S_arg > 9)
3104 nigel 49 {
3105 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
3106     return 2;
3107     }
3108 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3109     {
3110     if (jfriedl_XT == 0) jfriedl_XT = 1;
3111     if (jfriedl_XR == 0) jfriedl_XR = 1;
3112     }
3113 nigel 87 #endif
3114 nigel 77
3115 ph10 1003 /* Get memory for the main buffer. */
3116 nigel 87
3117 ph10 644 bufsize = 3*bufthird;
3118     main_buffer = (char *)malloc(bufsize);
3119 nigel 87
3120 ph10 1003 if (main_buffer == NULL)
3121 nigel 87 {
3122     fprintf(stderr, "pcregrep: malloc failed\n");
3123 ph10 123 goto EXIT2;
3124 nigel 87 }
3125    
3126 ph10 1003 /* If no patterns were provided by -e, and there are no files provided by -f,
3127 nigel 87 the first argument is the one and only pattern, and it must exist. */
3128    
3129 ph10 1003 if (patterns == NULL && pattern_files == NULL)
3130 nigel 87 {
3131 nigel 63 if (i >= argc) return usage(2);
3132 ph10 1003 patterns = patterns_last = add_pattern(argv[i++], NULL);
3133     if (patterns == NULL) goto EXIT2;
3134 nigel 87 }
3135 nigel 77
3136 nigel 87 /* Compile the patterns that were provided on the command line, either by
3137 ph10 1003 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3138     after all the command-line options are read so that we know which PCRE options
3139     to use. When -F is used, compile_pattern() may add another block into the
3140     chain, so we must not access the next pointer till after the compile. */
3141 nigel 87
3142 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3143 nigel 87 {
3144 ph10 1003 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3145     (j == 1 && patterns->next == NULL)? 0 : j))
3146 ph10 123 goto EXIT2;
3147 nigel 87 }
3148    
3149 ph10 1003 /* Read and compile the regular expressions that are provided in files. */
3150 nigel 87
3151 ph10 1003 for (fn = pattern_files; fn != NULL; fn = fn->next)
3152 nigel 87 {
3153 ph10 1003 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3154     goto EXIT2;
3155 ph10 1004 }
3156 nigel 87
3157 ph10 1039 /* Study the regular expressions, as we will be running them many times. If an
3158 ph10 1035 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3159     returned, even if studying produces no data. */
3160 nigel 53
3161 ph10 1035 if (match_limit > 0 || match_limit_recursion > 0)
3162     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3163    
3164     /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3165    
3166 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3167     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3168     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3169 ph10 691 #endif
3170    
3171 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3172 nigel 53 {
3173 ph10 1003 cp->hint = pcre_study(cp->compiled, study_options, &error);
3174 nigel 53 if (error != NULL)
3175     {
3176     char s[16];
3177 ph10 1003 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3178 nigel 53 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3179 ph10 121 goto EXIT2;
3180 nigel 53 }
3181 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3182 ph10 1003 if (jit_stack != NULL && cp->hint != NULL)
3183     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3184 ph10 685 #endif
3185 nigel 53 }
3186 ph10 579
3187 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3188 ph10 1039 pcre_extra block for each pattern. There will always be an extra block because
3189 ph10 1035 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3190 nigel 53
3191 ph10 1035 for (cp = patterns; cp != NULL; cp = cp->next)
3192 ph10 561 {
3193 ph10 1035 if (match_limit > 0)
3194 ph10 561 {
3195 ph10 1035 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3196     cp->hint->match_limit = match_limit;
3197 ph10 561 }
3198 ph10 1039
3199 ph10 1035 if (match_limit_recursion > 0)
3200     {
3201     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3202     cp->hint->match_limit_recursion = match_limit_recursion;
3203     }
3204 ph10 579 }
3205 ph10 561
3206 ph10 1003 /* If there are include or exclude patterns read from the command line, compile
3207     them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3208     0. */
3209 nigel 77
3210 ph10 1003 for (j = 0; j < 4; j++)
3211 nigel 77 {
3212 ph10 1003 int k;
3213     for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3214 nigel 77 {
3215 ph10 1003 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3216     (k == 1 && cp->next == NULL)? 0 : k))
3217     goto EXIT2;
3218 nigel 77 }
3219     }
3220    
3221 ph10 1003 /* Read and compile include/exclude patterns from files. */
3222    
3223     for (fn = include_from; fn != NULL; fn = fn->next)
3224 nigel 77 {
3225 ph10 1003 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3226 ph10 121 goto EXIT2;
3227 nigel 77 }
3228    
3229 ph10 1003 for (fn = exclude_from; fn != NULL; fn = fn->next)
3230 ph10 325 {
3231 ph10 1003 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3232 ph10 325 goto EXIT2;
3233     }
3234    
3235 ph10 1003 /* If there are no files that contain lists of files to search, and there are
3236     no file arguments, search stdin, and then exit. */
3237    
3238     if (file_lists == NULL && i >= argc)
3239 ph10 325 {
3240 ph10 1003 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3241     (filenames > FN_DEFAULT)? stdin_name : NULL);
3242     goto EXIT;
3243 ph10 325 }
3244 ph10 975
3245 ph10 1003 /* If any files that contains a list of files to search have been specified,
3246     read them line by line and search the given files. */
3247 ph10 325
3248 ph10 1003 for (fn = file_lists; fn != NULL; fn = fn->next)
3249 ph10 944 {
3250     char buffer[PATBUFSIZE];
3251     FILE *fl;
3252 ph10 1003 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3253 ph10 975 {
3254 ph10 1003 fl = fopen(fn->name, "rb");
3255 ph10 944 if (fl == NULL)
3256     {
3257 ph10 1003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3258 ph10 944 strerror(errno));
3259     goto EXIT2;
3260 ph10 975 }
3261     }
3262 ph10 944 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3263     {
3264     int frc;
3265     char *end = buffer + (int)strlen(buffer);
3266     while (end > buffer && isspace(end[-1])) end--;
3267 ph10 975 *end = 0;
3268     if (*buffer != 0)
3269     {
3270     frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3271 ph10 944 if (frc > 1) rc = frc;
3272 ph10 975 else if (frc == 0 && rc == 1) rc = 0;
3273     }
3274     }
3275 ph10 1003 if (fl != stdin) fclose(fl);
3276 ph10 975 }
3277 nigel 49
3278 ph10 1003 /* After handling file-list, work through remaining arguments. Pass in the fact
3279     that there is only one argument at top level - this suppresses the file name if
3280     the argument is not a directory and filenames are not otherwise forced. */
3281 ph10 944
3282 ph10 1003 only_one_at_top = i == argc - 1 && file_lists == NULL;
3283 nigel 49
3284     for (; i < argc; i++)
3285     {
3286 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3287     only_one_at_top);
3288 nigel 77 if (frc > 1) rc = frc;
3289     else if (frc == 0 && rc == 1) rc = 0;
3290 nigel 49 }
3291    
3292 ph10 121 EXIT:
3293 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3294     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3295     #endif
3296 ph10 1003
3297 ph10 1492 free(main_buffer);
3298     free((void *)pcretables);
3299 ph10 1003
3300     free_pattern_chain(patterns);
3301     free_pattern_chain(include_patterns);
3302     free_pattern_chain(include_dir_patterns);
3303     free_pattern_chain(exclude_patterns);
3304     free_pattern_chain(exclude_dir_patterns);
3305    
3306     free_file_chain(exclude_from);
3307     free_file_chain(include_from);
3308     free_file_chain(pattern_files);
3309     free_file_chain(file_lists);
3310    
3311 ph10 1039 while (only_matching != NULL)
3312     {
3313     omstr *this = only_matching;
3314     only_matching = this->next;
3315     free(this);
3316     }
3317    
3318 ph10 561 pcregrep_exit(rc);
3319 ph10 121
3320     EXIT2:
3321     rc = 2;
3322     goto EXIT;
3323 nigel 49 }
3324    
3325 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12