/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1324 - (hide annotations) (download)
Fri May 10 11:40:06 2013 UTC (11 days ago) by ph10
File MIME type: text/plain
File size: 92469 byte(s)
Fix pcregrep so that it can find empty lines.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 ph10 378 #define OFFSET_SIZE 99
74 nigel 49
75 nigel 77 #if BUFSIZ > 8192
76 ph10 1003 #define MAXPATLEN BUFSIZ
77 nigel 77 #else
78 ph10 1003 #define MAXPATLEN 8192
79 nigel 77 #endif
80 nigel 49
81 ph10 1003 #define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */
82    
83 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
84     output. The order is important; it is assumed that a file name is wanted for
85     all values greater than FN_DEFAULT. */
86 nigel 77
87 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
88 nigel 87
89 ph10 286 /* File reading styles */
90    
91     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
92    
93 nigel 87 /* Actions for the -d and -D options */
94    
95     enum { dee_READ, dee_SKIP, dee_RECURSE };
96     enum { DEE_READ, DEE_SKIP };
97    
98     /* Actions for special processing options (flag bits) */
99    
100     #define PO_WORD_MATCH 0x0001
101     #define PO_LINE_MATCH 0x0002
102     #define PO_FIXED_STRINGS 0x0004
103    
104 nigel 93 /* Line ending types */
105 nigel 87
106 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
107 nigel 87
108 ph10 947 /* Binary file options */
109    
110     enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
111    
112 ph10 535 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
113     environments), a warning is issued if the value of fwrite() is ignored.
114     Unfortunately, casting to (void) does not suppress the warning. To get round
115     this, we use a macro that compiles a fudge. Oddly, this does not also seem to
116 ph10 515 apply to fprintf(). */
117 nigel 93
118 ph10 515 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
119 nigel 93
120 ph10 515
121    
122 nigel 49 /*************************************************
123     * Global variables *
124     *************************************************/
125    
126 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
127     regular code. */
128    
129     #ifdef JFRIEDL_DEBUG
130     static int S_arg = -1;
131 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133     static const char *jfriedl_prefix = "";
134     static const char *jfriedl_postfix = "";
135 nigel 87 #endif
136    
137 nigel 93 static int endlinetype;
138 nigel 91
139 nigel 87 static char *colour_string = (char *)"1;31";
140     static char *colour_option = NULL;
141     static char *dee_option = NULL;
142     static char *DEE_option = NULL;
143 ph10 1003 static char *locale = NULL;
144 ph10 644 static char *main_buffer = NULL;
145 nigel 91 static char *newline = NULL;
146 ph10 1039 static char *om_separator = (char *)"";
147 nigel 77 static char *stdin_name = (char *)"(standard input)";
148 nigel 87
149     static const unsigned char *pcretables = NULL;
150    
151 nigel 77 static int after_context = 0;
152     static int before_context = 0;
153 ph10 947 static int binary_files = BIN_BINARY;
154 nigel 77 static int both_context = 0;
155 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
156     static int bufsize = 3*PCREGREP_BUFSIZE;
157 ph10 1003
158     #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
159     static int dee_action = dee_SKIP;
160     #else
161 nigel 87 static int dee_action = dee_READ;
162 ph10 1003 #endif
163    
164 nigel 87 static int DEE_action = DEE_READ;
165     static int error_count = 0;
166     static int filenames = FN_DEFAULT;
167 ph10 1003 static int pcre_options = 0;
168 nigel 87 static int process_options = 0;
169 ph10 685
170     #ifdef SUPPORT_PCREGREP_JIT
171     static int study_options = PCRE_STUDY_JIT_COMPILE;
172     #else
173 ph10 667 static int study_options = 0;
174 ph10 685 #endif
175 nigel 77
176 ph10 561 static unsigned long int match_limit = 0;
177     static unsigned long int match_limit_recursion = 0;
178    
179 nigel 49 static BOOL count_only = FALSE;
180 nigel 87 static BOOL do_colour = FALSE;
181 ph10 280 static BOOL file_offsets = FALSE;
182 nigel 77 static BOOL hyphenpending = FALSE;
183 nigel 49 static BOOL invert = FALSE;
184 ph10 519 static BOOL line_buffered = FALSE;
185 ph10 280 static BOOL line_offsets = FALSE;
186 nigel 77 static BOOL multiline = FALSE;
187 nigel 49 static BOOL number = FALSE;
188 ph10 420 static BOOL omit_zero_count = FALSE;
189 ph10 561 static BOOL resource_error = FALSE;
190 nigel 77 static BOOL quiet = FALSE;
191 ph10 1039 static BOOL show_only_matching = FALSE;
192 nigel 49 static BOOL silent = FALSE;
193 nigel 93 static BOOL utf8 = FALSE;
194 nigel 49
195 ph10 1039 /* Structure for list of --only-matching capturing numbers. */
196    
197     typedef struct omstr {
198     struct omstr *next;
199     int groupnum;
200     } omstr;
201    
202     static omstr *only_matching = NULL;
203     static omstr *only_matching_last = NULL;
204    
205     /* Structure for holding the two variables that describe a number chain. */
206    
207     typedef struct omdatastr {
208     omstr **anchor;
209     omstr **lastptr;
210     } omdatastr;
211    
212     static omdatastr only_matching_data = { &only_matching, &only_matching_last };
213    
214 ph10 1003 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
215    
216     typedef struct fnstr {
217     struct fnstr *next;
218     char *name;
219     } fnstr;
220    
221     static fnstr *exclude_from = NULL;
222     static fnstr *exclude_from_last = NULL;
223     static fnstr *include_from = NULL;
224     static fnstr *include_from_last = NULL;
225    
226     static fnstr *file_lists = NULL;
227     static fnstr *file_lists_last = NULL;
228     static fnstr *pattern_files = NULL;
229     static fnstr *pattern_files_last = NULL;
230    
231     /* Structure for holding the two variables that describe a file name chain. */
232    
233     typedef struct fndatastr {
234     fnstr **anchor;
235     fnstr **lastptr;
236     } fndatastr;
237    
238     static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
239     static fndatastr include_from_data = { &include_from, &include_from_last };
240     static fndatastr file_lists_data = { &file_lists, &file_lists_last };
241     static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
242    
243     /* Structure for pattern and its compiled form; used for matching patterns and
244     also for include/exclude patterns. */
245    
246     typedef struct patstr {
247     struct patstr *next;
248     char *string;
249     pcre *compiled;
250     pcre_extra *hint;
251     } patstr;
252    
253     static patstr *patterns = NULL;
254     static patstr *patterns_last = NULL;
255     static patstr *include_patterns = NULL;
256     static patstr *include_patterns_last = NULL;
257     static patstr *exclude_patterns = NULL;
258     static patstr *exclude_patterns_last = NULL;
259     static patstr *include_dir_patterns = NULL;
260     static patstr *include_dir_patterns_last = NULL;
261     static patstr *exclude_dir_patterns = NULL;
262     static patstr *exclude_dir_patterns_last = NULL;
263    
264     /* Structure holding the two variables that describe a pattern chain. A pointer
265     to such structures is used for each appropriate option. */
266    
267     typedef struct patdatastr {
268     patstr **anchor;
269     patstr **lastptr;
270     } patdatastr;
271    
272     static patdatastr match_patdata = { &patterns, &patterns_last };
273     static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
274     static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
275     static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
276     static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
277    
278     static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
279     &include_dir_patterns, &exclude_dir_patterns };
280    
281     static const char *incexname[4] = { "--include", "--exclude",
282     "--include-dir", "--exclude-dir" };
283    
284 nigel 53 /* Structure for options and list of them */
285 nigel 49
286 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
287 ph10 1039 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
288 nigel 77
289 nigel 53 typedef struct option_item {
290 nigel 77 int type;
291 nigel 53 int one_char;
292 nigel 77 void *dataptr;
293 nigel 67 const char *long_name;
294     const char *help_text;
295 nigel 53 } option_item;
296 nigel 49
297 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
298     used to identify them. */
299    
300 ph10 325 #define N_COLOUR (-1)
301     #define N_EXCLUDE (-2)
302     #define N_EXCLUDE_DIR (-3)
303     #define N_HELP (-4)
304     #define N_INCLUDE (-5)
305     #define N_INCLUDE_DIR (-6)
306     #define N_LABEL (-7)
307     #define N_LOCALE (-8)
308     #define N_NULL (-9)
309     #define N_LOFFSETS (-10)
310     #define N_FOFFSETS (-11)
311 ph10 519 #define N_LBUFFER (-12)
312 ph10 561 #define N_M_LIMIT (-13)
313     #define N_M_LIMIT_REC (-14)
314 ph10 644 #define N_BUFSIZE (-15)
315 ph10 685 #define N_NOJIT (-16)
316 ph10 944 #define N_FILE_LIST (-17)
317 ph10 947 #define N_BINARY_FILES (-18)
318 ph10 1003 #define N_EXCLUDE_FROM (-19)
319     #define N_INCLUDE_FROM (-20)
320 ph10 1039 #define N_OM_SEPARATOR (-21)
321 nigel 87
322 nigel 53 static option_item optionlist[] = {
323 ph10 947 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
324 ph10 584 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
325     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
326 ph10 947 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
327 ph10 584 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
328 ph10 947 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
329 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
330 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
331     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
332     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
333     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
334     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
335     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
336 ph10 1003 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
337 ph10 584 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
338 ph10 1003 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
339     { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
340 ph10 584 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
341     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
342     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
343 ph10 947 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
344 ph10 584 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
345 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
346     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
347     #else
348     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
349     #endif
350 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
351     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
352     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
353     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
354     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
355     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
356     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
357     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
358     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
359     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
360     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
361 ph10 1039 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
362     { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
363 ph10 584 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
364     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
365 ph10 1003 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
366     { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
367     { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
368     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
369     { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
370     { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
371 ph10 571
372     /* These two were accidentally implemented with underscores instead of
373     hyphens in the option names. As this was not discovered for several releases,
374     the incorrect versions are left in the table for compatibility. However, the
375     --help function misses out any option that has an underscore in its name. */
376 ph10 579
377 ph10 1003 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
378     { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
379 ph10 571
380 nigel 87 #ifdef JFRIEDL_DEBUG
381     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
382     #endif
383     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
384     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
385     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
386     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
387     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
388     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
389     { OP_NODATA, 0, NULL, NULL, NULL }
390 nigel 53 };
391    
392 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
393     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
394     that the combination of -w and -x has the same effect as -x on its own, so we
395 ph10 1003 can treat them as the same. Note that the MAXPATLEN macro assumes the longest
396     prefix+suffix is 10 characters; if anything longer is added, it must be
397     adjusted. */
398 nigel 53
399 nigel 87 static const char *prefix[] = {
400     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
401    
402     static const char *suffix[] = {
403     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
404    
405 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
406 nigel 87
407 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
408 nigel 87
409 nigel 93 const char utf8_table4[] = {
410     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
411     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
412     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
413     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
414    
415    
416    
417 nigel 53 /*************************************************
418 ph10 1039 * Exit from the program *
419     *************************************************/
420    
421     /* If there has been a resource error, give a suitable message.
422    
423     Argument: the return code
424     Returns: does not return
425     */
426    
427     static void
428     pcregrep_exit(int rc)
429     {
430     if (resource_error)
431     {
432     fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
433     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
434     PCRE_ERROR_JIT_STACKLIMIT);
435     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
436     }
437     exit(rc);
438     }
439    
440    
441     /*************************************************
442 ph10 1003 * Add item to chain of patterns *
443     *************************************************/
444    
445     /* Used to add an item onto a chain, or just return an unconnected item if the
446     "after" argument is NULL.
447    
448     Arguments:
449     s pattern string to add
450     after if not NULL points to item to insert after
451    
452 ph10 1039 Returns: new pattern block
453 ph10 1003 */
454    
455     static patstr *
456     add_pattern(char *s, patstr *after)
457     {
458     patstr *p = (patstr *)malloc(sizeof(patstr));
459     if (p == NULL)
460     {
461     fprintf(stderr, "pcregrep: malloc failed\n");
462 ph10 1039 pcregrep_exit(2);
463 ph10 1003 }
464     if (strlen(s) > MAXPATLEN)
465     {
466     fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
467     MAXPATLEN);
468     return NULL;
469     }
470     p->next = NULL;
471     p->string = s;
472     p->compiled = NULL;
473     p->hint = NULL;
474    
475     if (after != NULL)
476     {
477     p->next = after->next;
478     after->next = p;
479     }
480     return p;
481     }
482    
483    
484     /*************************************************
485     * Free chain of patterns *
486     *************************************************/
487    
488     /* Used for several chains of patterns.
489    
490     Argument: pointer to start of chain
491     Returns: nothing
492     */
493    
494     static void
495     free_pattern_chain(patstr *pc)
496     {
497     while (pc != NULL)
498     {
499     patstr *p = pc;
500     pc = p->next;
501     if (p->hint != NULL) pcre_free_study(p->hint);
502     if (p->compiled != NULL) pcre_free(p->compiled);
503     free(p);
504     }
505     }
506    
507    
508     /*************************************************
509     * Free chain of file names *
510     *************************************************/
511    
512     /*
513     Argument: pointer to start of chain
514     Returns: nothing
515     */
516    
517     static void
518     free_file_chain(fnstr *fn)
519     {
520     while (fn != NULL)
521     {
522     fnstr *f = fn;
523     fn = f->next;
524     free(f);
525     }
526     }
527    
528    
529     /*************************************************
530 nigel 87 * OS-specific functions *
531 nigel 53 *************************************************/
532    
533     /* These functions are defined so that they can be made system specific,
534 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
535 nigel 53
536    
537     /************* Directory scanning in Unix ***********/
538    
539 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
540 nigel 53 #include <sys/types.h>
541     #include <sys/stat.h>
542     #include <dirent.h>
543    
544     typedef DIR directory_type;
545 ph10 1003 #define FILESEP '/'
546 nigel 53
547 nigel 67 static int
548 nigel 53 isdirectory(char *filename)
549     {
550     struct stat statbuf;
551     if (stat(filename, &statbuf) < 0)
552     return 0; /* In the expectation that opening as a file will fail */
553 ph10 1003 return (statbuf.st_mode & S_IFMT) == S_IFDIR;
554 nigel 53 }
555    
556 nigel 67 static directory_type *
557 nigel 53 opendirectory(char *filename)
558     {
559     return opendir(filename);
560     }
561    
562 nigel 67 static char *
563 nigel 53 readdirectory(directory_type *dir)
564     {
565     for (;;)
566     {
567     struct dirent *dent = readdir(dir);
568     if (dent == NULL) return NULL;
569     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
570     return dent->d_name;
571     }
572 ph10 151 /* Control never reaches here */
573 nigel 53 }
574    
575 nigel 67 static void
576 nigel 53 closedirectory(directory_type *dir)
577     {
578     closedir(dir);
579     }
580    
581    
582 nigel 87 /************* Test for regular file in Unix **********/
583    
584     static int
585     isregfile(char *filename)
586     {
587     struct stat statbuf;
588     if (stat(filename, &statbuf) < 0)
589     return 1; /* In the expectation that opening as a file will fail */
590     return (statbuf.st_mode & S_IFMT) == S_IFREG;
591     }
592    
593    
594 ph10 519 /************* Test for a terminal in Unix **********/
595 nigel 87
596     static BOOL
597     is_stdout_tty(void)
598     {
599     return isatty(fileno(stdout));
600     }
601    
602 ph10 519 static BOOL
603     is_file_tty(FILE *f)
604     {
605     return isatty(fileno(f));
606     }
607 nigel 87
608 ph10 519
609 nigel 63 /************* Directory scanning in Win32 ***********/
610 nigel 53
611 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
612 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
613 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
614     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
615 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
616     undefined when it is indeed undefined. */
617 nigel 53
618 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
619 nigel 63
620     #ifndef STRICT
621     # define STRICT
622     #endif
623     #ifndef WIN32_LEAN_AND_MEAN
624     # define WIN32_LEAN_AND_MEAN
625     #endif
626 ph10 283
627     #include <windows.h>
628    
629 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
630     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
631     #endif
632    
633 nigel 63 typedef struct directory_type
634     {
635     HANDLE handle;
636     BOOL first;
637     WIN32_FIND_DATA data;
638     } directory_type;
639    
640 ph10 1004 #define FILESEP '/'
641 ph10 1003
642 nigel 63 int
643     isdirectory(char *filename)
644     {
645     DWORD attr = GetFileAttributes(filename);
646     if (attr == INVALID_FILE_ATTRIBUTES)
647     return 0;
648 ph10 1003 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
649 nigel 63 }
650    
651     directory_type *
652     opendirectory(char *filename)
653     {
654     size_t len;
655     char *pattern;
656     directory_type *dir;
657     DWORD err;
658     len = strlen(filename);
659 ph10 1003 pattern = (char *)malloc(len + 3);
660     dir = (directory_type *)malloc(sizeof(*dir));
661 nigel 63 if ((pattern == NULL) || (dir == NULL))
662     {
663     fprintf(stderr, "pcregrep: malloc failed\n");
664 ph10 561 pcregrep_exit(2);
665 nigel 63 }
666     memcpy(pattern, filename, len);
667     memcpy(&(pattern[len]), "\\*", 3);
668     dir->handle = FindFirstFile(pattern, &(dir->data));
669     if (dir->handle != INVALID_HANDLE_VALUE)
670     {
671     free(pattern);
672     dir->first = TRUE;
673     return dir;
674     }
675     err = GetLastError();
676     free(pattern);
677     free(dir);
678     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
679     return NULL;
680     }
681    
682     char *
683     readdirectory(directory_type *dir)
684     {
685     for (;;)
686     {
687     if (!dir->first)
688     {
689     if (!FindNextFile(dir->handle, &(dir->data)))
690     return NULL;
691     }
692     else
693     {
694     dir->first = FALSE;
695     }
696     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
697     return dir->data.cFileName;
698     }
699     #ifndef _MSC_VER
700     return NULL; /* Keep compiler happy; never executed */
701     #endif
702     }
703    
704     void
705     closedirectory(directory_type *dir)
706     {
707     FindClose(dir->handle);
708     free(dir);
709     }
710    
711    
712 nigel 87 /************* Test for regular file in Win32 **********/
713    
714     /* I don't know how to do this, or if it can be done; assume all paths are
715     regular if they are not directories. */
716    
717     int isregfile(char *filename)
718     {
719 ph10 283 return !isdirectory(filename);
720 nigel 87 }
721    
722    
723 ph10 519 /************* Test for a terminal in Win32 **********/
724 nigel 87
725     /* I don't know how to do this; assume never */
726    
727     static BOOL
728     is_stdout_tty(void)
729     {
730 ph10 283 return FALSE;
731 nigel 87 }
732    
733 ph10 519 static BOOL
734     is_file_tty(FILE *f)
735     {
736     return FALSE;
737     }
738 nigel 87
739 ph10 519
740 nigel 53 /************* Directory scanning when we can't do it ***********/
741    
742     /* The type is void, and apart from isdirectory(), the functions do nothing. */
743    
744 nigel 63 #else
745    
746 ph10 1005 #define FILESEP 0
747 nigel 53 typedef void directory_type;
748    
749 nigel 87 int isdirectory(char *filename) { return 0; }
750 ph10 97 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
751     char *readdirectory(directory_type *dir) { return (char*)0;}
752 nigel 53 void closedirectory(directory_type *dir) {}
753    
754 nigel 87
755     /************* Test for regular when we can't do it **********/
756    
757     /* Assume all files are regular. */
758    
759     int isregfile(char *filename) { return 1; }
760    
761    
762 ph10 519 /************* Test for a terminal when we can't do it **********/
763 nigel 87
764     static BOOL
765     is_stdout_tty(void)
766     {
767     return FALSE;
768     }
769    
770 ph10 519 static BOOL
771     is_file_tty(FILE *f)
772     {
773     return FALSE;
774     }
775 nigel 87
776 nigel 53 #endif
777    
778    
779    
780 ph10 137 #ifndef HAVE_STRERROR
781 nigel 49 /*************************************************
782     * Provide strerror() for non-ANSI libraries *
783     *************************************************/
784    
785     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
786     in their libraries, but can provide the same facility by this simple
787     alternative function. */
788    
789     extern int sys_nerr;
790     extern char *sys_errlist[];
791    
792     char *
793     strerror(int n)
794     {
795     if (n < 0 || n >= sys_nerr) return "unknown error number";
796     return sys_errlist[n];
797     }
798     #endif /* HAVE_STRERROR */
799    
800    
801    
802     /*************************************************
803 ph10 1039 * Usage function *
804     *************************************************/
805    
806     static int
807     usage(int rc)
808     {
809     option_item *op;
810     fprintf(stderr, "Usage: pcregrep [-");
811     for (op = optionlist; op->one_char != 0; op++)
812     {
813     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
814     }
815     fprintf(stderr, "] [long options] [pattern] [files]\n");
816     fprintf(stderr, "Type `pcregrep --help' for more information and the long "
817     "options.\n");
818     return rc;
819     }
820    
821    
822    
823     /*************************************************
824     * Help function *
825     *************************************************/
826    
827     static void
828     help(void)
829     {
830     option_item *op;
831    
832     printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
833     printf("Search for PATTERN in each FILE or standard input.\n");
834     printf("PATTERN must be present if neither -e nor -f is used.\n");
835     printf("\"-\" can be used as a file name to mean STDIN.\n");
836    
837     #ifdef SUPPORT_LIBZ
838     printf("Files whose names end in .gz are read using zlib.\n");
839     #endif
840    
841     #ifdef SUPPORT_LIBBZ2
842     printf("Files whose names end in .bz2 are read using bzlib2.\n");
843     #endif
844    
845     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
846     printf("Other files and the standard input are read as plain files.\n\n");
847     #else
848     printf("All files are read as plain files, without any interpretation.\n\n");
849     #endif
850    
851     printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
852     printf("Options:\n");
853    
854     for (op = optionlist; op->one_char != 0; op++)
855     {
856     int n;
857     char s[4];
858    
859     /* Two options were accidentally implemented and documented with underscores
860     instead of hyphens in their names, something that was not noticed for quite a
861     few releases. When fixing this, I left the underscored versions in the list
862     in case people were using them. However, we don't want to display them in the
863     help data. There are no other options that contain underscores, and we do not
864     expect ever to implement such options. Therefore, just omit any option that
865     contains an underscore. */
866    
867     if (strchr(op->long_name, '_') != NULL) continue;
868    
869     if (op->one_char > 0 && (op->long_name)[0] == 0)
870     n = 31 - printf(" -%c", op->one_char);
871     else
872     {
873     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
874     else strcpy(s, " ");
875     n = 31 - printf(" %s --%s", s, op->long_name);
876     }
877    
878     if (n < 1) n = 1;
879     printf("%.*s%s\n", n, " ", op->help_text);
880     }
881    
882     printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
883     printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
884     printf("When reading patterns or file names from a file, trailing white\n");
885     printf("space is removed and blank lines are ignored.\n");
886     printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
887    
888     printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
889     printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
890     }
891    
892    
893    
894     /*************************************************
895 ph10 1003 * Test exclude/includes *
896     *************************************************/
897    
898     /* If any exclude pattern matches, the path is excluded. Otherwise, unless
899     there are no includes, the path must match an include pattern.
900    
901     Arguments:
902     path the path to be matched
903     ip the chain of include patterns
904     ep the chain of exclude patterns
905    
906     Returns: TRUE if the path is not excluded
907     */
908    
909     static BOOL
910     test_incexc(char *path, patstr *ip, patstr *ep)
911     {
912     int plen = strlen(path);
913    
914     for (; ep != NULL; ep = ep->next)
915     {
916     if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
917     return FALSE;
918     }
919    
920     if (ip == NULL) return TRUE;
921    
922     for (; ip != NULL; ip = ip->next)
923     {
924     if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
925     return TRUE;
926     }
927    
928     return FALSE;
929     }
930    
931    
932    
933     /*************************************************
934 ph10 1039 * Decode integer argument value *
935     *************************************************/
936    
937     /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
938     because SunOS4 doesn't have it. This is used only for unpicking arguments, so
939     just keep it simple.
940    
941     Arguments:
942     option_data the option data string
943     op the option item (for error messages)
944     longop TRUE if option given in long form
945    
946     Returns: a long integer
947     */
948    
949     static long int
950     decode_number(char *option_data, option_item *op, BOOL longop)
951     {
952     unsigned long int n = 0;
953     char *endptr = option_data;
954     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
955     while (isdigit((unsigned char)(*endptr)))
956     n = n * 10 + (int)(*endptr++ - '0');
957     if (toupper(*endptr) == 'K')
958     {
959     n *= 1024;
960     endptr++;
961     }
962     else if (toupper(*endptr) == 'M')
963     {
964     n *= 1024*1024;
965     endptr++;
966     }
967    
968     if (*endptr != 0) /* Error */
969     {
970     if (longop)
971     {
972     char *equals = strchr(op->long_name, '=');
973     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
974     (int)(equals - op->long_name);
975     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
976     option_data, nlen, op->long_name);
977     }
978     else
979     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
980     option_data, op->one_char);
981     pcregrep_exit(usage(2));
982     }
983    
984     return n;
985     }
986    
987    
988    
989     /*************************************************
990     * Add item to a chain of numbers *
991     *************************************************/
992    
993     /* Used to add an item onto a chain, or just return an unconnected item if the
994     "after" argument is NULL.
995    
996     Arguments:
997     n the number to add
998     after if not NULL points to item to insert after
999    
1000     Returns: new number block
1001     */
1002    
1003     static omstr *
1004     add_number(int n, omstr *after)
1005     {
1006     omstr *om = (omstr *)malloc(sizeof(omstr));
1007    
1008     if (om == NULL)
1009     {
1010     fprintf(stderr, "pcregrep: malloc failed\n");
1011     pcregrep_exit(2);
1012     }
1013     om->next = NULL;
1014     om->groupnum = n;
1015    
1016     if (after != NULL)
1017     {
1018     om->next = after->next;
1019     after->next = om;
1020     }
1021     return om;
1022     }
1023    
1024    
1025    
1026     /*************************************************
1027 ph10 519 * Read one line of input *
1028     *************************************************/
1029    
1030 ph10 535 /* Normally, input is read using fread() into a large buffer, so many lines may
1031     be read at once. However, doing this for tty input means that no output appears
1032 ph10 519 until a lot of input has been typed. Instead, tty input is handled line by
1033     line. We cannot use fgets() for this, because it does not stop at a binary
1034 ph10 535 zero, and therefore there is no way of telling how many characters it has read,
1035 ph10 519 because there may be binary zeros embedded in the data.
1036    
1037     Arguments:
1038     buffer the buffer to read into
1039     length the maximum number of characters to read
1040     f the file
1041 ph10 535
1042 ph10 519 Returns: the number of characters read, zero at end of file
1043 ph10 535 */
1044 ph10 519
1045 ph10 904 static unsigned int
1046 ph10 519 read_one_line(char *buffer, int length, FILE *f)
1047     {
1048     int c;
1049     int yield = 0;
1050     while ((c = fgetc(f)) != EOF)
1051     {
1052     buffer[yield++] = c;
1053 ph10 535 if (c == '\n' || yield >= length) break;
1054     }
1055     return yield;
1056 ph10 519 }
1057    
1058    
1059    
1060     /*************************************************
1061 nigel 93 * Find end of line *
1062     *************************************************/
1063    
1064     /* The length of the endline sequence that is found is set via lenptr. This may
1065     be zero at the very end of the file if there is no line-ending sequence there.
1066    
1067     Arguments:
1068     p current position in line
1069     endptr end of available data
1070     lenptr where to put the length of the eol sequence
1071    
1072 ph10 654 Returns: pointer after the last byte of the line,
1073 ph10 644 including the newline byte(s)
1074 nigel 93 */
1075    
1076     static char *
1077     end_of_line(char *p, char *endptr, int *lenptr)
1078     {
1079     switch(endlinetype)
1080     {
1081     default: /* Just in case */
1082     case EL_LF:
1083     while (p < endptr && *p != '\n') p++;
1084     if (p < endptr)
1085     {
1086     *lenptr = 1;
1087     return p + 1;
1088     }
1089     *lenptr = 0;
1090     return endptr;
1091    
1092     case EL_CR:
1093     while (p < endptr && *p != '\r') p++;
1094     if (p < endptr)
1095     {
1096     *lenptr = 1;
1097     return p + 1;
1098     }
1099     *lenptr = 0;
1100     return endptr;
1101    
1102     case EL_CRLF:
1103     for (;;)
1104     {
1105     while (p < endptr && *p != '\r') p++;
1106     if (++p >= endptr)
1107     {
1108     *lenptr = 0;
1109     return endptr;
1110     }
1111     if (*p == '\n')
1112     {
1113     *lenptr = 2;
1114     return p + 1;
1115     }
1116     }
1117     break;
1118    
1119 ph10 149 case EL_ANYCRLF:
1120     while (p < endptr)
1121     {
1122     int extra = 0;
1123     register int c = *((unsigned char *)p);
1124    
1125     if (utf8 && c >= 0xc0)
1126     {
1127     int gcii, gcss;
1128     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1129     gcss = 6*extra;
1130     c = (c & utf8_table3[extra]) << gcss;
1131     for (gcii = 1; gcii <= extra; gcii++)
1132     {
1133     gcss -= 6;
1134     c |= (p[gcii] & 0x3f) << gcss;
1135     }
1136     }
1137    
1138     p += 1 + extra;
1139    
1140     switch (c)
1141     {
1142 ph10 1033 case '\n':
1143 ph10 149 *lenptr = 1;
1144     return p;
1145    
1146 ph10 1033 case '\r':
1147     if (p < endptr && *p == '\n')
1148 ph10 149 {
1149     *lenptr = 2;
1150     p++;
1151     }
1152     else *lenptr = 1;
1153     return p;
1154 ph10 150
1155 ph10 149 default:
1156     break;
1157     }
1158     } /* End of loop for ANYCRLF case */
1159 ph10 150
1160 ph10 149 *lenptr = 0; /* Must have hit the end */
1161     return endptr;
1162    
1163 nigel 93 case EL_ANY:
1164     while (p < endptr)
1165     {
1166     int extra = 0;
1167     register int c = *((unsigned char *)p);
1168    
1169     if (utf8 && c >= 0xc0)
1170     {
1171     int gcii, gcss;
1172     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1173     gcss = 6*extra;
1174     c = (c & utf8_table3[extra]) << gcss;
1175     for (gcii = 1; gcii <= extra; gcii++)
1176     {
1177     gcss -= 6;
1178     c |= (p[gcii] & 0x3f) << gcss;
1179     }
1180     }
1181    
1182     p += 1 + extra;
1183    
1184     switch (c)
1185     {
1186 ph10 1033 case '\n': /* LF */
1187     case '\v': /* VT */
1188     case '\f': /* FF */
1189 nigel 93 *lenptr = 1;
1190     return p;
1191    
1192 ph10 1033 case '\r': /* CR */
1193     if (p < endptr && *p == '\n')
1194 nigel 93 {
1195     *lenptr = 2;
1196     p++;
1197     }
1198     else *lenptr = 1;
1199     return p;
1200    
1201 ph10 1033 #ifndef EBCDIC
1202     case 0x85: /* Unicode NEL */
1203 nigel 93 *lenptr = utf8? 2 : 1;
1204     return p;
1205    
1206 ph10 1033 case 0x2028: /* Unicode LS */
1207     case 0x2029: /* Unicode PS */
1208 nigel 93 *lenptr = 3;
1209     return p;
1210 ph10 1039 #endif /* Not EBCDIC */
1211 nigel 93
1212     default:
1213     break;
1214     }
1215     } /* End of loop for ANY case */
1216    
1217     *lenptr = 0; /* Must have hit the end */
1218     return endptr;
1219     } /* End of overall switch */
1220     }
1221    
1222    
1223    
1224     /*************************************************
1225     * Find start of previous line *
1226     *************************************************/
1227    
1228     /* This is called when looking back for before lines to print.
1229    
1230     Arguments:
1231     p start of the subsequent line
1232     startptr start of available data
1233    
1234     Returns: pointer to the start of the previous line
1235     */
1236    
1237     static char *
1238     previous_line(char *p, char *startptr)
1239     {
1240     switch(endlinetype)
1241     {
1242     default: /* Just in case */
1243     case EL_LF:
1244     p--;
1245     while (p > startptr && p[-1] != '\n') p--;
1246     return p;
1247    
1248     case EL_CR:
1249     p--;
1250     while (p > startptr && p[-1] != '\n') p--;
1251     return p;
1252    
1253     case EL_CRLF:
1254     for (;;)
1255     {
1256     p -= 2;
1257     while (p > startptr && p[-1] != '\n') p--;
1258     if (p <= startptr + 1 || p[-2] == '\r') return p;
1259     }
1260     return p; /* But control should never get here */
1261    
1262     case EL_ANY:
1263 ph10 150 case EL_ANYCRLF:
1264 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1265     if (utf8) while ((*p & 0xc0) == 0x80) p--;
1266    
1267     while (p > startptr)
1268     {
1269 chpe 1096 register unsigned int c;
1270 nigel 93 char *pp = p - 1;
1271    
1272     if (utf8)
1273     {
1274     int extra = 0;
1275     while ((*pp & 0xc0) == 0x80) pp--;
1276     c = *((unsigned char *)pp);
1277     if (c >= 0xc0)
1278     {
1279     int gcii, gcss;
1280     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1281     gcss = 6*extra;
1282     c = (c & utf8_table3[extra]) << gcss;
1283     for (gcii = 1; gcii <= extra; gcii++)
1284     {
1285     gcss -= 6;
1286     c |= (pp[gcii] & 0x3f) << gcss;
1287     }
1288     }
1289     }
1290     else c = *((unsigned char *)pp);
1291    
1292 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
1293 nigel 93 {
1294 ph10 1033 case '\n': /* LF */
1295     case '\r': /* CR */
1296 ph10 149 return p;
1297 ph10 150
1298 ph10 149 default:
1299     break;
1300 ph10 150 }
1301 ph10 149
1302     else switch (c)
1303     {
1304 ph10 1033 case '\n': /* LF */
1305     case '\v': /* VT */
1306     case '\f': /* FF */
1307     case '\r': /* CR */
1308 ph10 1039 #ifndef EBCDIE
1309 ph10 1033 case 0x85: /* Unicode NEL */
1310     case 0x2028: /* Unicode LS */
1311     case 0x2029: /* Unicode PS */
1312 ph10 1039 #endif /* Not EBCDIC */
1313 nigel 93 return p;
1314    
1315     default:
1316     break;
1317     }
1318    
1319     p = pp; /* Back one character */
1320     } /* End of loop for ANY case */
1321    
1322     return startptr; /* Hit start of data */
1323     } /* End of overall switch */
1324     }
1325    
1326    
1327    
1328    
1329    
1330     /*************************************************
1331 nigel 77 * Print the previous "after" lines *
1332 nigel 49 *************************************************/
1333    
1334 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
1335 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
1336     that a binary zero does not terminate it.
1337 nigel 77
1338     Arguments:
1339     lastmatchnumber the number of the last matching line, plus one
1340     lastmatchrestart where we restarted after the last match
1341     endptr end of available data
1342     printname filename for printing
1343    
1344     Returns: nothing
1345     */
1346    
1347 ph10 1003 static void
1348     do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1349     char *printname)
1350 nigel 77 {
1351     if (after_context > 0 && lastmatchnumber > 0)
1352     {
1353     int count = 0;
1354     while (lastmatchrestart < endptr && count++ < after_context)
1355     {
1356 nigel 93 int ellength;
1357 nigel 77 char *pp = lastmatchrestart;
1358     if (printname != NULL) fprintf(stdout, "%s-", printname);
1359     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1360 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1361 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1362 nigel 93 lastmatchrestart = pp;
1363 nigel 77 }
1364     hyphenpending = TRUE;
1365     }
1366     }
1367    
1368    
1369    
1370     /*************************************************
1371 ph10 378 * Apply patterns to subject till one matches *
1372     *************************************************/
1373    
1374 ph10 392 /* This function is called to run through all patterns, looking for a match. It
1375     is used multiple times for the same subject when colouring is enabled, in order
1376 ph10 378 to find all possible matches.
1377    
1378     Arguments:
1379 ph10 632 matchptr the start of the subject
1380     length the length of the subject to match
1381 ph10 1324 options options for pcre_exec
1382 ph10 632 startoffset where to start matching
1383     offsets the offets vector to fill in
1384     mrc address of where to put the result of pcre_exec()
1385 ph10 392
1386     Returns: TRUE if there was a match
1387 ph10 378 FALSE if there was no match
1388     invert if there was a non-fatal error
1389 ph10 392 */
1390 ph10 378
1391     static BOOL
1392 ph10 1324 match_patterns(char *matchptr, size_t length, unsigned int options,
1393     int startoffset, int *offsets, int *mrc)
1394 ph10 378 {
1395     int i;
1396 ph10 561 size_t slen = length;
1397 ph10 1003 patstr *p = patterns;
1398 ph10 561 const char *msg = "this text:\n\n";
1399 ph10 1003
1400 ph10 561 if (slen > 200)
1401     {
1402     slen = 200;
1403     msg = "text that starts:\n\n";
1404 ph10 579 }
1405 ph10 1003 for (i = 1; p != NULL; p = p->next, i++)
1406 ph10 378 {
1407 ph10 1003 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1408 ph10 1324 startoffset, options, offsets, OFFSET_SIZE);
1409 ph10 378 if (*mrc >= 0) return TRUE;
1410     if (*mrc == PCRE_ERROR_NOMATCH) continue;
1411 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1412 ph10 1003 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1413 ph10 561 fprintf(stderr, "%s", msg);
1414     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1415     fprintf(stderr, "\n\n");
1416 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1417     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1418 ph10 561 resource_error = TRUE;
1419 ph10 378 if (error_count++ > 20)
1420     {
1421 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1422     pcregrep_exit(2);
1423 ph10 378 }
1424     return invert; /* No more matching; don't show the line again */
1425     }
1426    
1427     return FALSE; /* No match, no errors */
1428     }
1429    
1430    
1431    
1432     /*************************************************
1433 nigel 77 * Grep an individual file *
1434     *************************************************/
1435    
1436     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1437 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1438 nigel 77 the top third of the buffer, thus keeping more of the file available for
1439     context printing or for multiline scanning. For large files, the pointer will
1440     be in the middle third most of the time, so the bottom third is available for
1441     "before" context printing.
1442    
1443     Arguments:
1444 ph10 286 handle the fopened FILE stream for a normal file
1445     the gzFile pointer when reading is via libz
1446     the BZFILE pointer when reading is via libbz2
1447     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1448 ph10 644 filename the file name or NULL (for errors)
1449 nigel 77 printname the file name if it is to be printed for each match
1450     or NULL if the file name is not to be printed
1451     it cannot be NULL if filenames[_nomatch]_only is set
1452    
1453     Returns: 0 if there was at least one match
1454     1 otherwise (no matches)
1455 ph10 654 2 if an overlong line is encountered
1456 ph10 644 3 if there is a read error on a .bz2 file
1457 nigel 77 */
1458    
1459 nigel 49 static int
1460 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1461 nigel 49 {
1462     int rc = 1;
1463 nigel 77 int linenumber = 1;
1464     int lastmatchnumber = 0;
1465 nigel 49 int count = 0;
1466 ph10 280 int filepos = 0;
1467 ph10 378 int offsets[OFFSET_SIZE];
1468 nigel 77 char *lastmatchrestart = NULL;
1469 ph10 644 char *ptr = main_buffer;
1470 nigel 77 char *endptr;
1471     size_t bufflength;
1472 ph10 947 BOOL binary = FALSE;
1473 nigel 77 BOOL endhyphenpending = FALSE;
1474 ph10 519 BOOL input_line_buffered = line_buffered;
1475 ph10 286 FILE *in = NULL; /* Ensure initialized */
1476 nigel 49
1477 ph10 286 #ifdef SUPPORT_LIBZ
1478     gzFile ingz = NULL;
1479     #endif
1480 nigel 77
1481 ph10 286 #ifdef SUPPORT_LIBBZ2
1482     BZFILE *inbz2 = NULL;
1483     #endif
1484    
1485    
1486     /* Do the first read into the start of the buffer and set up the pointer to end
1487     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1488     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1489     fail. */
1490    
1491 chpe 1136 (void)frtype;
1492    
1493 ph10 286 #ifdef SUPPORT_LIBZ
1494     if (frtype == FR_LIBZ)
1495     {
1496     ingz = (gzFile)handle;
1497 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1498 ph10 286 }
1499     else
1500     #endif
1501    
1502     #ifdef SUPPORT_LIBBZ2
1503     if (frtype == FR_LIBBZ2)
1504     {
1505     inbz2 = (BZFILE *)handle;
1506 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1507 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1508     } /* without the cast it is unsigned. */
1509     else
1510     #endif
1511    
1512     {
1513     in = (FILE *)handle;
1514 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1515 ph10 535 bufflength = input_line_buffered?
1516 ph10 644 read_one_line(main_buffer, bufsize, in) :
1517     fread(main_buffer, 1, bufsize, in);
1518 ph10 286 }
1519 ph10 535
1520 ph10 644 endptr = main_buffer + bufflength;
1521 nigel 77
1522 ph10 947 /* Unless binary-files=text, see if we have a binary file. This uses the same
1523 ph10 975 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1524 ph10 947 file. */
1525    
1526     if (binary_files != BIN_TEXT)
1527     {
1528 ph10 975 binary =
1529 ph10 947 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1530     if (binary && binary_files == BIN_NOMATCH) return 1;
1531 ph10 975 }
1532 ph10 947
1533 nigel 77 /* Loop while the current pointer is not at the end of the file. For large
1534     files, endptr will be at the end of the buffer when we are in the middle of the
1535     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1536     way, the buffer is shifted left and re-filled. */
1537    
1538     while (ptr < endptr)
1539 nigel 49 {
1540 ph10 378 int endlinelength;
1541 nigel 87 int mrc = 0;
1542 ph10 654 int startoffset = 0;
1543 ph10 1324 unsigned int options = 0;
1544 ph10 378 BOOL match;
1545 ph10 286 char *matchptr = ptr;
1546 nigel 77 char *t = ptr;
1547     size_t length, linelength;
1548 nigel 49
1549 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1550     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1551     length remainder of the data in the buffer. Otherwise, it is the length of
1552 ph10 378 the next line, excluding the terminating newline. After matching, we always
1553     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1554     option is used for compiling, so that any match is constrained to be in the
1555     first line. */
1556 nigel 77
1557 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1558     linelength = t - ptr - endlinelength;
1559 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1560 ph10 654
1561     /* Check to see if the line we are looking at extends right to the very end
1562     of the buffer without a line terminator. This means the line is too long to
1563 ph10 644 handle. */
1564 ph10 654
1565 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1566     {
1567     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1568 ph10 646 "pcregrep: check the --buffer-size option\n",
1569 ph10 654 linenumber,
1570 ph10 644 (filename == NULL)? "" : " of file ",
1571     (filename == NULL)? "" : filename);
1572     return 2;
1573 ph10 654 }
1574 nigel 77
1575 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1576    
1577     #ifdef JFRIEDL_DEBUG
1578     if (jfriedl_XT || jfriedl_XR)
1579     {
1580 zherczeg 1216 # include <sys/time.h>
1581     # include <time.h>
1582 nigel 89 struct timeval start_time, end_time;
1583     struct timezone dummy;
1584 ph10 392 int i;
1585 nigel 89
1586     if (jfriedl_XT)
1587     {
1588     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1589     const char *orig = ptr;
1590     ptr = malloc(newlen + 1);
1591     if (!ptr) {
1592     printf("out of memory");
1593 ph10 561 pcregrep_exit(2);
1594 nigel 89 }
1595     endptr = ptr;
1596     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1597     for (i = 0; i < jfriedl_XT; i++) {
1598     strncpy(endptr, orig, length);
1599     endptr += length;
1600     }
1601     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1602     length = newlen;
1603     }
1604    
1605     if (gettimeofday(&start_time, &dummy) != 0)
1606     perror("bad gettimeofday");
1607    
1608    
1609     for (i = 0; i < jfriedl_XR; i++)
1610 ph10 1003 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1611 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1612 nigel 89
1613     if (gettimeofday(&end_time, &dummy) != 0)
1614     perror("bad gettimeofday");
1615    
1616     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1617     -
1618     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1619    
1620     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1621     return 0;
1622     }
1623     #endif
1624    
1625 ph10 1039 /* We come back here after a match when show_only_matching is set, in order
1626     to find any further matches in the same line. This applies to
1627     --only-matching, --file-offsets, and --line-offsets. */
1628 nigel 89
1629 ph10 286 ONLY_MATCHING_RESTART:
1630    
1631 ph10 392 /* Run through all the patterns until one matches or there is an error other
1632 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1633 ph10 1324 finding subsequent matches when colouring matched lines. After finding one
1634     match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1635     this line. */
1636 ph10 392
1637 ph10 1324 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1638     options = PCRE_NOTEMPTY;
1639 nigel 77
1640 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1641 nigel 77
1642 nigel 49 if (match != invert)
1643     {
1644 nigel 77 BOOL hyphenprinted = FALSE;
1645    
1646 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1647 nigel 77
1648 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1649    
1650     /* Just count if just counting is wanted. */
1651    
1652 nigel 49 if (count_only) count++;
1653 ph10 975
1654     /* When handling a binary file and binary-files==binary, the "binary"
1655     variable will be set true (it's false in all other cases). In this
1656 ph10 947 situation we just want to output the file name. No need to scan further. */
1657 ph10 975
1658 ph10 947 else if (binary)
1659     {
1660     fprintf(stdout, "Binary file %s matches\n", filename);
1661 ph10 975 return 0;
1662     }
1663 nigel 49
1664 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1665     in the file. */
1666    
1667 ph10 420 else if (filenames == FN_MATCH_ONLY)
1668 nigel 49 {
1669 nigel 77 fprintf(stdout, "%s\n", printname);
1670 nigel 49 return 0;
1671     }
1672    
1673 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1674    
1675 nigel 77 else if (quiet) return 0;
1676 nigel 49
1677 ph10 1039 /* The --only-matching option prints just the substring that matched,
1678     and/or one or more captured portions of it, as long as these strings are
1679     not empty. The --file-offsets and --line-offsets options output offsets for
1680     the matching substring (all three set show_only_matching). None of these
1681     mutually exclusive options prints any context. Afterwards, adjust the start
1682     and then jump back to look for further matches in the same line. If we are
1683     in invert mode, however, nothing is printed and we do not restart - this
1684     could still be useful because the return code is set. */
1685 nigel 87
1686 ph10 1039 else if (show_only_matching)
1687 nigel 87 {
1688 ph10 279 if (!invert)
1689 ph10 286 {
1690 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1691     if (number) fprintf(stdout, "%d:", linenumber);
1692 ph10 1039
1693     /* Handle --line-offsets */
1694    
1695 ph10 280 if (line_offsets)
1696 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1697 ph10 286 offsets[1] - offsets[0]);
1698 ph10 1039
1699     /* Handle --file-offsets */
1700    
1701 ph10 280 else if (file_offsets)
1702 ph10 579 fprintf(stdout, "%d,%d\n",
1703 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1704 ph10 286 offsets[1] - offsets[0]);
1705 ph10 1039
1706     /* Handle --only-matching, which may occur many times */
1707    
1708     else
1709 ph10 377 {
1710 ph10 1039 BOOL printed = FALSE;
1711     omstr *om;
1712 ph10 1221
1713 ph10 1039 for (om = only_matching; om != NULL; om = om->next)
1714 ph10 579 {
1715 ph10 1039 int n = om->groupnum;
1716     if (n < mrc)
1717     {
1718     int plen = offsets[2*n + 1] - offsets[2*n];
1719     if (plen > 0)
1720     {
1721 ph10 1221 if (printed) fprintf(stdout, "%s", om_separator);
1722 ph10 1039 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1723     FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1724     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1725     printed = TRUE;
1726     }
1727     }
1728 ph10 579 }
1729 ph10 1221
1730 ph10 1039 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1731 ph10 392 }
1732 ph10 1039
1733     /* Prepare to repeat to find the next match */
1734    
1735 ph10 286 match = FALSE;
1736 ph10 564 if (line_buffered) fflush(stdout);
1737 ph10 636 rc = 0; /* Had some success */
1738     startoffset = offsets[1]; /* Restart after the match */
1739 ph10 286 goto ONLY_MATCHING_RESTART;
1740     }
1741 nigel 87 }
1742    
1743     /* This is the default case when none of the above options is set. We print
1744     the matching lines(s), possibly preceded and/or followed by other lines of
1745     context. */
1746    
1747 nigel 49 else
1748     {
1749 nigel 77 /* See if there is a requirement to print some "after" lines from a
1750     previous match. We never print any overlaps. */
1751    
1752     if (after_context > 0 && lastmatchnumber > 0)
1753     {
1754 nigel 93 int ellength;
1755 nigel 77 int linecount = 0;
1756     char *p = lastmatchrestart;
1757    
1758     while (p < ptr && linecount < after_context)
1759     {
1760 nigel 93 p = end_of_line(p, ptr, &ellength);
1761 nigel 77 linecount++;
1762     }
1763    
1764     /* It is important to advance lastmatchrestart during this printing so
1765 nigel 87 that it interacts correctly with any "before" printing below. Print
1766     each line's data using fwrite() in case there are binary zeroes. */
1767 nigel 77
1768     while (lastmatchrestart < p)
1769     {
1770     char *pp = lastmatchrestart;
1771     if (printname != NULL) fprintf(stdout, "%s-", printname);
1772     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1773 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1774 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1775 nigel 93 lastmatchrestart = pp;
1776 nigel 77 }
1777     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1778     }
1779    
1780     /* If there were non-contiguous lines printed above, insert hyphens. */
1781    
1782     if (hyphenpending)
1783     {
1784     fprintf(stdout, "--\n");
1785     hyphenpending = FALSE;
1786     hyphenprinted = TRUE;
1787     }
1788    
1789     /* See if there is a requirement to print some "before" lines for this
1790     match. Again, don't print overlaps. */
1791    
1792     if (before_context > 0)
1793     {
1794     int linecount = 0;
1795     char *p = ptr;
1796    
1797 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1798 nigel 87 linecount < before_context)
1799 nigel 77 {
1800 nigel 87 linecount++;
1801 ph10 644 p = previous_line(p, main_buffer);
1802 nigel 77 }
1803    
1804     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1805     fprintf(stdout, "--\n");
1806    
1807     while (p < ptr)
1808     {
1809 nigel 93 int ellength;
1810 nigel 77 char *pp = p;
1811     if (printname != NULL) fprintf(stdout, "%s-", printname);
1812     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1813 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1814 ph10 515 FWRITE(p, 1, pp - p, stdout);
1815 nigel 93 p = pp;
1816 nigel 77 }
1817     }
1818    
1819     /* Now print the matching line(s); ensure we set hyphenpending at the end
1820 nigel 85 of the file if any context lines are being output. */
1821 nigel 77
1822 nigel 85 if (after_context > 0 || before_context > 0)
1823     endhyphenpending = TRUE;
1824    
1825 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1826 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1827 nigel 77
1828     /* In multiline mode, we want to print to the end of the line in which
1829     the end of the matched string is found, so we adjust linelength and the
1830 ph10 222 line number appropriately, but only when there actually was a match
1831     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1832     the match will always be before the first newline sequence. */
1833 nigel 77
1834 ph10 587 if (multiline & !invert)
1835 nigel 77 {
1836 ph10 587 char *endmatch = ptr + offsets[1];
1837     t = ptr;
1838     while (t < endmatch)
1839 nigel 93 {
1840 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1841     if (t < endmatch) linenumber++; else break;
1842 nigel 93 }
1843 ph10 587 linelength = t - ptr - endlinelength;
1844 nigel 77 }
1845    
1846 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1847     zeroes are treated as just another data character. */
1848    
1849     /* This extra option, for Jeffrey Friedl's debugging requirements,
1850     replaces the matched string, or a specific captured string if it exists,
1851     with X. When this happens, colouring is ignored. */
1852    
1853     #ifdef JFRIEDL_DEBUG
1854     if (S_arg >= 0 && S_arg < mrc)
1855     {
1856     int first = S_arg * 2;
1857     int last = first + 1;
1858 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1859 nigel 87 fprintf(stdout, "X");
1860 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1861 nigel 87 }
1862     else
1863     #endif
1864    
1865 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1866 ph10 585 matches, but not of course if the line is a non-match. */
1867 ph10 589
1868 ph10 585 if (do_colour && !invert)
1869 nigel 87 {
1870 ph10 589 int plength;
1871 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1872 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1873 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1874 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1875 ph10 378 for (;;)
1876     {
1877 ph10 632 startoffset = offsets[1];
1878 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1879 ph10 1324 !match_patterns(matchptr, length, options, startoffset, offsets,
1880     &mrc))
1881 ph10 632 break;
1882     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1883 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1884 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1885 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1886     }
1887 ph10 587
1888     /* In multiline mode, we may have already printed the complete line
1889 ph10 589 and its line-ending characters (if they matched the pattern), so there
1890 ph10 587 may be no more to print. */
1891 ph10 589
1892 ph10 836 plength = (int)((linelength + endlinelength) - startoffset);
1893 ph10 636 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1894 nigel 87 }
1895 ph10 392
1896 ph10 378 /* Not colouring; no need to search for further matches */
1897 ph10 392
1898 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1899 nigel 49 }
1900    
1901 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1902     given, flush the output. */
1903 nigel 87
1904 ph10 519 if (line_buffered) fflush(stdout);
1905 nigel 77 rc = 0; /* Had some success */
1906    
1907     /* Remember where the last match happened for after_context. We remember
1908     where we are about to restart, and that line's number. */
1909    
1910 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1911 nigel 77 lastmatchnumber = linenumber + 1;
1912 nigel 49 }
1913 nigel 77
1914 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1915     anything to be printed), we have to move on to the end of the match before
1916     proceeding. */
1917    
1918     if (multiline && invert && match)
1919     {
1920     int ellength;
1921     char *endmatch = ptr + offsets[1];
1922     t = ptr;
1923     while (t < endmatch)
1924     {
1925     t = end_of_line(t, endptr, &ellength);
1926     if (t <= endmatch) linenumber++; else break;
1927     }
1928     endmatch = end_of_line(endmatch, endptr, &ellength);
1929     linelength = endmatch - ptr - ellength;
1930     }
1931    
1932 ph10 286 /* Advance to after the newline and increment the line number. The file
1933 ph10 280 offset to the current line is maintained in filepos. */
1934 nigel 77
1935 nigel 93 ptr += linelength + endlinelength;
1936 ph10 530 filepos += (int)(linelength + endlinelength);
1937 nigel 77 linenumber++;
1938 ph10 535
1939     /* If input is line buffered, and the buffer is not yet full, read another
1940 ph10 519 line and add it into the buffer. */
1941 ph10 535
1942 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
1943 ph10 519 {
1944 ph10 836 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1945 ph10 519 bufflength += add;
1946 ph10 535 endptr += add;
1947     }
1948 nigel 77
1949     /* If we haven't yet reached the end of the file (the buffer is full), and
1950     the current point is in the top 1/3 of the buffer, slide the buffer down by
1951     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1952     about to be lost, print them. */
1953    
1954 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1955 nigel 77 {
1956     if (after_context > 0 &&
1957     lastmatchnumber > 0 &&
1958 ph10 644 lastmatchrestart < main_buffer + bufthird)
1959 nigel 77 {
1960     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1961     lastmatchnumber = 0;
1962     }
1963    
1964     /* Now do the shuffle */
1965    
1966 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1967     ptr -= bufthird;
1968 ph10 286
1969     #ifdef SUPPORT_LIBZ
1970     if (frtype == FR_LIBZ)
1971 ph10 644 bufflength = 2*bufthird +
1972     gzread (ingz, main_buffer + 2*bufthird, bufthird);
1973 ph10 286 else
1974     #endif
1975    
1976     #ifdef SUPPORT_LIBBZ2
1977     if (frtype == FR_LIBBZ2)
1978 ph10 644 bufflength = 2*bufthird +
1979     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1980 ph10 286 else
1981     #endif
1982    
1983 ph10 644 bufflength = 2*bufthird +
1984 ph10 535 (input_line_buffered?
1985 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1986     fread(main_buffer + 2*bufthird, 1, bufthird, in));
1987     endptr = main_buffer + bufflength;
1988 nigel 77
1989     /* Adjust any last match point */
1990    
1991 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1992 nigel 77 }
1993     } /* Loop through the whole file */
1994    
1995     /* End of file; print final "after" lines if wanted; do_after_lines sets
1996     hyphenpending if it prints something. */
1997    
1998 ph10 1039 if (!show_only_matching && !count_only)
1999 nigel 87 {
2000     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2001     hyphenpending |= endhyphenpending;
2002     }
2003 nigel 77
2004     /* Print the file name if we are looking for those without matches and there
2005     were none. If we found a match, we won't have got this far. */
2006    
2007 nigel 87 if (filenames == FN_NOMATCH_ONLY)
2008 nigel 77 {
2009     fprintf(stdout, "%s\n", printname);
2010     return 0;
2011 nigel 49 }
2012    
2013 nigel 77 /* Print the match count if wanted */
2014    
2015 nigel 49 if (count_only)
2016     {
2017 ph10 420 if (count > 0 || !omit_zero_count)
2018 ph10 461 {
2019     if (printname != NULL && filenames != FN_NONE)
2020 ph10 420 fprintf(stdout, "%s:", printname);
2021     fprintf(stdout, "%d\n", count);
2022 ph10 461 }
2023 nigel 49 }
2024    
2025     return rc;
2026     }
2027    
2028    
2029    
2030     /*************************************************
2031 nigel 53 * Grep a file or recurse into a directory *
2032     *************************************************/
2033    
2034 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
2035     recursing; if it's a file, grep it.
2036    
2037     Arguments:
2038     pathname the path to investigate
2039 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2040 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
2041    
2042 ph10 1003 Returns: -1 the file/directory was skipped
2043     0 if there was at least one match
2044 nigel 77 1 if there were no matches
2045     2 there was some kind of error
2046    
2047     However, file opening failures are suppressed if "silent" is set.
2048     */
2049    
2050 nigel 53 static int
2051 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2052 nigel 53 {
2053     int rc = 1;
2054 ph10 286 int frtype;
2055     void *handle;
2056 ph10 1003 char *lastcomp;
2057 ph10 286 FILE *in = NULL; /* Ensure initialized */
2058 nigel 53
2059 ph10 286 #ifdef SUPPORT_LIBZ
2060     gzFile ingz = NULL;
2061     #endif
2062    
2063     #ifdef SUPPORT_LIBBZ2
2064     BZFILE *inbz2 = NULL;
2065     #endif
2066    
2067 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2068 ph10 879 int pathlen;
2069     #endif
2070    
2071 nigel 77 /* If the file name is "-" we scan stdin */
2072 nigel 53
2073 nigel 77 if (strcmp(pathname, "-") == 0)
2074 nigel 53 {
2075 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
2076 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2077 nigel 77 stdin_name : NULL);
2078     }
2079    
2080 ph10 1003 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2081     directories, whereas --include and --exclude apply to everything else. The test
2082     is against the final component of the path. */
2083 nigel 87
2084 ph10 1003 lastcomp = strrchr(pathname, FILESEP);
2085     lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2086    
2087     /* If the file is a directory, skip if not recursing or if explicitly excluded.
2088     Otherwise, scan the directory and recurse for each path within it. The scanning
2089     code is localized so it can be made system-specific. */
2090    
2091     if (isdirectory(pathname))
2092 nigel 77 {
2093 ph10 1003 if (dee_action == dee_SKIP ||
2094     !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2095     return -1;
2096    
2097 nigel 87 if (dee_action == dee_RECURSE)
2098 nigel 53 {
2099 nigel 87 char buffer[1024];
2100     char *nextfile;
2101     directory_type *dir = opendirectory(pathname);
2102 nigel 53
2103 nigel 87 if (dir == NULL)
2104     {
2105     if (!silent)
2106     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2107     strerror(errno));
2108     return 2;
2109     }
2110 nigel 77
2111 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
2112     {
2113 ph10 1003 int frc;
2114     sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2115 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2116     if (frc > 1) rc = frc;
2117     else if (frc == 0 && rc == 1) rc = 0;
2118     }
2119    
2120     closedirectory(dir);
2121     return rc;
2122 nigel 53 }
2123     }
2124    
2125 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
2126 ph10 1003 been requested. Otherwise, check for explicit include/exclude. */
2127 nigel 53
2128 ph10 1003 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2129     !test_incexc(lastcomp, include_patterns, exclude_patterns))
2130     return -1;
2131 nigel 87
2132     /* Control reaches here if we have a regular file, or if we have a directory
2133     and recursion or skipping was not requested, or if we have anything else and
2134     skipping was not requested. The scan proceeds. If this is the first and only
2135     argument at top level, we don't show the file name, unless we are only showing
2136     the file name, or the filename was forced (-H). */
2137    
2138 ph10 971 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2139 ph10 530 pathlen = (int)(strlen(pathname));
2140 ph10 879 #endif
2141 ph10 286
2142     /* Open using zlib if it is supported and the file name ends with .gz. */
2143    
2144     #ifdef SUPPORT_LIBZ
2145     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2146 nigel 53 {
2147 ph10 286 ingz = gzopen(pathname, "rb");
2148     if (ingz == NULL)
2149     {
2150     if (!silent)
2151     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2152     strerror(errno));
2153     return 2;
2154     }
2155     handle = (void *)ingz;
2156     frtype = FR_LIBZ;
2157     }
2158     else
2159     #endif
2160    
2161     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2162    
2163     #ifdef SUPPORT_LIBBZ2
2164     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2165     {
2166     inbz2 = BZ2_bzopen(pathname, "rb");
2167     handle = (void *)inbz2;
2168     frtype = FR_LIBBZ2;
2169     }
2170     else
2171     #endif
2172    
2173     /* Otherwise use plain fopen(). The label is so that we can come back here if
2174     an attempt to read a .bz2 file indicates that it really is a plain file. */
2175    
2176     #ifdef SUPPORT_LIBBZ2
2177     PLAIN_FILE:
2178     #endif
2179     {
2180 ph10 419 in = fopen(pathname, "rb");
2181 ph10 286 handle = (void *)in;
2182     frtype = FR_PLAIN;
2183     }
2184    
2185     /* All the opening methods return errno when they fail. */
2186    
2187     if (handle == NULL)
2188     {
2189 nigel 77 if (!silent)
2190     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2191     strerror(errno));
2192 nigel 53 return 2;
2193     }
2194    
2195 ph10 286 /* Now grep the file */
2196    
2197 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2198 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2199 nigel 77
2200 ph10 286 /* Close in an appropriate manner. */
2201    
2202     #ifdef SUPPORT_LIBZ
2203     if (frtype == FR_LIBZ)
2204     gzclose(ingz);
2205     else
2206     #endif
2207    
2208 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2209 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
2210     again as a normal file. */
2211    
2212     #ifdef SUPPORT_LIBBZ2
2213     if (frtype == FR_LIBBZ2)
2214     {
2215 ph10 644 if (rc == 3)
2216 ph10 286 {
2217     int errnum;
2218     const char *err = BZ2_bzerror(inbz2, &errnum);
2219     if (errnum == BZ_DATA_ERROR_MAGIC)
2220     {
2221     BZ2_bzclose(inbz2);
2222     goto PLAIN_FILE;
2223     }
2224     else if (!silent)
2225     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2226     pathname, err);
2227 ph10 654 rc = 2; /* The normal "something went wrong" code */
2228 ph10 286 }
2229     BZ2_bzclose(inbz2);
2230     }
2231     else
2232     #endif
2233    
2234     /* Normal file close */
2235    
2236 nigel 53 fclose(in);
2237 ph10 286
2238     /* Pass back the yield from pcregrep(). */
2239    
2240 nigel 53 return rc;
2241     }
2242    
2243    
2244    
2245     /*************************************************
2246 nigel 77 * Handle a single-letter, no data option *
2247 nigel 53 *************************************************/
2248    
2249     static int
2250     handle_option(int letter, int options)
2251     {
2252     switch(letter)
2253     {
2254 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
2255 ph10 561 case N_HELP: help(); pcregrep_exit(0);
2256 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
2257 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
2258 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2259 ph10 947 case 'a': binary_files = BIN_TEXT; break;
2260 nigel 53 case 'c': count_only = TRUE; break;
2261 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
2262     case 'H': filenames = FN_FORCE; break;
2263 ph10 947 case 'I': binary_files = BIN_NOMATCH; break;
2264 nigel 87 case 'h': filenames = FN_NONE; break;
2265 nigel 53 case 'i': options |= PCRE_CASELESS; break;
2266 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2267 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
2268 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2269 nigel 53 case 'n': number = TRUE; break;
2270 ph10 1221
2271 ph10 1039 case 'o':
2272     only_matching_last = add_number(0, only_matching_last);
2273     if (only_matching == NULL) only_matching = only_matching_last;
2274     break;
2275 ph10 1221
2276 nigel 77 case 'q': quiet = TRUE; break;
2277 nigel 87 case 'r': dee_action = dee_RECURSE; break;
2278 nigel 53 case 's': silent = TRUE; break;
2279 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2280 nigel 53 case 'v': invert = TRUE; break;
2281 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
2282     case 'x': process_options |= PO_LINE_MATCH; break;
2283 nigel 53
2284     case 'V':
2285 ph10 1003 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2286 ph10 561 pcregrep_exit(0);
2287 nigel 53 break;
2288    
2289     default:
2290     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2291 ph10 561 pcregrep_exit(usage(2));
2292 nigel 53 }
2293    
2294     return options;
2295     }
2296    
2297    
2298    
2299    
2300     /*************************************************
2301 nigel 87 * Construct printed ordinal *
2302     *************************************************/
2303    
2304     /* This turns a number into "1st", "3rd", etc. */
2305    
2306     static char *
2307     ordin(int n)
2308     {
2309     static char buffer[8];
2310     char *p = buffer;
2311     sprintf(p, "%d", n);
2312     while (*p != 0) p++;
2313     switch (n%10)
2314     {
2315     case 1: strcpy(p, "st"); break;
2316     case 2: strcpy(p, "nd"); break;
2317     case 3: strcpy(p, "rd"); break;
2318     default: strcpy(p, "th"); break;
2319     }
2320     return buffer;
2321     }
2322    
2323    
2324    
2325     /*************************************************
2326     * Compile a single pattern *
2327     *************************************************/
2328    
2329 ph10 1003 /* Do nothing if the pattern has already been compiled. This is the case for
2330     include/exclude patterns read from a file.
2331 nigel 87
2332 ph10 1003 When the -F option has been used, each "pattern" may be a list of strings,
2333     separated by line breaks. They will be matched literally. We split such a
2334     string and compile the first substring, inserting an additional block into the
2335     pattern chain.
2336    
2337 nigel 87 Arguments:
2338 ph10 1003 p points to the pattern block
2339 nigel 87 options the PCRE options
2340 ph10 1003 popts the processing options
2341     fromfile TRUE if the pattern was read from a file
2342     fromtext file name or identifying text (e.g. "include")
2343 nigel 87 count 0 if this is the only command line pattern, or
2344     number of the command line pattern, or
2345     linenumber for a pattern from a file
2346    
2347     Returns: TRUE on success, FALSE after an error
2348     */
2349    
2350     static BOOL
2351 ph10 1003 compile_pattern(patstr *p, int options, int popts, int fromfile,
2352     const char *fromtext, int count)
2353 nigel 87 {
2354 ph10 644 char buffer[PATBUFSIZE];
2355 nigel 87 const char *error;
2356 ph10 1003 char *ps = p->string;
2357     int patlen = strlen(ps);
2358 nigel 87 int errptr;
2359    
2360 ph10 1003 if (p->compiled != NULL) return TRUE;
2361    
2362     if ((popts & PO_FIXED_STRINGS) != 0)
2363 nigel 87 {
2364 ph10 1003 int ellength;
2365     char *eop = ps + patlen;
2366     char *pe = end_of_line(ps, eop, &ellength);
2367 nigel 87
2368 ph10 1003 if (ellength != 0)
2369     {
2370     if (add_pattern(pe, p) == NULL) return FALSE;
2371     patlen = (int)(pe - ps - ellength);
2372     }
2373 ph10 142 }
2374 nigel 87
2375 ph10 1003 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2376     p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2377     if (p->compiled != NULL) return TRUE;
2378    
2379 nigel 87 /* Handle compile errors */
2380    
2381 ph10 1003 errptr -= (int)strlen(prefix[popts]);
2382     if (errptr > patlen) errptr = patlen;
2383 nigel 87
2384 ph10 1003 if (fromfile)
2385 nigel 87 {
2386 ph10 1003 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2387     "at offset %d: %s\n", count, fromtext, errptr, error);
2388 nigel 87 }
2389     else
2390     {
2391 ph10 1003 if (count == 0)
2392     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2393     fromtext, errptr, error);
2394     else
2395     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2396     ordin(count), fromtext, errptr, error);
2397 nigel 87 }
2398    
2399     return FALSE;
2400     }
2401    
2402    
2403    
2404     /*************************************************
2405 ph10 1003 * Read and compile a file of patterns *
2406 nigel 87 *************************************************/
2407    
2408 ph10 1003 /* This is used for --filelist, --include-from, and --exclude-from.
2409 nigel 87
2410     Arguments:
2411 ph10 1003 name the name of the file; "-" is stdin
2412     patptr pointer to the pattern chain anchor
2413     patlastptr pointer to the last pattern pointer
2414     popts the process options to pass to pattern_compile()
2415 nigel 87
2416 ph10 1003 Returns: TRUE if all went well
2417 nigel 87 */
2418    
2419     static BOOL
2420 ph10 1003 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2421 nigel 87 {
2422 ph10 1003 int linenumber = 0;
2423     FILE *f;
2424     char *filename;
2425     char buffer[PATBUFSIZE];
2426    
2427     if (strcmp(name, "-") == 0)
2428 nigel 87 {
2429 ph10 1003 f = stdin;
2430     filename = stdin_name;
2431     }
2432     else
2433     {
2434     f = fopen(name, "r");
2435     if (f == NULL)
2436     {
2437     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2438     return FALSE;
2439     }
2440     filename = name;
2441     }
2442    
2443     while (fgets(buffer, PATBUFSIZE, f) != NULL)
2444     {
2445     char *s = buffer + (int)strlen(buffer);
2446     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2447     *s = 0;
2448     linenumber++;
2449     if (buffer[0] == 0) continue; /* Skip blank lines */
2450    
2451     /* Note: this call to add_pattern() puts a pointer to the local variable
2452     "buffer" into the pattern chain. However, that pointer is used only when
2453     compiling the pattern, which happens immediately below, so we flatten it
2454     afterwards, as a precaution against any later code trying to use it. */
2455    
2456     *patlastptr = add_pattern(buffer, *patlastptr);
2457     if (*patlastptr == NULL) return FALSE;
2458     if (*patptr == NULL) *patptr = *patlastptr;
2459    
2460     /* This loop is needed because compiling a "pattern" when -F is set may add
2461     on additional literal patterns if the original contains a newline. In the
2462     common case, it never will, because fgets() stops at a newline. However,
2463     the -N option can be used to give pcregrep a different newline setting. */
2464    
2465 nigel 87 for(;;)
2466     {
2467 ph10 1003 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2468     linenumber))
2469 nigel 87 return FALSE;
2470 ph10 1003 (*patlastptr)->string = NULL; /* Insurance */
2471     if ((*patlastptr)->next == NULL) break;
2472     *patlastptr = (*patlastptr)->next;
2473 nigel 87 }
2474     }
2475 ph10 1003
2476     if (f != stdin) fclose(f);
2477     return TRUE;
2478 nigel 87 }
2479    
2480    
2481    
2482     /*************************************************
2483 nigel 49 * Main program *
2484     *************************************************/
2485    
2486 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2487    
2488 nigel 49 int
2489     main(int argc, char **argv)
2490     {
2491 nigel 53 int i, j;
2492 nigel 49 int rc = 1;
2493 nigel 87 BOOL only_one_at_top;
2494 ph10 1003 patstr *cp;
2495     fnstr *fn;
2496 nigel 87 const char *locale_from = "--locale";
2497 nigel 49 const char *error;
2498    
2499 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2500     pcre_jit_stack *jit_stack = NULL;
2501     #endif
2502    
2503 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2504     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2505 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2506 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2507 ph10 391 rather than escapes such as as '\r'. */
2508 nigel 91
2509     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2510     switch(i)
2511     {
2512 ph10 391 default: newline = (char *)"lf"; break;
2513     case 13: newline = (char *)"cr"; break;
2514     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2515     case -1: newline = (char *)"any"; break;
2516     case -2: newline = (char *)"anycrlf"; break;
2517 nigel 91 }
2518    
2519 nigel 49 /* Process the options */
2520    
2521     for (i = 1; i < argc; i++)
2522     {
2523 nigel 77 option_item *op = NULL;
2524     char *option_data = (char *)""; /* default to keep compiler happy */
2525     BOOL longop;
2526     BOOL longopwasequals = FALSE;
2527    
2528 nigel 49 if (argv[i][0] != '-') break;
2529 nigel 53
2530 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2531 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2532 nigel 63
2533 nigel 77 if (argv[i][1] == 0)
2534     {
2535 ph10 1003 if (pattern_files != NULL || patterns != NULL) break;
2536 ph10 561 else pcregrep_exit(usage(2));
2537 nigel 77 }
2538 nigel 63
2539 nigel 77 /* Handle a long name option, or -- to terminate the options */
2540 nigel 53
2541     if (argv[i][1] == '-')
2542 nigel 49 {
2543 nigel 77 char *arg = argv[i] + 2;
2544     char *argequals = strchr(arg, '=');
2545 nigel 53
2546 nigel 77 if (*arg == 0) /* -- terminates options */
2547 nigel 49 {
2548 nigel 77 i++;
2549     break; /* out of the options-handling loop */
2550 nigel 53 }
2551 nigel 49
2552 nigel 77 longop = TRUE;
2553    
2554     /* Some long options have data that follows after =, for example file=name.
2555     Some options have variations in the long name spelling: specifically, we
2556     allow "regexp" because GNU grep allows it, though I personally go along
2557 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2558 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2559     both these categories. */
2560 nigel 77
2561 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2562     {
2563 nigel 77 char *opbra = strchr(op->long_name, '(');
2564     char *equals = strchr(op->long_name, '=');
2565 ph10 461
2566 ph10 422 /* Handle options with only one spelling of the name */
2567 ph10 461
2568 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2569 nigel 53 {
2570 nigel 77 if (equals == NULL) /* Not thing=data case */
2571     {
2572     if (strcmp(arg, op->long_name) == 0) break;
2573     }
2574     else /* Special case xxx=data */
2575     {
2576 ph10 530 int oplen = (int)(equals - op->long_name);
2577 ph10 535 int arglen = (argequals == NULL)?
2578 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2579 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2580     {
2581     option_data = arg + arglen;
2582     if (*option_data == '=')
2583     {
2584     option_data++;
2585     longopwasequals = TRUE;
2586     }
2587     break;
2588     }
2589     }
2590 nigel 53 }
2591 ph10 461
2592 ph10 422 /* Handle options with an alternate spelling of the name */
2593 ph10 461
2594     else
2595 nigel 77 {
2596     char buff1[24];
2597     char buff2[24];
2598 ph10 461
2599 ph10 530 int baselen = (int)(opbra - op->long_name);
2600     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2601 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2602 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2603 ph10 461
2604 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2605 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2606 ph10 461
2607     if (strncmp(arg, buff1, arglen) == 0 ||
2608 ph10 422 strncmp(arg, buff2, arglen) == 0)
2609     {
2610     if (equals != NULL && argequals != NULL)
2611     {
2612 ph10 461 option_data = argequals;
2613 ph10 422 if (*option_data == '=')
2614     {
2615 ph10 461 option_data++;
2616 ph10 422 longopwasequals = TRUE;
2617 ph10 461 }
2618     }
2619 nigel 77 break;
2620 ph10 461 }
2621 nigel 77 }
2622 nigel 53 }
2623 nigel 77
2624 nigel 53 if (op->one_char == 0)
2625     {
2626     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2627 ph10 561 pcregrep_exit(usage(2));
2628 nigel 53 }
2629     }
2630 nigel 49
2631 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2632     are not in the right form for putting in the option table because they use
2633     only one hyphen, yet are more than one character long. By putting them
2634     separately here, they will not get displayed as part of the help() output,
2635     but I don't think Jeffrey will care about that. */
2636    
2637     #ifdef JFRIEDL_DEBUG
2638     else if (strcmp(argv[i], "-pre") == 0) {
2639     jfriedl_prefix = argv[++i];
2640     continue;
2641     } else if (strcmp(argv[i], "-post") == 0) {
2642     jfriedl_postfix = argv[++i];
2643     continue;
2644     } else if (strcmp(argv[i], "-XT") == 0) {
2645     sscanf(argv[++i], "%d", &jfriedl_XT);
2646     continue;
2647     } else if (strcmp(argv[i], "-XR") == 0) {
2648     sscanf(argv[++i], "%d", &jfriedl_XR);
2649     continue;
2650     }
2651     #endif
2652    
2653    
2654 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2655     continue till we hit the last one or one that needs data. */
2656 nigel 53
2657     else
2658     {
2659     char *s = argv[i] + 1;
2660 nigel 77 longop = FALSE;
2661 ph10 1221
2662 nigel 53 while (*s != 0)
2663     {
2664 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2665 ph10 579 {
2666     if (*s == op->one_char) break;
2667 ph10 565 }
2668 nigel 77 if (op->one_char == 0)
2669 nigel 53 {
2670 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2671     *s, argv[i]);
2672 ph10 561 pcregrep_exit(usage(2));
2673 nigel 77 }
2674 ph10 1221
2675 ph10 1039 option_data = s+1;
2676 ph10 1221
2677     /* Break out if this is the last character in the string; it's handled
2678 ph10 1039 below like a single multi-char option. */
2679 ph10 579
2680 ph10 1221 if (*option_data == 0) break;
2681    
2682 ph10 1039 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2683     are used for ones that either have a numerical number or defaults, i.e.
2684     the data is optional. If a digit follows, there is data; if not, carry on
2685 ph10 565 with other single-character options in the same string. */
2686 ph10 579
2687 ph10 1039 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2688 ph10 579 {
2689     if (isdigit((unsigned char)s[1])) break;
2690 nigel 53 }
2691 ph10 1039 else /* Check for an option with data */
2692 ph10 579 {
2693 ph10 1039 if (op->type != OP_NODATA) break;
2694 ph10 579 }
2695    
2696     /* Handle a single-character option with no data, then loop for the
2697 ph10 565 next character in the string. */
2698 ph10 1221
2699 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2700 nigel 49 }
2701     }
2702 ph10 1221
2703 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2704     is NO_DATA, it means that there is no data, and the option might set
2705     something in the PCRE options. */
2706 nigel 77
2707     if (op->type == OP_NODATA)
2708     {
2709 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2710     continue;
2711     }
2712    
2713 ph10 1039 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2714 nigel 87 either has a value or defaults to something. It cannot have data in a
2715 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2716 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2717 ph10 1221
2718 nigel 87 if (*option_data == 0 &&
2719 ph10 1039 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2720     op->type == OP_OP_NUMBERS))
2721 nigel 87 {
2722     switch (op->one_char)
2723 nigel 77 {
2724 nigel 87 case N_COLOUR:
2725     colour_option = (char *)"auto";
2726     break;
2727 ph10 579
2728 ph10 565 case 'o':
2729 ph10 1039 only_matching_last = add_number(0, only_matching_last);
2730     if (only_matching == NULL) only_matching = only_matching_last;
2731 ph10 579 break;
2732    
2733 nigel 87 #ifdef JFRIEDL_DEBUG
2734     case 'S':
2735     S_arg = 0;
2736     break;
2737     #endif
2738 nigel 77 }
2739 nigel 87 continue;
2740     }
2741 nigel 77
2742 nigel 87 /* Otherwise, find the data string for the option. */
2743    
2744     if (*option_data == 0)
2745     {
2746     if (i >= argc - 1 || longopwasequals)
2747 nigel 77 {
2748 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2749 ph10 561 pcregrep_exit(usage(2));
2750 nigel 87 }
2751     option_data = argv[++i];
2752     }
2753    
2754 ph10 1039 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2755     added to a chain of numbers. */
2756    
2757     if (op->type == OP_OP_NUMBERS)
2758     {
2759     unsigned long int n = decode_number(option_data, op, longop);
2760     omdatastr *omd = (omdatastr *)op->dataptr;
2761     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2762     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2763     }
2764    
2765 ph10 1003 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2766     include/exclude options, which can be called multiple times to create lists
2767     of patterns. */
2768 ph10 975
2769 ph10 1039 else if (op->type == OP_PATLIST)
2770     {
2771     patdatastr *pd = (patdatastr *)op->dataptr;
2772     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2773     if (*(pd->lastptr) == NULL) goto EXIT2;
2774     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2775     }
2776 ph10 1003
2777     /* If the option type is OP_FILELIST, it's one of the options that names a
2778     file. */
2779    
2780     else if (op->type == OP_FILELIST)
2781 nigel 87 {
2782 ph10 1003 fndatastr *fd = (fndatastr *)op->dataptr;
2783     fn = (fnstr *)malloc(sizeof(fnstr));
2784     if (fn == NULL)
2785 nigel 87 {
2786 ph10 1003 fprintf(stderr, "pcregrep: malloc failed\n");
2787     goto EXIT2;
2788 nigel 87 }
2789 ph10 1003 fn->next = NULL;
2790     fn->name = option_data;
2791     if (*(fd->anchor) == NULL)
2792     *(fd->anchor) = fn;
2793     else
2794     (*(fd->lastptr))->next = fn;
2795     *(fd->lastptr) = fn;
2796 nigel 87 }
2797 ph10 975
2798 ph10 947 /* Handle OP_BINARY_FILES */
2799 ph10 975
2800 ph10 947 else if (op->type == OP_BINFILES)
2801     {
2802     if (strcmp(option_data, "binary") == 0)
2803     binary_files = BIN_BINARY;
2804     else if (strcmp(option_data, "without-match") == 0)
2805     binary_files = BIN_NOMATCH;
2806     else if (strcmp(option_data, "text") == 0)
2807     binary_files = BIN_TEXT;
2808     else
2809     {
2810 ph10 975 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2811     option_data);
2812 ph10 947 pcregrep_exit(usage(2));
2813 ph10 975 }
2814     }
2815 nigel 87
2816 ph10 1039 /* Otherwise, deal with a single string or numeric data value. */
2817 nigel 87
2818 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2819     op->type != OP_OP_NUMBER)
2820 nigel 87 {
2821     *((char **)op->dataptr) = option_data;
2822     }
2823     else
2824     {
2825 ph10 1039 unsigned long int n = decode_number(option_data, op, longop);
2826     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2827     else *((int *)op->dataptr) = n;
2828 nigel 77 }
2829 nigel 49 }
2830    
2831 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2832     for -A and -B. */
2833    
2834     if (both_context > 0)
2835     {
2836     if (after_context == 0) after_context = both_context;
2837     if (before_context == 0) before_context = both_context;
2838     }
2839 ph10 286
2840     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2841 ph10 1039 However, all three set show_only_matching because they display, each in their
2842     own way, only the data that has matched. */
2843 nigel 77
2844 ph10 1039 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2845 ph10 286 (file_offsets && line_offsets))
2846 ph10 280 {
2847     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2848     "and/or --line-offsets\n");
2849 ph10 561 pcregrep_exit(usage(2));
2850 ph10 280 }
2851    
2852 ph10 1039 if (only_matching != NULL || file_offsets || line_offsets)
2853     show_only_matching = TRUE;
2854 ph10 286
2855 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2856     LC_ALL environment variable is set, and if so, use it. */
2857 nigel 49
2858 nigel 87 if (locale == NULL)
2859 nigel 53 {
2860 nigel 87 locale = getenv("LC_ALL");
2861     locale_from = "LCC_ALL";
2862 nigel 53 }
2863 nigel 49
2864 nigel 87 if (locale == NULL)
2865     {
2866     locale = getenv("LC_CTYPE");
2867     locale_from = "LC_CTYPE";
2868     }
2869 nigel 49
2870 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2871     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2872    
2873     if (locale != NULL)
2874 nigel 49 {
2875 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2876 nigel 53 {
2877 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2878     locale, locale_from);
2879 nigel 53 return 2;
2880     }
2881 nigel 87 pcretables = pcre_maketables();
2882     }
2883 nigel 77
2884 nigel 87 /* Sort out colouring */
2885    
2886     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2887     {
2888     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2889     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2890     else
2891 nigel 53 {
2892 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2893     colour_option);
2894     return 2;
2895 nigel 77 }
2896 nigel 87 if (do_colour)
2897 nigel 77 {
2898 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2899     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2900     if (cs != NULL) colour_string = cs;
2901 nigel 77 }
2902 nigel 87 }
2903 ph10 535
2904 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2905    
2906     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2907     {
2908     pcre_options |= PCRE_NEWLINE_CR;
2909 nigel 93 endlinetype = EL_CR;
2910 nigel 91 }
2911     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2912     {
2913     pcre_options |= PCRE_NEWLINE_LF;
2914 nigel 93 endlinetype = EL_LF;
2915 nigel 91 }
2916     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2917     {
2918     pcre_options |= PCRE_NEWLINE_CRLF;
2919 nigel 93 endlinetype = EL_CRLF;
2920 nigel 91 }
2921 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2922     {
2923     pcre_options |= PCRE_NEWLINE_ANY;
2924     endlinetype = EL_ANY;
2925     }
2926 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2927     {
2928     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2929     endlinetype = EL_ANYCRLF;
2930     }
2931 nigel 91 else
2932     {
2933     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2934     return 2;
2935     }
2936    
2937 nigel 87 /* Interpret the text values for -d and -D */
2938    
2939     if (dee_option != NULL)
2940     {
2941     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2942     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2943     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2944     else
2945 nigel 77 {
2946 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2947     return 2;
2948 nigel 53 }
2949 nigel 49 }
2950    
2951 nigel 87 if (DEE_option != NULL)
2952     {
2953     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2954     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2955     else
2956     {
2957     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2958     return 2;
2959     }
2960     }
2961 nigel 49
2962 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2963 nigel 87
2964     #ifdef JFRIEDL_DEBUG
2965     if (S_arg > 9)
2966 nigel 49 {
2967 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2968     return 2;
2969     }
2970 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2971     {
2972     if (jfriedl_XT == 0) jfriedl_XT = 1;
2973     if (jfriedl_XR == 0) jfriedl_XR = 1;
2974     }
2975 nigel 87 #endif
2976 nigel 77
2977 ph10 1003 /* Get memory for the main buffer. */
2978 nigel 87
2979 ph10 644 bufsize = 3*bufthird;
2980     main_buffer = (char *)malloc(bufsize);
2981 nigel 87
2982 ph10 1003 if (main_buffer == NULL)
2983 nigel 87 {
2984     fprintf(stderr, "pcregrep: malloc failed\n");
2985 ph10 123 goto EXIT2;
2986 nigel 87 }
2987    
2988 ph10 1003 /* If no patterns were provided by -e, and there are no files provided by -f,
2989 nigel 87 the first argument is the one and only pattern, and it must exist. */
2990    
2991 ph10 1003 if (patterns == NULL && pattern_files == NULL)
2992 nigel 87 {
2993 nigel 63 if (i >= argc) return usage(2);
2994 ph10 1003 patterns = patterns_last = add_pattern(argv[i++], NULL);
2995     if (patterns == NULL) goto EXIT2;
2996 nigel 87 }
2997 nigel 77
2998 nigel 87 /* Compile the patterns that were provided on the command line, either by
2999 ph10 1003 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3000     after all the command-line options are read so that we know which PCRE options
3001     to use. When -F is used, compile_pattern() may add another block into the
3002     chain, so we must not access the next pointer till after the compile. */
3003 nigel 87
3004 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3005 nigel 87 {
3006 ph10 1003 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3007     (j == 1 && patterns->next == NULL)? 0 : j))
3008 ph10 123 goto EXIT2;
3009 nigel 87 }
3010    
3011 ph10 1003 /* Read and compile the regular expressions that are provided in files. */
3012 nigel 87
3013 ph10 1003 for (fn = pattern_files; fn != NULL; fn = fn->next)
3014 nigel 87 {
3015 ph10 1003 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3016     goto EXIT2;
3017 ph10 1004 }
3018 nigel 87
3019 ph10 1039 /* Study the regular expressions, as we will be running them many times. If an
3020 ph10 1035 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3021     returned, even if studying produces no data. */
3022 nigel 53
3023 ph10 1035 if (match_limit > 0 || match_limit_recursion > 0)
3024     study_options |= PCRE_STUDY_EXTRA_NEEDED;
3025    
3026     /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3027    
3028 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3029     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3030     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3031 ph10 691 #endif
3032    
3033 ph10 1003 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3034 nigel 53 {
3035 ph10 1003 cp->hint = pcre_study(cp->compiled, study_options, &error);
3036 nigel 53 if (error != NULL)
3037     {
3038     char s[16];
3039 ph10 1003 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3040 nigel 53 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3041 ph10 121 goto EXIT2;
3042 nigel 53 }
3043 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3044 ph10 1003 if (jit_stack != NULL && cp->hint != NULL)
3045     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3046 ph10 685 #endif
3047 nigel 53 }
3048 ph10 579
3049 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3050 ph10 1039 pcre_extra block for each pattern. There will always be an extra block because
3051 ph10 1035 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3052 nigel 53
3053 ph10 1035 for (cp = patterns; cp != NULL; cp = cp->next)
3054 ph10 561 {
3055 ph10 1035 if (match_limit > 0)
3056 ph10 561 {
3057 ph10 1035 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3058     cp->hint->match_limit = match_limit;
3059 ph10 561 }
3060 ph10 1039
3061 ph10 1035 if (match_limit_recursion > 0)
3062     {
3063     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3064     cp->hint->match_limit_recursion = match_limit_recursion;
3065     }
3066 ph10 579 }
3067 ph10 561
3068 ph10 1003 /* If there are include or exclude patterns read from the command line, compile
3069     them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3070     0. */
3071 nigel 77
3072 ph10 1003 for (j = 0; j < 4; j++)
3073 nigel 77 {
3074 ph10 1003 int k;
3075     for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3076 nigel 77 {
3077 ph10 1003 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3078     (k == 1 && cp->next == NULL)? 0 : k))
3079     goto EXIT2;
3080 nigel 77 }
3081     }
3082    
3083 ph10 1003 /* Read and compile include/exclude patterns from files. */
3084    
3085     for (fn = include_from; fn != NULL; fn = fn->next)
3086 nigel 77 {
3087 ph10 1003 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3088 ph10 121 goto EXIT2;
3089 nigel 77 }
3090    
3091 ph10 1003 for (fn = exclude_from; fn != NULL; fn = fn->next)
3092 ph10 325 {
3093 ph10 1003 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3094 ph10 325 goto EXIT2;
3095     }
3096    
3097 ph10 1003 /* If there are no files that contain lists of files to search, and there are
3098     no file arguments, search stdin, and then exit. */
3099    
3100     if (file_lists == NULL && i >= argc)
3101 ph10 325 {
3102 ph10 1003 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3103     (filenames > FN_DEFAULT)? stdin_name : NULL);
3104     goto EXIT;
3105 ph10 325 }
3106 ph10 975
3107 ph10 1003 /* If any files that contains a list of files to search have been specified,
3108     read them line by line and search the given files. */
3109 ph10 325
3110 ph10 1003 for (fn = file_lists; fn != NULL; fn = fn->next)
3111 ph10 944 {
3112     char buffer[PATBUFSIZE];
3113     FILE *fl;
3114 ph10 1003 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3115 ph10 975 {
3116 ph10 1003 fl = fopen(fn->name, "rb");
3117 ph10 944 if (fl == NULL)
3118     {
3119 ph10 1003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3120 ph10 944 strerror(errno));
3121     goto EXIT2;
3122 ph10 975 }
3123     }
3124 ph10 944 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3125     {
3126     int frc;
3127     char *end = buffer + (int)strlen(buffer);
3128     while (end > buffer && isspace(end[-1])) end--;
3129 ph10 975 *end = 0;
3130     if (*buffer != 0)
3131     {
3132     frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3133 ph10 944 if (frc > 1) rc = frc;
3134 ph10 975 else if (frc == 0 && rc == 1) rc = 0;
3135     }
3136     }
3137 ph10 1003 if (fl != stdin) fclose(fl);
3138 ph10 975 }
3139 nigel 49
3140 ph10 1003 /* After handling file-list, work through remaining arguments. Pass in the fact
3141     that there is only one argument at top level - this suppresses the file name if
3142     the argument is not a directory and filenames are not otherwise forced. */
3143 ph10 944
3144 ph10 1003 only_one_at_top = i == argc - 1 && file_lists == NULL;
3145 nigel 49
3146     for (; i < argc; i++)
3147     {
3148 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3149     only_one_at_top);
3150 nigel 77 if (frc > 1) rc = frc;
3151     else if (frc == 0 && rc == 1) rc = 0;
3152 nigel 49 }
3153    
3154 ph10 121 EXIT:
3155 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
3156     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3157     #endif
3158 ph10 1003
3159 ph10 644 if (main_buffer != NULL) free(main_buffer);
3160 ph10 1003
3161     free_pattern_chain(patterns);
3162     free_pattern_chain(include_patterns);
3163     free_pattern_chain(include_dir_patterns);
3164     free_pattern_chain(exclude_patterns);
3165     free_pattern_chain(exclude_dir_patterns);
3166    
3167     free_file_chain(exclude_from);
3168     free_file_chain(include_from);
3169     free_file_chain(pattern_files);
3170     free_file_chain(file_lists);
3171    
3172 ph10 1039 while (only_matching != NULL)
3173     {
3174     omstr *this = only_matching;
3175     only_matching = this->next;
3176     free(this);
3177     }
3178    
3179 ph10 561 pcregrep_exit(rc);
3180 ph10 121
3181     EXIT2:
3182     rc = 2;
3183     goto EXIT;
3184 nigel 49 }
3185    
3186 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12