/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 1467 - (show annotations) (download)
Thu Apr 3 16:51:41 2014 UTC (2 weeks, 3 days ago) by ph10
File MIME type: text/plain
File size: 95008 byte(s)
Two minor changes to avoid compiler warnings.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 recurse into directories, and in z/OS it can handle PDS files.
8
9 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10 additional header is required. That header is not included in the main PCRE
11 distribution because other apparatus is needed to compile pcregrep for z/OS.
12 The header can be found in the special z/OS distribution, which is available
13 from www.zaconsultants.net or from www.cbttape.org.
14
15 Copyright (c) 1997-2014 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
20
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
23
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
44 */
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include <ctype.h>
51 #include <locale.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <stdlib.h>
55 #include <errno.h>
56
57 #include <sys/types.h>
58 #include <sys/stat.h>
59
60 #ifdef HAVE_UNISTD_H
61 #include <unistd.h>
62 #endif
63
64 #ifdef SUPPORT_LIBZ
65 #include <zlib.h>
66 #endif
67
68 #ifdef SUPPORT_LIBBZ2
69 #include <bzlib.h>
70 #endif
71
72 #include "pcre.h"
73
/* Boolean support: an int-based BOOL with explicit truth constants. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

#define OFFSET_SIZE 99

/* The maximum pattern length is BUFSIZ, but never less than 8192 bytes. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* Size of a pattern buffer: the extra 10 bytes allow for prefix+suffix. */

#define PATBUFSIZE (MAXPATLEN + 10)
88
/* File name output modes, held in the "filenames" variable. The order is
significant: every value greater than FN_DEFAULT means a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d option (lower case names) and the -D option (upper case
names) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special processing options */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};

/* Binary file options */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
117
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is consumed by an "if"
with an empty body. Oddly, this does not also seem to apply to fprintf().

The "if" is wrapped in do { ... } while (0) so that FWRITE(...); behaves as a
single statement. Without the wrapper, using the macro as the body of an
if/else either fails to compile or lets a following "else" bind to the macro's
own hidden "if". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125
126
127
128 /*************************************************
129 * Global variables *
130 *************************************************/
131
132 /* Jeffrey Friedl has some debugging requirements that are not part of the
133 regular code. */
134
135 #ifdef JFRIEDL_DEBUG
136 static int S_arg = -1;
137 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139 static const char *jfriedl_prefix = "";
140 static const char *jfriedl_postfix = "";
141 #endif
142
143 static int endlinetype;
144
145 static char *colour_string = (char *)"1;31";
146 static char *colour_option = NULL;
147 static char *dee_option = NULL;
148 static char *DEE_option = NULL;
149 static char *locale = NULL;
150 static char *main_buffer = NULL;
151 static char *newline = NULL;
152 static char *om_separator = (char *)"";
153 static char *stdin_name = (char *)"(standard input)";
154
155 static const unsigned char *pcretables = NULL;
156
157 static int after_context = 0;
158 static int before_context = 0;
159 static int binary_files = BIN_BINARY;
160 static int both_context = 0;
161 static int bufthird = PCREGREP_BUFSIZE;
162 static int bufsize = 3*PCREGREP_BUFSIZE;
163
164 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165 static int dee_action = dee_SKIP;
166 #else
167 static int dee_action = dee_READ;
168 #endif
169
170 static int DEE_action = DEE_READ;
171 static int error_count = 0;
172 static int filenames = FN_DEFAULT;
173 static int pcre_options = 0;
174 static int process_options = 0;
175
176 #ifdef SUPPORT_PCREGREP_JIT
177 static int study_options = PCRE_STUDY_JIT_COMPILE;
178 #else
179 static int study_options = 0;
180 #endif
181
182 static unsigned long int match_limit = 0;
183 static unsigned long int match_limit_recursion = 0;
184
185 static BOOL count_only = FALSE;
186 static BOOL do_colour = FALSE;
187 static BOOL file_offsets = FALSE;
188 static BOOL hyphenpending = FALSE;
189 static BOOL invert = FALSE;
190 static BOOL line_buffered = FALSE;
191 static BOOL line_offsets = FALSE;
192 static BOOL multiline = FALSE;
193 static BOOL number = FALSE;
194 static BOOL omit_zero_count = FALSE;
195 static BOOL resource_error = FALSE;
196 static BOOL quiet = FALSE;
197 static BOOL show_only_matching = FALSE;
198 static BOOL silent = FALSE;
199 static BOOL utf8 = FALSE;
200
/* One node in the singly linked list of capturing group numbers given to
--only-matching. */

struct omstr {
  struct omstr *next;
  int groupnum;
};

typedef struct omstr omstr;

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

/* A number chain is described by two variables: its anchor (head) and a
pointer to its last item. This structure holds the addresses of both. */

struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
};

typedef struct omdatastr omdatastr;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219
/* One node in a singly linked list of file names (used for -f, --file-list
and --{in,ex}clude-from) */

struct fnstr {
  struct fnstr *next;
  char *name;
};

typedef struct fnstr fnstr;

/* Head and tail pointers of each file name chain. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;

static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;

static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* A file name chain is described by two variables: its anchor (head) and a
pointer to its last item. This structure holds the addresses of both. */

struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
};

typedef struct fndatastr fndatastr;

static fndatastr exclude_from_data  = { &exclude_from,  &exclude_from_last };
static fndatastr include_from_data  = { &include_from,  &include_from_last };
static fndatastr file_lists_data    = { &file_lists,    &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248
249 /* Structure for pattern and its compiled form; used for matching patterns and
250 also for include/exclude patterns. */
251
252 typedef struct patstr {
253 struct patstr *next;
254 char *string;
255 pcre *compiled;
256 pcre_extra *hint;
257 } patstr;
258
259 static patstr *patterns = NULL;
260 static patstr *patterns_last = NULL;
261 static patstr *include_patterns = NULL;
262 static patstr *include_patterns_last = NULL;
263 static patstr *exclude_patterns = NULL;
264 static patstr *exclude_patterns_last = NULL;
265 static patstr *include_dir_patterns = NULL;
266 static patstr *include_dir_patterns_last = NULL;
267 static patstr *exclude_dir_patterns = NULL;
268 static patstr *exclude_dir_patterns_last = NULL;
269
270 /* Structure holding the two variables that describe a pattern chain. A pointer
271 to such structures is used for each appropriate option. */
272
273 typedef struct patdatastr {
274 patstr **anchor;
275 patstr **lastptr;
276 } patdatastr;
277
278 static patdatastr match_patdata = { &patterns, &patterns_last };
279 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283
284 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285 &include_dir_patterns, &exclude_dir_patterns };
286
287 static const char *incexname[4] = { "--include", "--exclude",
288 "--include-dir", "--exclude-dir" };
289
/* Kinds of option, stored in the "type" field of an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

/* Description of a single command line option. */

typedef struct option_item {
  int type;               /* one of the OP_ values above */
  int one_char;           /* single-letter form, or a negative N_ code */
  void *dataptr;          /* where the option's data goes, or NULL */
  const char *long_name;  /* long option name */
  const char *help_text;  /* description shown by --help */
} option_item;

/* Options without a single-letter equivalent get a negative value in the
one_char field. This can be used to identify them. */

#define N_COLOUR        (-1)
#define N_EXCLUDE       (-2)
#define N_EXCLUDE_DIR   (-3)
#define N_HELP          (-4)
#define N_INCLUDE       (-5)
#define N_INCLUDE_DIR   (-6)
#define N_LABEL         (-7)
#define N_LOCALE        (-8)
#define N_NULL          (-9)
#define N_LOFFSETS      (-10)
#define N_FOFFSETS      (-11)
#define N_LBUFFER       (-12)
#define N_M_LIMIT       (-13)
#define N_M_LIMIT_REC   (-14)
#define N_BUFSIZE       (-15)
#define N_NOJIT         (-16)
#define N_FILE_LIST     (-17)
#define N_BINARY_FILES  (-18)
#define N_EXCLUDE_FROM  (-19)
#define N_INCLUDE_FROM  (-20)
#define N_OM_SEPARATOR  (-21)
327
328 static option_item optionlist[] = {
329 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 #ifdef SUPPORT_PCREGREP_JIT
352 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
353 #else
354 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
355 #endif
356 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377
378 /* These two were accidentally implemented with underscores instead of
379 hyphens in the option names. As this was not discovered for several releases,
380 the incorrect versions are left in the table for compatibility. However, the
381 --help function misses out any option that has an underscore in its name. */
382
383 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385
386 #ifdef JFRIEDL_DEBUG
387 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
388 #endif
389 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395 { OP_NODATA, 0, NULL, NULL, NULL }
396 };
397
/* Pattern prefixes and suffixes selected by the -w, -x, and -F options, which
set the 1, 2, and 4 bits in process_options, respectively; each table has one
entry for every combination of those bits. Combining -w with -x has the same
effect as -x on its own, so those entries are identical. The PATBUFSIZE macro
assumes that the longest prefix+suffix is 10 characters; if anything longer is
added, it must be adjusted. */

static const char *prefix[] = {
  "",
  "\\b",
  "^(?:",
  "^(?:",
  "\\Q",
  "\\b\\Q",
  "^(?:\\Q",
  "^(?:\\Q"
};

static const char *suffix[] = {
  "",
  "\\b",
  ")$",
  ")$",
  "\\E",
  "\\E\\b",
  "\\E)$",
  "\\E)$"
};

/* UTF-8 tables - used only when the newline setting is "any". */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
420
421
422
423 /*************************************************
424 * Exit from the program *
425 *************************************************/
426
427 /* If there has been a resource error, give a suitable message.
428
429 Argument: the return code
430 Returns: does not return
431 */
432
433 static void
434 pcregrep_exit(int rc)
435 {
436 if (resource_error)
437 {
438 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440 PCRE_ERROR_JIT_STACKLIMIT);
441 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442 }
443 exit(rc);
444 }
445
446
447 /*************************************************
448 * Add item to chain of patterns *
449 *************************************************/
450
451 /* Used to add an item onto a chain, or just return an unconnected item if the
452 "after" argument is NULL.
453
454 Arguments:
455 s pattern string to add
456 after if not NULL points to item to insert after
457
458 Returns: new pattern block
459 */
460
461 static patstr *
462 add_pattern(char *s, patstr *after)
463 {
464 patstr *p = (patstr *)malloc(sizeof(patstr));
465 if (p == NULL)
466 {
467 fprintf(stderr, "pcregrep: malloc failed\n");
468 pcregrep_exit(2);
469 }
470 if (strlen(s) > MAXPATLEN)
471 {
472 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473 MAXPATLEN);
474 return NULL;
475 }
476 p->next = NULL;
477 p->string = s;
478 p->compiled = NULL;
479 p->hint = NULL;
480
481 if (after != NULL)
482 {
483 p->next = after->next;
484 after->next = p;
485 }
486 return p;
487 }
488
489
490 /*************************************************
491 * Free chain of patterns *
492 *************************************************/
493
494 /* Used for several chains of patterns.
495
496 Argument: pointer to start of chain
497 Returns: nothing
498 */
499
500 static void
501 free_pattern_chain(patstr *pc)
502 {
503 while (pc != NULL)
504 {
505 patstr *p = pc;
506 pc = p->next;
507 if (p->hint != NULL) pcre_free_study(p->hint);
508 if (p->compiled != NULL) pcre_free(p->compiled);
509 free(p);
510 }
511 }
512
513
514 /*************************************************
515 * Free chain of file names *
516 *************************************************/
517
518 /*
519 Argument: pointer to start of chain
520 Returns: nothing
521 */
522
523 static void
524 free_file_chain(fnstr *fn)
525 {
526 while (fn != NULL)
527 {
528 fnstr *f = fn;
529 fn = f->next;
530 free(f);
531 }
532 }
533
534
535 /*************************************************
536 * OS-specific functions *
537 *************************************************/
538
539 /* These functions are defined so that they can be made system specific.
540 At present there are versions for Unix-style environments, Windows, native
541 z/OS, and "no support". */
542
543
544 /************* Directory scanning Unix-style and z/OS ***********/
545
546 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
547 #include <sys/types.h>
548 #include <sys/stat.h>
549 #include <dirent.h>
550
551 #if defined NATIVE_ZOS
552 /************* Directory and PDS/E scanning for z/OS ***********/
553 /************* z/OS looks mostly like Unix with USS ************/
554 /* However, z/OS needs the #include statements in this header */
555 #include "pcrzosfs.h"
556 /* That header is not included in the main PCRE distribution because
557 other apparatus is needed to compile pcregrep for z/OS. The header
558 can be found in the special z/OS distribution, which is available
559 from www.zaconsultants.net or from www.cbttape.org. */
560 #endif
561
562 typedef DIR directory_type;
563 #define FILESEP '/'
564
/* Test whether a path names a directory. If stat() fails the answer is 0, in
the expectation that opening the path as a file will then fail. */

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISDIR(info.st_mode) != 0;
  return 0;
}
573
574 static directory_type *
575 opendirectory(char *filename)
576 {
577 return opendir(filename);
578 }
579
580 static char *
581 readdirectory(directory_type *dir)
582 {
583 for (;;)
584 {
585 struct dirent *dent = readdir(dir);
586 if (dent == NULL) return NULL;
587 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
588 return dent->d_name;
589 }
590 /* Control never reaches here */
591 }
592
593 static void
594 closedirectory(directory_type *dir)
595 {
596 closedir(dir);
597 }
598
599
600 /************* Test for regular file, Unix-style **********/
601
/* Test whether a path names a regular file. If stat() fails the answer is 1,
in the expectation that opening the path as a file will then fail. */

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISREG(info.st_mode) != 0;
  return 1;
}
610
611
612 #if defined NATIVE_ZOS
613 /************* Test for a terminal in z/OS **********/
614 /* isatty() does not work in a TSO environment, so always give FALSE.*/
615
616 static BOOL
617 is_stdout_tty(void)
618 {
619 return FALSE;
620 }
621
622 static BOOL
623 is_file_tty(FILE *f)
624 {
625 return FALSE;
626 }
627
628
629 /************* Test for a terminal, Unix-style **********/
630
631 #else
632 static BOOL
633 is_stdout_tty(void)
634 {
635 return isatty(fileno(stdout));
636 }
637
638 static BOOL
639 is_file_tty(FILE *f)
640 {
641 return isatty(fileno(f));
642 }
643 #endif
644
645 /* End of Unix-style or native z/OS environment functions. */
646
647
648 /************* Directory scanning in Windows ***********/
649
650 /* I (Philip Hazel) have no means of testing this code. It was contributed by
651 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
652 when it did not exist. David Byron added a patch that moved the #include of
653 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
654 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
655 undefined when it is indeed undefined. */
656
657 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
658
659 #ifndef STRICT
660 # define STRICT
661 #endif
662 #ifndef WIN32_LEAN_AND_MEAN
663 # define WIN32_LEAN_AND_MEAN
664 #endif
665
666 #include <windows.h>
667
668 #ifndef INVALID_FILE_ATTRIBUTES
669 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
670 #endif
671
672 typedef struct directory_type
673 {
674 HANDLE handle;
675 BOOL first;
676 WIN32_FIND_DATA data;
677 } directory_type;
678
679 #define FILESEP '/'
680
681 int
682 isdirectory(char *filename)
683 {
684 DWORD attr = GetFileAttributes(filename);
685 if (attr == INVALID_FILE_ATTRIBUTES)
686 return 0;
687 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
688 }
689
690 directory_type *
691 opendirectory(char *filename)
692 {
693 size_t len;
694 char *pattern;
695 directory_type *dir;
696 DWORD err;
697 len = strlen(filename);
698 pattern = (char *)malloc(len + 3);
699 dir = (directory_type *)malloc(sizeof(*dir));
700 if ((pattern == NULL) || (dir == NULL))
701 {
702 fprintf(stderr, "pcregrep: malloc failed\n");
703 pcregrep_exit(2);
704 }
705 memcpy(pattern, filename, len);
706 memcpy(&(pattern[len]), "\\*", 3);
707 dir->handle = FindFirstFile(pattern, &(dir->data));
708 if (dir->handle != INVALID_HANDLE_VALUE)
709 {
710 free(pattern);
711 dir->first = TRUE;
712 return dir;
713 }
714 err = GetLastError();
715 free(pattern);
716 free(dir);
717 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
718 return NULL;
719 }
720
721 char *
722 readdirectory(directory_type *dir)
723 {
724 for (;;)
725 {
726 if (!dir->first)
727 {
728 if (!FindNextFile(dir->handle, &(dir->data)))
729 return NULL;
730 }
731 else
732 {
733 dir->first = FALSE;
734 }
735 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
736 return dir->data.cFileName;
737 }
738 #ifndef _MSC_VER
739 return NULL; /* Keep compiler happy; never executed */
740 #endif
741 }
742
743 void
744 closedirectory(directory_type *dir)
745 {
746 FindClose(dir->handle);
747 free(dir);
748 }
749
750
751 /************* Test for regular file in Windows **********/
752
/* No real test for a regular file is made in Windows; any path that is not a
directory is assumed to be regular. */

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
760
761
762 /************* Test for a terminal in Windows **********/
763
764 /* I don't know how to do this; assume never */
765
766 static BOOL
767 is_stdout_tty(void)
768 {
769 return FALSE;
770 }
771
772 static BOOL
773 is_file_tty(FILE *f)
774 {
775 return FALSE;
776 }
777
778 /* End of Windows functions */
779
780
781 /************* Directory scanning when we can't do it ***********/
782
783 /* The type is void, and apart from isdirectory(), the functions do nothing. */
784
785 #else
786
#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory can never be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
794
795
796 /************* Test for regular file when we can't do it **********/
797
/* With no means of testing, every path is assumed to be a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
801
802
803 /************* Test for a terminal when we can't do it **********/
804
805 static BOOL
806 is_stdout_tty(void)
807 {
808 return FALSE;
809 }
810
811 static BOOL
812 is_file_tty(FILE *f)
813 {
814 return FALSE;
815 }
816
817 #endif /* End of system-specific functions */
818
819
820
821 #ifndef HAVE_STRERROR
822 /*************************************************
823 * Provide strerror() for non-ANSI libraries *
824 *************************************************/
825
/* Replacement for strerror(), compiled only when HAVE_STRERROR is not defined.
Some old-fashioned systems still around (e.g. SunOS4) lack strerror() in their
libraries, but the same facility can be provided by indexing sys_errlist. A
number outside the range 0 to sys_nerr-1 yields a fixed "unknown" message. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
839 #endif /* HAVE_STRERROR */
840
841
842
843 /*************************************************
844 * Usage function *
845 *************************************************/
846
847 static int
848 usage(int rc)
849 {
850 option_item *op;
851 fprintf(stderr, "Usage: pcregrep [-");
852 for (op = optionlist; op->one_char != 0; op++)
853 {
854 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
855 }
856 fprintf(stderr, "] [long options] [pattern] [files]\n");
857 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
858 "options.\n");
859 return rc;
860 }
861
862
863
864 /*************************************************
865 * Help function *
866 *************************************************/
867
868 static void
869 help(void)
870 {
871 option_item *op;
872
873 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
874 printf("Search for PATTERN in each FILE or standard input.\n");
875 printf("PATTERN must be present if neither -e nor -f is used.\n");
876 printf("\"-\" can be used as a file name to mean STDIN.\n");
877
878 #ifdef SUPPORT_LIBZ
879 printf("Files whose names end in .gz are read using zlib.\n");
880 #endif
881
882 #ifdef SUPPORT_LIBBZ2
883 printf("Files whose names end in .bz2 are read using bzlib2.\n");
884 #endif
885
886 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
887 printf("Other files and the standard input are read as plain files.\n\n");
888 #else
889 printf("All files are read as plain files, without any interpretation.\n\n");
890 #endif
891
892 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
893 printf("Options:\n");
894
895 for (op = optionlist; op->one_char != 0; op++)
896 {
897 int n;
898 char s[4];
899
900 /* Two options were accidentally implemented and documented with underscores
901 instead of hyphens in their names, something that was not noticed for quite a
902 few releases. When fixing this, I left the underscored versions in the list
903 in case people were using them. However, we don't want to display them in the
904 help data. There are no other options that contain underscores, and we do not
905 expect ever to implement such options. Therefore, just omit any option that
906 contains an underscore. */
907
908 if (strchr(op->long_name, '_') != NULL) continue;
909
910 if (op->one_char > 0 && (op->long_name)[0] == 0)
911 n = 31 - printf(" -%c", op->one_char);
912 else
913 {
914 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
915 else strcpy(s, " ");
916 n = 31 - printf(" %s --%s", s, op->long_name);
917 }
918
919 if (n < 1) n = 1;
920 printf("%.*s%s\n", n, " ", op->help_text);
921 }
922
923 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
924 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
925 printf("When reading patterns or file names from a file, trailing white\n");
926 printf("space is removed and blank lines are ignored.\n");
927 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
928
929 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
930 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
931 }
932
933
934
935 /*************************************************
936 * Test exclude/includes *
937 *************************************************/
938
939 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
940 there are no includes, the path must match an include pattern.
941
942 Arguments:
943 path the path to be matched
944 ip the chain of include patterns
945 ep the chain of exclude patterns
946
947 Returns: TRUE if the path is not excluded
948 */
949
950 static BOOL
951 test_incexc(char *path, patstr *ip, patstr *ep)
952 {
953 int plen = strlen(path);
954
955 for (; ep != NULL; ep = ep->next)
956 {
957 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
958 return FALSE;
959 }
960
961 if (ip == NULL) return TRUE;
962
963 for (; ip != NULL; ip = ip->next)
964 {
965 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
966 return TRUE;
967 }
968
969 return FALSE;
970 }
971
972
973
974 /*************************************************
975 * Decode integer argument value *
976 *************************************************/
977
978 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
979 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
980 just keep it simple.
981
982 Arguments:
983 option_data the option data string
984 op the option item (for error messages)
985 longop TRUE if option given in long form
986
987 Returns: a long integer
988 */
989
990 static long int
991 decode_number(char *option_data, option_item *op, BOOL longop)
992 {
993 unsigned long int n = 0;
994 char *endptr = option_data;
995 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
996 while (isdigit((unsigned char)(*endptr)))
997 n = n * 10 + (int)(*endptr++ - '0');
998 if (toupper(*endptr) == 'K')
999 {
1000 n *= 1024;
1001 endptr++;
1002 }
1003 else if (toupper(*endptr) == 'M')
1004 {
1005 n *= 1024*1024;
1006 endptr++;
1007 }
1008
1009 if (*endptr != 0) /* Error */
1010 {
1011 if (longop)
1012 {
1013 char *equals = strchr(op->long_name, '=');
1014 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1015 (int)(equals - op->long_name);
1016 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1017 option_data, nlen, op->long_name);
1018 }
1019 else
1020 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1021 option_data, op->one_char);
1022 pcregrep_exit(usage(2));
1023 }
1024
1025 return n;
1026 }
1027
1028
1029
1030 /*************************************************
1031 * Add item to a chain of numbers *
1032 *************************************************/
1033
1034 /* Used to add an item onto a chain, or just return an unconnected item if the
1035 "after" argument is NULL.
1036
1037 Arguments:
1038 n the number to add
1039 after if not NULL points to item to insert after
1040
1041 Returns: new number block
1042 */
1043
1044 static omstr *
1045 add_number(int n, omstr *after)
1046 {
1047 omstr *om = (omstr *)malloc(sizeof(omstr));
1048
1049 if (om == NULL)
1050 {
1051 fprintf(stderr, "pcregrep: malloc failed\n");
1052 pcregrep_exit(2);
1053 }
1054 om->next = NULL;
1055 om->groupnum = n;
1056
1057 if (after != NULL)
1058 {
1059 om->next = after->next;
1060 after->next = om;
1061 }
1062 return om;
1063 }
1064
1065
1066
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The space limit is tested
before each character is read, so nothing is read or stored when "length" is
zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read

  f          the file

Returns:     the number of characters read, zero at end of file
             (also zero when length is not positive)
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;

/* Check the remaining space first so that a non-positive length can never
cause a store beyond the end of the buffer. */

while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = c;
  if (c == '\n') break;
  }

return yield;
}
1098
1099
1100
1101 /*************************************************
1102 * Find end of line *
1103 *************************************************/
1104
1105 /* The length of the endline sequence that is found is set via lenptr. This may
1106 be zero at the very end of the file if there is no line-ending sequence there.
1107
1108 Arguments:
1109 p current position in line
1110 endptr end of available data
1111 lenptr where to put the length of the eol sequence
1112
1113 Returns: pointer after the last byte of the line,
1114 including the newline byte(s)
1115 */
1116
1117 static char *
1118 end_of_line(char *p, char *endptr, int *lenptr)
1119 {
1120 switch(endlinetype)
1121 {
1122 default: /* Just in case */
1123 case EL_LF:
1124 while (p < endptr && *p != '\n') p++;
1125 if (p < endptr)
1126 {
1127 *lenptr = 1;
1128 return p + 1;
1129 }
1130 *lenptr = 0;
1131 return endptr;
1132
1133 case EL_CR:
1134 while (p < endptr && *p != '\r') p++;
1135 if (p < endptr)
1136 {
1137 *lenptr = 1;
1138 return p + 1;
1139 }
1140 *lenptr = 0;
1141 return endptr;
1142
1143 case EL_CRLF:
1144 for (;;)
1145 {
1146 while (p < endptr && *p != '\r') p++;
1147 if (++p >= endptr)
1148 {
1149 *lenptr = 0;
1150 return endptr;
1151 }
1152 if (*p == '\n')
1153 {
1154 *lenptr = 2;
1155 return p + 1;
1156 }
1157 }
1158 break;
1159
1160 case EL_ANYCRLF:
1161 while (p < endptr)
1162 {
1163 int extra = 0;
1164 register int c = *((unsigned char *)p);
1165
1166 if (utf8 && c >= 0xc0)
1167 {
1168 int gcii, gcss;
1169 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1170 gcss = 6*extra;
1171 c = (c & utf8_table3[extra]) << gcss;
1172 for (gcii = 1; gcii <= extra; gcii++)
1173 {
1174 gcss -= 6;
1175 c |= (p[gcii] & 0x3f) << gcss;
1176 }
1177 }
1178
1179 p += 1 + extra;
1180
1181 switch (c)
1182 {
1183 case '\n':
1184 *lenptr = 1;
1185 return p;
1186
1187 case '\r':
1188 if (p < endptr && *p == '\n')
1189 {
1190 *lenptr = 2;
1191 p++;
1192 }
1193 else *lenptr = 1;
1194 return p;
1195
1196 default:
1197 break;
1198 }
1199 } /* End of loop for ANYCRLF case */
1200
1201 *lenptr = 0; /* Must have hit the end */
1202 return endptr;
1203
1204 case EL_ANY:
1205 while (p < endptr)
1206 {
1207 int extra = 0;
1208 register int c = *((unsigned char *)p);
1209
1210 if (utf8 && c >= 0xc0)
1211 {
1212 int gcii, gcss;
1213 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1214 gcss = 6*extra;
1215 c = (c & utf8_table3[extra]) << gcss;
1216 for (gcii = 1; gcii <= extra; gcii++)
1217 {
1218 gcss -= 6;
1219 c |= (p[gcii] & 0x3f) << gcss;
1220 }
1221 }
1222
1223 p += 1 + extra;
1224
1225 switch (c)
1226 {
1227 case '\n': /* LF */
1228 case '\v': /* VT */
1229 case '\f': /* FF */
1230 *lenptr = 1;
1231 return p;
1232
1233 case '\r': /* CR */
1234 if (p < endptr && *p == '\n')
1235 {
1236 *lenptr = 2;
1237 p++;
1238 }
1239 else *lenptr = 1;
1240 return p;
1241
1242 #ifndef EBCDIC
1243 case 0x85: /* Unicode NEL */
1244 *lenptr = utf8? 2 : 1;
1245 return p;
1246
1247 case 0x2028: /* Unicode LS */
1248 case 0x2029: /* Unicode PS */
1249 *lenptr = 3;
1250 return p;
1251 #endif /* Not EBCDIC */
1252
1253 default:
1254 break;
1255 }
1256 } /* End of loop for ANY case */
1257
1258 *lenptr = 0; /* Must have hit the end */
1259 return endptr;
1260 } /* End of overall switch */
1261 }
1262
1263
1264
1265 /*************************************************
1266 * Find start of previous line *
1267 *************************************************/
1268
1269 /* This is called when looking back for before lines to print.
1270
1271 Arguments:
1272 p start of the subsequent line
1273 startptr start of available data
1274
1275 Returns: pointer to the start of the previous line
1276 */
1277
1278 static char *
1279 previous_line(char *p, char *startptr)
1280 {
1281 switch(endlinetype)
1282 {
1283 default: /* Just in case */
1284 case EL_LF:
1285 p--;
1286 while (p > startptr && p[-1] != '\n') p--;
1287 return p;
1288
1289 case EL_CR:
1290 p--;
1291 while (p > startptr && p[-1] != '\n') p--;
1292 return p;
1293
1294 case EL_CRLF:
1295 for (;;)
1296 {
1297 p -= 2;
1298 while (p > startptr && p[-1] != '\n') p--;
1299 if (p <= startptr + 1 || p[-2] == '\r') return p;
1300 }
1301 /* Control can never get here */
1302
1303 case EL_ANY:
1304 case EL_ANYCRLF:
1305 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1306 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1307
1308 while (p > startptr)
1309 {
1310 register unsigned int c;
1311 char *pp = p - 1;
1312
1313 if (utf8)
1314 {
1315 int extra = 0;
1316 while ((*pp & 0xc0) == 0x80) pp--;
1317 c = *((unsigned char *)pp);
1318 if (c >= 0xc0)
1319 {
1320 int gcii, gcss;
1321 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1322 gcss = 6*extra;
1323 c = (c & utf8_table3[extra]) << gcss;
1324 for (gcii = 1; gcii <= extra; gcii++)
1325 {
1326 gcss -= 6;
1327 c |= (pp[gcii] & 0x3f) << gcss;
1328 }
1329 }
1330 }
1331 else c = *((unsigned char *)pp);
1332
1333 if (endlinetype == EL_ANYCRLF) switch (c)
1334 {
1335 case '\n': /* LF */
1336 case '\r': /* CR */
1337 return p;
1338
1339 default:
1340 break;
1341 }
1342
1343 else switch (c)
1344 {
1345 case '\n': /* LF */
1346 case '\v': /* VT */
1347 case '\f': /* FF */
1348 case '\r': /* CR */
1349 #ifndef EBCDIE
1350 case 0x85: /* Unicode NEL */
1351 case 0x2028: /* Unicode LS */
1352 case 0x2029: /* Unicode PS */
1353 #endif /* Not EBCDIC */
1354 return p;
1355
1356 default:
1357 break;
1358 }
1359
1360 p = pp; /* Back one character */
1361 } /* End of loop for ANY case */
1362
1363 return startptr; /* Hit start of data */
1364 } /* End of overall switch */
1365 }
1366
1367
1368
1369
1370
1371 /*************************************************
1372 * Print the previous "after" lines *
1373 *************************************************/
1374
1375 /* This is called if we are about to lose said lines because of buffer filling,
1376 and at the end of the file. The data in the line is written using fwrite() so
1377 that a binary zero does not terminate it.
1378
1379 Arguments:
1380 lastmatchnumber the number of the last matching line, plus one
1381 lastmatchrestart where we restarted after the last match
1382 endptr end of available data
1383 printname filename for printing
1384
1385 Returns: nothing
1386 */
1387
1388 static void
1389 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1390 char *printname)
1391 {
1392 if (after_context > 0 && lastmatchnumber > 0)
1393 {
1394 int count = 0;
1395 while (lastmatchrestart < endptr && count++ < after_context)
1396 {
1397 int ellength;
1398 char *pp = lastmatchrestart;
1399 if (printname != NULL) fprintf(stdout, "%s-", printname);
1400 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1401 pp = end_of_line(pp, endptr, &ellength);
1402 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1403 lastmatchrestart = pp;
1404 }
1405 hyphenpending = TRUE;
1406 }
1407 }
1408
1409
1410
1411 /*************************************************
1412 * Apply patterns to subject till one matches *
1413 *************************************************/
1414
1415 /* This function is called to run through all patterns, looking for a match. It
1416 is used multiple times for the same subject when colouring is enabled, in order
1417 to find all possible matches.
1418
1419 Arguments:
1420 matchptr the start of the subject
1421 length the length of the subject to match
1422 options options for pcre_exec
1423 startoffset where to start matching
1424 offsets the offets vector to fill in
1425 mrc address of where to put the result of pcre_exec()
1426
1427 Returns: TRUE if there was a match
1428 FALSE if there was no match
1429 invert if there was a non-fatal error
1430 */
1431
1432 static BOOL
1433 match_patterns(char *matchptr, size_t length, unsigned int options,
1434 int startoffset, int *offsets, int *mrc)
1435 {
1436 int i;
1437 size_t slen = length;
1438 patstr *p = patterns;
1439 const char *msg = "this text:\n\n";
1440
1441 if (slen > 200)
1442 {
1443 slen = 200;
1444 msg = "text that starts:\n\n";
1445 }
1446 for (i = 1; p != NULL; p = p->next, i++)
1447 {
1448 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1449 startoffset, options, offsets, OFFSET_SIZE);
1450 if (*mrc >= 0) return TRUE;
1451 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1452 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1453 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1454 fprintf(stderr, "%s", msg);
1455 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1456 fprintf(stderr, "\n\n");
1457 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1458 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1459 resource_error = TRUE;
1460 if (error_count++ > 20)
1461 {
1462 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1463 pcregrep_exit(2);
1464 }
1465 return invert; /* No more matching; don't show the line again */
1466 }
1467
1468 return FALSE; /* No match, no errors */
1469 }
1470
1471
1472
1473 /*************************************************
1474 * Grep an individual file *
1475 *************************************************/
1476
1477 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1478 times the value of bufthird. The matching point is never allowed to stray into
1479 the top third of the buffer, thus keeping more of the file available for
1480 context printing or for multiline scanning. For large files, the pointer will
1481 be in the middle third most of the time, so the bottom third is available for
1482 "before" context printing.
1483
1484 Arguments:
1485 handle the fopened FILE stream for a normal file
1486 the gzFile pointer when reading is via libz
1487 the BZFILE pointer when reading is via libbz2
1488 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1489 filename the file name or NULL (for errors)
1490 printname the file name if it is to be printed for each match
1491 or NULL if the file name is not to be printed
1492 it cannot be NULL if filenames[_nomatch]_only is set
1493
1494 Returns: 0 if there was at least one match
1495 1 otherwise (no matches)
1496 2 if an overlong line is encountered
1497 3 if there is a read error on a .bz2 file
1498 */
1499
1500 static int
1501 pcregrep(void *handle, int frtype, char *filename, char *printname)
1502 {
1503 int rc = 1;
1504 int linenumber = 1;
1505 int lastmatchnumber = 0;
1506 int count = 0;
1507 int filepos = 0;
1508 int offsets[OFFSET_SIZE];
1509 char *lastmatchrestart = NULL;
1510 char *ptr = main_buffer;
1511 char *endptr;
1512 size_t bufflength;
1513 BOOL binary = FALSE;
1514 BOOL endhyphenpending = FALSE;
1515 BOOL input_line_buffered = line_buffered;
1516 FILE *in = NULL; /* Ensure initialized */
1517
1518 #ifdef SUPPORT_LIBZ
1519 gzFile ingz = NULL;
1520 #endif
1521
1522 #ifdef SUPPORT_LIBBZ2
1523 BZFILE *inbz2 = NULL;
1524 #endif
1525
1526
1527 /* Do the first read into the start of the buffer and set up the pointer to end
1528 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1529 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1530 fail. */
1531
1532 (void)frtype;
1533
1534 #ifdef SUPPORT_LIBZ
1535 if (frtype == FR_LIBZ)
1536 {
1537 ingz = (gzFile)handle;
1538 bufflength = gzread (ingz, main_buffer, bufsize);
1539 }
1540 else
1541 #endif
1542
1543 #ifdef SUPPORT_LIBBZ2
1544 if (frtype == FR_LIBBZ2)
1545 {
1546 inbz2 = (BZFILE *)handle;
1547 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1548 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1549 } /* without the cast it is unsigned. */
1550 else
1551 #endif
1552
1553 {
1554 in = (FILE *)handle;
1555 if (is_file_tty(in)) input_line_buffered = TRUE;
1556 bufflength = input_line_buffered?
1557 read_one_line(main_buffer, bufsize, in) :
1558 fread(main_buffer, 1, bufsize, in);
1559 }
1560
1561 endptr = main_buffer + bufflength;
1562
1563 /* Unless binary-files=text, see if we have a binary file. This uses the same
1564 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1565 file. */
1566
1567 if (binary_files != BIN_TEXT)
1568 {
1569 binary =
1570 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1571 if (binary && binary_files == BIN_NOMATCH) return 1;
1572 }
1573
1574 /* Loop while the current pointer is not at the end of the file. For large
1575 files, endptr will be at the end of the buffer when we are in the middle of the
1576 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1577 way, the buffer is shifted left and re-filled. */
1578
1579 while (ptr < endptr)
1580 {
1581 int endlinelength;
1582 int mrc = 0;
1583 int startoffset = 0;
1584 unsigned int options = 0;
1585 BOOL match;
1586 char *matchptr = ptr;
1587 char *t = ptr;
1588 size_t length, linelength;
1589
1590 /* At this point, ptr is at the start of a line. We need to find the length
1591 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1592 length remainder of the data in the buffer. Otherwise, it is the length of
1593 the next line, excluding the terminating newline. After matching, we always
1594 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1595 option is used for compiling, so that any match is constrained to be in the
1596 first line. */
1597
1598 t = end_of_line(t, endptr, &endlinelength);
1599 linelength = t - ptr - endlinelength;
1600 length = multiline? (size_t)(endptr - ptr) : linelength;
1601
1602 /* Check to see if the line we are looking at extends right to the very end
1603 of the buffer without a line terminator. This means the line is too long to
1604 handle. */
1605
1606 if (endlinelength == 0 && t == main_buffer + bufsize)
1607 {
1608 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1609 "pcregrep: check the --buffer-size option\n",
1610 linenumber,
1611 (filename == NULL)? "" : " of file ",
1612 (filename == NULL)? "" : filename);
1613 return 2;
1614 }
1615
1616 /* Extra processing for Jeffrey Friedl's debugging. */
1617
1618 #ifdef JFRIEDL_DEBUG
1619 if (jfriedl_XT || jfriedl_XR)
1620 {
1621 # include <sys/time.h>
1622 # include <time.h>
1623 struct timeval start_time, end_time;
1624 struct timezone dummy;
1625 int i;
1626
1627 if (jfriedl_XT)
1628 {
1629 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1630 const char *orig = ptr;
1631 ptr = malloc(newlen + 1);
1632 if (!ptr) {
1633 printf("out of memory");
1634 pcregrep_exit(2);
1635 }
1636 endptr = ptr;
1637 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1638 for (i = 0; i < jfriedl_XT; i++) {
1639 strncpy(endptr, orig, length);
1640 endptr += length;
1641 }
1642 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1643 length = newlen;
1644 }
1645
1646 if (gettimeofday(&start_time, &dummy) != 0)
1647 perror("bad gettimeofday");
1648
1649
1650 for (i = 0; i < jfriedl_XR; i++)
1651 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1652 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1653
1654 if (gettimeofday(&end_time, &dummy) != 0)
1655 perror("bad gettimeofday");
1656
1657 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1658 -
1659 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1660
1661 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1662 return 0;
1663 }
1664 #endif
1665
1666 /* We come back here after a match when show_only_matching is set, in order
1667 to find any further matches in the same line. This applies to
1668 --only-matching, --file-offsets, and --line-offsets. */
1669
1670 ONLY_MATCHING_RESTART:
1671
1672 /* Run through all the patterns until one matches or there is an error other
1673 than NOMATCH. This code is in a subroutine so that it can be re-used for
1674 finding subsequent matches when colouring matched lines. After finding one
1675 match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1676 this line. */
1677
1678 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1679 options = PCRE_NOTEMPTY;
1680
1681 /* If it's a match or a not-match (as required), do what's wanted. */
1682
1683 if (match != invert)
1684 {
1685 BOOL hyphenprinted = FALSE;
1686
1687 /* We've failed if we want a file that doesn't have any matches. */
1688
1689 if (filenames == FN_NOMATCH_ONLY) return 1;
1690
1691 /* Just count if just counting is wanted. */
1692
1693 if (count_only) count++;
1694
1695 /* When handling a binary file and binary-files==binary, the "binary"
1696 variable will be set true (it's false in all other cases). In this
1697 situation we just want to output the file name. No need to scan further. */
1698
1699 else if (binary)
1700 {
1701 fprintf(stdout, "Binary file %s matches\n", filename);
1702 return 0;
1703 }
1704
1705 /* If all we want is a file name, there is no need to scan any more lines
1706 in the file. */
1707
1708 else if (filenames == FN_MATCH_ONLY)
1709 {
1710 fprintf(stdout, "%s\n", printname);
1711 return 0;
1712 }
1713
1714 /* Likewise, if all we want is a yes/no answer. */
1715
1716 else if (quiet) return 0;
1717
1718 /* The --only-matching option prints just the substring that matched,
1719 and/or one or more captured portions of it, as long as these strings are
1720 not empty. The --file-offsets and --line-offsets options output offsets for
1721 the matching substring (all three set show_only_matching). None of these
1722 mutually exclusive options prints any context. Afterwards, adjust the start
1723 and then jump back to look for further matches in the same line. If we are
1724 in invert mode, however, nothing is printed and we do not restart - this
1725 could still be useful because the return code is set. */
1726
1727 else if (show_only_matching)
1728 {
1729 if (!invert)
1730 {
1731 if (printname != NULL) fprintf(stdout, "%s:", printname);
1732 if (number) fprintf(stdout, "%d:", linenumber);
1733
1734 /* Handle --line-offsets */
1735
1736 if (line_offsets)
1737 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1738 offsets[1] - offsets[0]);
1739
1740 /* Handle --file-offsets */
1741
1742 else if (file_offsets)
1743 fprintf(stdout, "%d,%d\n",
1744 (int)(filepos + matchptr + offsets[0] - ptr),
1745 offsets[1] - offsets[0]);
1746
1747 /* Handle --only-matching, which may occur many times */
1748
1749 else
1750 {
1751 BOOL printed = FALSE;
1752 omstr *om;
1753
1754 for (om = only_matching; om != NULL; om = om->next)
1755 {
1756 int n = om->groupnum;
1757 if (n < mrc)
1758 {
1759 int plen = offsets[2*n + 1] - offsets[2*n];
1760 if (plen > 0)
1761 {
1762 if (printed) fprintf(stdout, "%s", om_separator);
1763 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1764 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1765 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1766 printed = TRUE;
1767 }
1768 }
1769 }
1770
1771 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1772 }
1773
1774 /* Prepare to repeat to find the next match */
1775
1776 match = FALSE;
1777 if (line_buffered) fflush(stdout);
1778 rc = 0; /* Had some success */
1779 startoffset = offsets[1]; /* Restart after the match */
1780 goto ONLY_MATCHING_RESTART;
1781 }
1782 }
1783
1784 /* This is the default case when none of the above options is set. We print
1785 the matching lines(s), possibly preceded and/or followed by other lines of
1786 context. */
1787
1788 else
1789 {
1790 /* See if there is a requirement to print some "after" lines from a
1791 previous match. We never print any overlaps. */
1792
1793 if (after_context > 0 && lastmatchnumber > 0)
1794 {
1795 int ellength;
1796 int linecount = 0;
1797 char *p = lastmatchrestart;
1798
1799 while (p < ptr && linecount < after_context)
1800 {
1801 p = end_of_line(p, ptr, &ellength);
1802 linecount++;
1803 }
1804
1805 /* It is important to advance lastmatchrestart during this printing so
1806 that it interacts correctly with any "before" printing below. Print
1807 each line's data using fwrite() in case there are binary zeroes. */
1808
1809 while (lastmatchrestart < p)
1810 {
1811 char *pp = lastmatchrestart;
1812 if (printname != NULL) fprintf(stdout, "%s-", printname);
1813 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1814 pp = end_of_line(pp, endptr, &ellength);
1815 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1816 lastmatchrestart = pp;
1817 }
1818 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1819 }
1820
1821 /* If there were non-contiguous lines printed above, insert hyphens. */
1822
1823 if (hyphenpending)
1824 {
1825 fprintf(stdout, "--\n");
1826 hyphenpending = FALSE;
1827 hyphenprinted = TRUE;
1828 }
1829
1830 /* See if there is a requirement to print some "before" lines for this
1831 match. Again, don't print overlaps. */
1832
1833 if (before_context > 0)
1834 {
1835 int linecount = 0;
1836 char *p = ptr;
1837
1838 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1839 linecount < before_context)
1840 {
1841 linecount++;
1842 p = previous_line(p, main_buffer);
1843 }
1844
1845 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1846 fprintf(stdout, "--\n");
1847
1848 while (p < ptr)
1849 {
1850 int ellength;
1851 char *pp = p;
1852 if (printname != NULL) fprintf(stdout, "%s-", printname);
1853 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1854 pp = end_of_line(pp, endptr, &ellength);
1855 FWRITE(p, 1, pp - p, stdout);
1856 p = pp;
1857 }
1858 }
1859
1860 /* Now print the matching line(s); ensure we set hyphenpending at the end
1861 of the file if any context lines are being output. */
1862
1863 if (after_context > 0 || before_context > 0)
1864 endhyphenpending = TRUE;
1865
1866 if (printname != NULL) fprintf(stdout, "%s:", printname);
1867 if (number) fprintf(stdout, "%d:", linenumber);
1868
1869 /* In multiline mode, we want to print to the end of the line in which
1870 the end of the matched string is found, so we adjust linelength and the
1871 line number appropriately, but only when there actually was a match
1872 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1873 the match will always be before the first newline sequence. */
1874
1875 if (multiline & !invert)
1876 {
1877 char *endmatch = ptr + offsets[1];
1878 t = ptr;
1879 while (t <= endmatch)
1880 {
1881 t = end_of_line(t, endptr, &endlinelength);
1882 if (t < endmatch) linenumber++; else break;
1883 }
1884 linelength = t - ptr - endlinelength;
1885 }
1886
1887 /*** NOTE: Use only fwrite() to output the data line, so that binary
1888 zeroes are treated as just another data character. */
1889
1890 /* This extra option, for Jeffrey Friedl's debugging requirements,
1891 replaces the matched string, or a specific captured string if it exists,
1892 with X. When this happens, colouring is ignored. */
1893
1894 #ifdef JFRIEDL_DEBUG
1895 if (S_arg >= 0 && S_arg < mrc)
1896 {
1897 int first = S_arg * 2;
1898 int last = first + 1;
1899 FWRITE(ptr, 1, offsets[first], stdout);
1900 fprintf(stdout, "X");
1901 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1902 }
1903 else
1904 #endif
1905
1906 /* We have to split the line(s) up if colouring, and search for further
1907 matches, but not of course if the line is a non-match. */
1908
1909 if (do_colour && !invert)
1910 {
1911 int plength;
1912 FWRITE(ptr, 1, offsets[0], stdout);
1913 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1914 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1915 fprintf(stdout, "%c[00m", 0x1b);
1916 for (;;)
1917 {
1918 startoffset = offsets[1];
1919 if (startoffset >= (int)linelength + endlinelength ||
1920 !match_patterns(matchptr, length, options, startoffset, offsets,
1921 &mrc))
1922 break;
1923 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1924 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1925 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1926 fprintf(stdout, "%c[00m", 0x1b);
1927 }
1928
1929 /* In multiline mode, we may have already printed the complete line
1930 and its line-ending characters (if they matched the pattern), so there
1931 may be no more to print. */
1932
1933 plength = (int)((linelength + endlinelength) - startoffset);
1934 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1935 }
1936
1937 /* Not colouring; no need to search for further matches */
1938
1939 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1940 }
1941
1942 /* End of doing what has to be done for a match. If --line-buffered was
1943 given, flush the output. */
1944
1945 if (line_buffered) fflush(stdout);
1946 rc = 0; /* Had some success */
1947
1948 /* Remember where the last match happened for after_context. We remember
1949 where we are about to restart, and that line's number. */
1950
1951 lastmatchrestart = ptr + linelength + endlinelength;
1952 lastmatchnumber = linenumber + 1;
1953 }
1954
1955 /* For a match in multiline inverted mode (which of course did not cause
1956 anything to be printed), we have to move on to the end of the match before
1957 proceeding. */
1958
1959 if (multiline && invert && match)
1960 {
1961 int ellength;
1962 char *endmatch = ptr + offsets[1];
1963 t = ptr;
1964 while (t < endmatch)
1965 {
1966 t = end_of_line(t, endptr, &ellength);
1967 if (t <= endmatch) linenumber++; else break;
1968 }
1969 endmatch = end_of_line(endmatch, endptr, &ellength);
1970 linelength = endmatch - ptr - ellength;
1971 }
1972
1973 /* Advance to after the newline and increment the line number. The file
1974 offset to the current line is maintained in filepos. */
1975
1976 ptr += linelength + endlinelength;
1977 filepos += (int)(linelength + endlinelength);
1978 linenumber++;
1979
1980 /* If input is line buffered, and the buffer is not yet full, read another
1981 line and add it into the buffer. */
1982
1983 if (input_line_buffered && bufflength < (size_t)bufsize)
1984 {
1985 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1986 bufflength += add;
1987 endptr += add;
1988 }
1989
1990 /* If we haven't yet reached the end of the file (the buffer is full), and
1991 the current point is in the top 1/3 of the buffer, slide the buffer down by
1992 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1993 about to be lost, print them. */
1994
1995 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1996 {
1997 if (after_context > 0 &&
1998 lastmatchnumber > 0 &&
1999 lastmatchrestart < main_buffer + bufthird)
2000 {
2001 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2002 lastmatchnumber = 0;
2003 }
2004
2005 /* Now do the shuffle */
2006
2007 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2008 ptr -= bufthird;
2009
2010 #ifdef SUPPORT_LIBZ
2011 if (frtype == FR_LIBZ)
2012 bufflength = 2*bufthird +
2013 gzread (ingz, main_buffer + 2*bufthird, bufthird);
2014 else
2015 #endif
2016
2017 #ifdef SUPPORT_LIBBZ2
2018 if (frtype == FR_LIBBZ2)
2019 bufflength = 2*bufthird +
2020 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2021 else
2022 #endif
2023
2024 bufflength = 2*bufthird +
2025 (input_line_buffered?
2026 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2027 fread(main_buffer + 2*bufthird, 1, bufthird, in));
2028 endptr = main_buffer + bufflength;
2029
2030 /* Adjust any last match point */
2031
2032 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2033 }
2034 } /* Loop through the whole file */
2035
2036 /* End of file; print final "after" lines if wanted; do_after_lines sets
2037 hyphenpending if it prints something. */
2038
2039 if (!show_only_matching && !count_only)
2040 {
2041 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2042 hyphenpending |= endhyphenpending;
2043 }
2044
2045 /* Print the file name if we are looking for those without matches and there
2046 were none. If we found a match, we won't have got this far. */
2047
2048 if (filenames == FN_NOMATCH_ONLY)
2049 {
2050 fprintf(stdout, "%s\n", printname);
2051 return 0;
2052 }
2053
2054 /* Print the match count if wanted */
2055
2056 if (count_only)
2057 {
2058 if (count > 0 || !omit_zero_count)
2059 {
2060 if (printname != NULL && filenames != FN_NONE)
2061 fprintf(stdout, "%s:", printname);
2062 fprintf(stdout, "%d\n", count);
2063 }
2064 }
2065
2066 return rc;
2067 }
2068
2069
2070
2071 /*************************************************
2072 * Grep a file or recurse into a directory *
2073 *************************************************/
2074
2075 /* Given a path name, if it's a directory, scan all the files if we are
2076 recursing; if it's a file, grep it.
2077
2078 Arguments:
2079 pathname the path to investigate
2080 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2081 only_one_at_top TRUE if the path is the only one at toplevel
2082
2083 Returns: -1 the file/directory was skipped
2084 0 if there was at least one match
2085 1 if there were no matches
2086 2 there was some kind of error
2087
2088 However, file opening failures are suppressed if "silent" is set.
2089 */
2090
2091 static int
2092 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2093 {
2094 int rc = 1;
2095 int frtype;
2096 void *handle;
2097 char *lastcomp;
2098 FILE *in = NULL; /* Ensure initialized */
2099
2100 #ifdef SUPPORT_LIBZ
2101 gzFile ingz = NULL;
2102 #endif
2103
2104 #ifdef SUPPORT_LIBBZ2
2105 BZFILE *inbz2 = NULL;
2106 #endif
2107
2108 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2109 int pathlen;
2110 #endif
2111
2112 #if defined NATIVE_ZOS
2113 int zos_type;
2114 FILE *zos_test_file;
2115 #endif
2116
2117 /* If the file name is "-" we scan stdin */
2118
2119 if (strcmp(pathname, "-") == 0)
2120 {
2121 return pcregrep(stdin, FR_PLAIN, stdin_name,
2122 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2123 stdin_name : NULL);
2124 }
2125
2126 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2127 directories, whereas --include and --exclude apply to everything else. The test
2128 is against the final component of the path. */
2129
2130 lastcomp = strrchr(pathname, FILESEP);
2131 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2132
2133 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2134 Otherwise, scan the directory and recurse for each path within it. The scanning
2135 code is localized so it can be made system-specific. */
2136
2137
2138 /* For z/OS, determine the file type. */
2139
2140 #if defined NATIVE_ZOS
2141 zos_test_file = fopen(pathname,"rb");
2142
2143 if (zos_test_file == NULL)
2144 {
2145 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2146 pathname, strerror(errno));
2147 return -1;
2148 }
2149 zos_type = identifyzosfiletype (zos_test_file);
2150 fclose (zos_test_file);
2151
2152 /* Handle a PDS in separate code */
2153
2154 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2155 {
2156 return travelonpdsdir (pathname, only_one_at_top);
2157 }
2158
2159 /* Deal with regular files in the normal way below. These types are:
2160 zos_type == __ZOS_PDS_MEMBER
2161 zos_type == __ZOS_PS
2162 zos_type == __ZOS_VSAM_KSDS
2163 zos_type == __ZOS_VSAM_ESDS
2164 zos_type == __ZOS_VSAM_RRDS
2165 */
2166
2167 /* Handle a z/OS directory using common code. */
2168
2169 else if (zos_type == __ZOS_HFS)
2170 {
2171 #endif /* NATIVE_ZOS */
2172
2173
2174 /* Handle directories: common code for all OS */
2175
2176 if (isdirectory(pathname))
2177 {
2178 if (dee_action == dee_SKIP ||
2179 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2180 return -1;
2181
2182 if (dee_action == dee_RECURSE)
2183 {
2184 char buffer[1024];
2185 char *nextfile;
2186 directory_type *dir = opendirectory(pathname);
2187
2188 if (dir == NULL)
2189 {
2190 if (!silent)
2191 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2192 strerror(errno));
2193 return 2;
2194 }
2195
2196 while ((nextfile = readdirectory(dir)) != NULL)
2197 {
2198 int frc;
2199 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
2200 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2201 if (frc > 1) rc = frc;
2202 else if (frc == 0 && rc == 1) rc = 0;
2203 }
2204
2205 closedirectory(dir);
2206 return rc;
2207 }
2208 }
2209
2210 #if defined NATIVE_ZOS
2211 }
2212 #endif
2213
2214 /* If the file is not a directory, check for a regular file, and if it is not,
2215 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2216 exclusion. */
2217
2218 else if (
2219 #if defined NATIVE_ZOS
2220 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2221 #else /* all other OS */
2222 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2223 #endif
2224 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2225 return -1; /* File skipped */
2226
2227 /* Control reaches here if we have a regular file, or if we have a directory
2228 and recursion or skipping was not requested, or if we have anything else and
2229 skipping was not requested. The scan proceeds. If this is the first and only
2230 argument at top level, we don't show the file name, unless we are only showing
2231 the file name, or the filename was forced (-H). */
2232
2233 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2234 pathlen = (int)(strlen(pathname));
2235 #endif
2236
2237 /* Open using zlib if it is supported and the file name ends with .gz. */
2238
2239 #ifdef SUPPORT_LIBZ
2240 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2241 {
2242 ingz = gzopen(pathname, "rb");
2243 if (ingz == NULL)
2244 {
2245 if (!silent)
2246 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2247 strerror(errno));
2248 return 2;
2249 }
2250 handle = (void *)ingz;
2251 frtype = FR_LIBZ;
2252 }
2253 else
2254 #endif
2255
2256 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2257
2258 #ifdef SUPPORT_LIBBZ2
2259 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2260 {
2261 inbz2 = BZ2_bzopen(pathname, "rb");
2262 handle = (void *)inbz2;
2263 frtype = FR_LIBBZ2;
2264 }
2265 else
2266 #endif
2267
2268 /* Otherwise use plain fopen(). The label is so that we can come back here if
2269 an attempt to read a .bz2 file indicates that it really is a plain file. */
2270
2271 #ifdef SUPPORT_LIBBZ2
2272 PLAIN_FILE:
2273 #endif
2274 {
2275 in = fopen(pathname, "rb");
2276 handle = (void *)in;
2277 frtype = FR_PLAIN;
2278 }
2279
2280 /* All the opening methods return errno when they fail. */
2281
2282 if (handle == NULL)
2283 {
2284 if (!silent)
2285 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2286 strerror(errno));
2287 return 2;
2288 }
2289
2290 /* Now grep the file */
2291
2292 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2293 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2294
2295 /* Close in an appropriate manner. */
2296
2297 #ifdef SUPPORT_LIBZ
2298 if (frtype == FR_LIBZ)
2299 gzclose(ingz);
2300 else
2301 #endif
2302
2303 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2304 read failed. If the error indicates that the file isn't in fact bzipped, try
2305 again as a normal file. */
2306
2307 #ifdef SUPPORT_LIBBZ2
2308 if (frtype == FR_LIBBZ2)
2309 {
2310 if (rc == 3)
2311 {
2312 int errnum;
2313 const char *err = BZ2_bzerror(inbz2, &errnum);
2314 if (errnum == BZ_DATA_ERROR_MAGIC)
2315 {
2316 BZ2_bzclose(inbz2);
2317 goto PLAIN_FILE;
2318 }
2319 else if (!silent)
2320 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2321 pathname, err);
2322 rc = 2; /* The normal "something went wrong" code */
2323 }
2324 BZ2_bzclose(inbz2);
2325 }
2326 else
2327 #endif
2328
2329 /* Normal file close */
2330
2331 fclose(in);
2332
2333 /* Pass back the yield from pcregrep(). */
2334
2335 return rc;
2336 }
2337
2338
2339
2340 /*************************************************
2341 * Handle a single-letter, no data option *
2342 *************************************************/
2343
2344 static int
2345 handle_option(int letter, int options)
2346 {
2347 switch(letter)
2348 {
2349 case N_FOFFSETS: file_offsets = TRUE; break;
2350 case N_HELP: help(); pcregrep_exit(0);
2351 case N_LBUFFER: line_buffered = TRUE; break;
2352 case N_LOFFSETS: line_offsets = number = TRUE; break;
2353 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2354 case 'a': binary_files = BIN_TEXT; break;
2355 case 'c': count_only = TRUE; break;
2356 case 'F': process_options |= PO_FIXED_STRINGS; break;
2357 case 'H': filenames = FN_FORCE; break;
2358 case 'I': binary_files = BIN_NOMATCH; break;
2359 case 'h': filenames = FN_NONE; break;
2360 case 'i': options |= PCRE_CASELESS; break;
2361 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2362 case 'L': filenames = FN_NOMATCH_ONLY; break;
2363 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2364 case 'n': number = TRUE; break;
2365
2366 case 'o':
2367 only_matching_last = add_number(0, only_matching_last);
2368 if (only_matching == NULL) only_matching = only_matching_last;
2369 break;
2370
2371 case 'q': quiet = TRUE; break;
2372 case 'r': dee_action = dee_RECURSE; break;
2373 case 's': silent = TRUE; break;
2374 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2375 case 'v': invert = TRUE; break;
2376 case 'w': process_options |= PO_WORD_MATCH; break;
2377 case 'x': process_options |= PO_LINE_MATCH; break;
2378
2379 case 'V':
2380 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2381 pcregrep_exit(0);
2382 break;
2383
2384 default:
2385 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2386 pcregrep_exit(usage(2));
2387 }
2388
2389 return options;
2390 }
2391
2392
2393
2394
2395 /*************************************************
2396 * Construct printed ordinal *
2397 *************************************************/
2398
2399 /* This turns a number into "1st", "3rd", etc. */
2400
/* Format a number with its English ordinal suffix: "1st", "2nd", "3rd",
"4th", "11th", "21st", "112th", and so on.

Argument:   n    the number to format
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Room for the digits of a 64-bit int, a sign, the two-letter suffix, and the
terminating zero. */
static char buffer[24];
char *p = buffer;
int last_two = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* Numbers ending in 11, 12 or 13 take "th"; otherwise the final digit
decides. A negative remainder matches no case and so also gets "th". */

if (last_two >= 11 && last_two <= 13)
  strcpy(p, "th");
else switch (last_two % 10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
2417
2418
2419
2420 /*************************************************
2421 * Compile a single pattern *
2422 *************************************************/
2423
2424 /* Do nothing if the pattern has already been compiled. This is the case for
2425 include/exclude patterns read from a file.
2426
2427 When the -F option has been used, each "pattern" may be a list of strings,
2428 separated by line breaks. They will be matched literally. We split such a
2429 string and compile the first substring, inserting an additional block into the
2430 pattern chain.
2431
2432 Arguments:
2433 p points to the pattern block
2434 options the PCRE options
2435 popts the processing options
2436 fromfile TRUE if the pattern was read from a file
2437 fromtext file name or identifying text (e.g. "include")
2438 count 0 if this is the only command line pattern, or
2439 number of the command line pattern, or
2440 linenumber for a pattern from a file
2441
2442 Returns: TRUE on success, FALSE after an error
2443 */
2444
2445 static BOOL
2446 compile_pattern(patstr *p, int options, int popts, int fromfile,
2447 const char *fromtext, int count)
2448 {
2449 char buffer[PATBUFSIZE];
2450 const char *error;
2451 char *ps = p->string;
2452 int patlen = strlen(ps);
2453 int errptr;
2454
2455 if (p->compiled != NULL) return TRUE;
2456
2457 if ((popts & PO_FIXED_STRINGS) != 0)
2458 {
2459 int ellength;
2460 char *eop = ps + patlen;
2461 char *pe = end_of_line(ps, eop, &ellength);
2462
2463 if (ellength != 0)
2464 {
2465 if (add_pattern(pe, p) == NULL) return FALSE;
2466 patlen = (int)(pe - ps - ellength);
2467 }
2468 }
2469
2470 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2471 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2472 if (p->compiled != NULL) return TRUE;
2473
2474 /* Handle compile errors */
2475
2476 errptr -= (int)strlen(prefix[popts]);
2477 if (errptr > patlen) errptr = patlen;
2478
2479 if (fromfile)
2480 {
2481 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2482 "at offset %d: %s\n", count, fromtext, errptr, error);
2483 }
2484 else
2485 {
2486 if (count == 0)
2487 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2488 fromtext, errptr, error);
2489 else
2490 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2491 ordin(count), fromtext, errptr, error);
2492 }
2493
2494 return FALSE;
2495 }
2496
2497
2498
2499 /*************************************************
2500 * Read and compile a file of patterns *
2501 *************************************************/
2502
2503 /* This is used for pattern files given by -f, and for --include-from and --exclude-from.
2504
2505 Arguments:
2506 name the name of the file; "-" is stdin
2507 patptr pointer to the pattern chain anchor
2508 patlastptr pointer to the last pattern pointer
2509 popts the process options to pass to compile_pattern()
2510
2511 Returns: TRUE if all went well
2512 */
2513
2514 static BOOL
2515 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2516 {
2517 int linenumber = 0;
2518 FILE *f;
2519 char *filename;
2520 char buffer[PATBUFSIZE];
2521
2522 if (strcmp(name, "-") == 0)
2523 {
2524 f = stdin;
2525 filename = stdin_name;
2526 }
2527 else
2528 {
2529 f = fopen(name, "r");
2530 if (f == NULL)
2531 {
2532 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2533 return FALSE;
2534 }
2535 filename = name;
2536 }
2537
2538 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2539 {
2540 char *s = buffer + (int)strlen(buffer);
2541 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2542 *s = 0;
2543 linenumber++;
2544 if (buffer[0] == 0) continue; /* Skip blank lines */
2545
2546 /* Note: this call to add_pattern() puts a pointer to the local variable
2547 "buffer" into the pattern chain. However, that pointer is used only when
2548 compiling the pattern, which happens immediately below, so we flatten it
2549 afterwards, as a precaution against any later code trying to use it. */
2550
2551 *patlastptr = add_pattern(buffer, *patlastptr);
2552 if (*patlastptr == NULL) return FALSE;
2553 if (*patptr == NULL) *patptr = *patlastptr;
2554
2555 /* This loop is needed because compiling a "pattern" when -F is set may add
2556 on additional literal patterns if the original contains a newline. In the
2557 common case, it never will, because fgets() stops at a newline. However,
2558 the -N option can be used to give pcregrep a different newline setting. */
2559
2560 for(;;)
2561 {
2562 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2563 linenumber))
2564 return FALSE;
2565 (*patlastptr)->string = NULL; /* Insurance */
2566 if ((*patlastptr)->next == NULL) break;
2567 *patlastptr = (*patlastptr)->next;
2568 }
2569 }
2570
2571 if (f != stdin) fclose(f);
2572 return TRUE;
2573 }
2574
2575
2576
2577 /*************************************************
2578 * Main program *
2579 *************************************************/
2580
2581 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2582
2583 int
2584 main(int argc, char **argv)
2585 {
2586 int i, j;
2587 int rc = 1;
2588 BOOL only_one_at_top;
2589 patstr *cp;
2590 fnstr *fn;
2591 const char *locale_from = "--locale";
2592 const char *error;
2593
2594 #ifdef SUPPORT_PCREGREP_JIT
2595 pcre_jit_stack *jit_stack = NULL;
2596 #endif
2597
2598 /* Set the default line ending value from the default in the PCRE library;
2599 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2600 Note that the return values from pcre_config(), though derived from the ASCII
2601 codes, are the same in EBCDIC environments, so we must use the actual values
2602 rather than escapes such as '\r'. */
2603
2604 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2605 switch(i)
2606 {
2607 default: newline = (char *)"lf"; break;
2608 case 13: newline = (char *)"cr"; break;
2609 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2610 case -1: newline = (char *)"any"; break;
2611 case -2: newline = (char *)"anycrlf"; break;
2612 }
2613
2614 /* Process the options */
2615
2616 for (i = 1; i < argc; i++)
2617 {
2618 option_item *op = NULL;
2619 char *option_data = (char *)""; /* default to keep compiler happy */
2620 BOOL longop;
2621 BOOL longopwasequals = FALSE;
2622
2623 if (argv[i][0] != '-') break;
2624
2625 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2626 but only if we have previously had -e or -f to define the patterns. */
2627
2628 if (argv[i][1] == 0)
2629 {
2630 if (pattern_files != NULL || patterns != NULL) break;
2631 else pcregrep_exit(usage(2));
2632 }
2633
2634 /* Handle a long name option, or -- to terminate the options */
2635
2636 if (argv[i][1] == '-')
2637 {
2638 char *arg = argv[i] + 2;
2639 char *argequals = strchr(arg, '=');
2640
2641 if (*arg == 0) /* -- terminates options */
2642 {
2643 i++;
2644 break; /* out of the options-handling loop */
2645 }
2646
2647 longop = TRUE;
2648
2649 /* Some long options have data that follows after =, for example file=name.
2650 Some options have variations in the long name spelling: specifically, we
2651 allow "regexp" because GNU grep allows it, though I personally go along
2652 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2653 These options are entered in the table as "regex(p)". Options can be in
2654 both these categories. */
2655
2656 for (op = optionlist; op->one_char != 0; op++)
2657 {
2658 char *opbra = strchr(op->long_name, '(');
2659 char *equals = strchr(op->long_name, '=');
2660
2661 /* Handle options with only one spelling of the name */
2662
2663 if (opbra == NULL) /* Does not contain '(' */
2664 {
2665 if (equals == NULL) /* Not thing=data case */
2666 {
2667 if (strcmp(arg, op->long_name) == 0) break;
2668 }
2669 else /* Special case xxx=data */
2670 {
2671 int oplen = (int)(equals - op->long_name);
2672 int arglen = (argequals == NULL)?
2673 (int)strlen(arg) : (int)(argequals - arg);
2674 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2675 {
2676 option_data = arg + arglen;
2677 if (*option_data == '=')
2678 {
2679 option_data++;
2680 longopwasequals = TRUE;
2681 }
2682 break;
2683 }
2684 }
2685 }
2686
2687 /* Handle options with an alternate spelling of the name */
2688
2689 else
2690 {
2691 char buff1[24];
2692 char buff2[24];
2693
2694 int baselen = (int)(opbra - op->long_name);
2695 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2696 int arglen = (argequals == NULL || equals == NULL)?
2697 (int)strlen(arg) : (int)(argequals - arg);
2698
2699 sprintf(buff1, "%.*s", baselen, op->long_name);
2700 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2701
2702 if (strncmp(arg, buff1, arglen) == 0 ||
2703 strncmp(arg, buff2, arglen) == 0)
2704 {
2705 if (equals != NULL && argequals != NULL)
2706 {
2707 option_data = argequals;
2708 if (*option_data == '=')
2709 {
2710 option_data++;
2711 longopwasequals = TRUE;
2712 }
2713 }
2714 break;
2715 }
2716 }
2717 }
2718
2719 if (op->one_char == 0)
2720 {
2721 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2722 pcregrep_exit(usage(2));
2723 }
2724 }
2725
2726 /* Jeffrey Friedl's debugging harness uses these additional options which
2727 are not in the right form for putting in the option table because they use
2728 only one hyphen, yet are more than one character long. By putting them
2729 separately here, they will not get displayed as part of the help() output,
2730 but I don't think Jeffrey will care about that. */
2731
2732 #ifdef JFRIEDL_DEBUG
2733 else if (strcmp(argv[i], "-pre") == 0) {
2734 jfriedl_prefix = argv[++i];
2735 continue;
2736 } else if (strcmp(argv[i], "-post") == 0) {
2737 jfriedl_postfix = argv[++i];
2738 continue;
2739 } else if (strcmp(argv[i], "-XT") == 0) {
2740 sscanf(argv[++i], "%d", &jfriedl_XT);
2741 continue;
2742 } else if (strcmp(argv[i], "-XR") == 0) {
2743 sscanf(argv[++i], "%d", &jfriedl_XR);
2744 continue;
2745 }
2746 #endif
2747
2748
2749 /* One-char options; many that have no data may be in a single argument; we
2750 continue till we hit the last one or one that needs data. */
2751
2752 else
2753 {
2754 char *s = argv[i] + 1;
2755 longop = FALSE;
2756
2757 while (*s != 0)
2758 {
2759 for (op = optionlist; op->one_char != 0; op++)
2760 {
2761 if (*s == op->one_char) break;
2762 }
2763 if (op->one_char == 0)
2764 {
2765 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2766 *s, argv[i]);
2767 pcregrep_exit(usage(2));
2768 }
2769
2770 option_data = s+1;
2771
2772 /* Break out if this is the last character in the string; it's handled
2773 below like a single multi-char option. */
2774
2775 if (*option_data == 0) break;
2776
2777 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2778 are used for ones that either have a numerical number or defaults, i.e.
2779 the data is optional. If a digit follows, there is data; if not, carry on
2780 with other single-character options in the same string. */
2781
2782 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2783 {
2784 if (isdigit((unsigned char)s[1])) break;
2785 }
2786 else /* Check for an option with data */
2787 {
2788 if (op->type != OP_NODATA) break;
2789 }
2790
2791 /* Handle a single-character option with no data, then loop for the
2792 next character in the string. */
2793
2794 pcre_options = handle_option(*s++, pcre_options);
2795 }
2796 }
2797
2798 /* At this point we should have op pointing to a matched option. If the type
2799 is OP_NODATA, it means that there is no data, and the option might set
2800 something in the PCRE options. */
2801
2802 if (op->type == OP_NODATA)
2803 {
2804 pcre_options = handle_option(op->one_char, pcre_options);
2805 continue;
2806 }
2807
2808 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2809 either has a value or defaults to something. It cannot have data in a
2810 separate item. At the moment, the only such options are "colo(u)r",
2811 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2812
2813 if (*option_data == 0 &&
2814 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2815 op->type == OP_OP_NUMBERS))
2816 {
2817 switch (op->one_char)
2818 {
2819 case N_COLOUR:
2820 colour_option = (char *)"auto";
2821 break;
2822
2823 case 'o':
2824 only_matching_last = add_number(0, only_matching_last);
2825 if (only_matching == NULL) only_matching = only_matching_last;
2826 break;
2827
2828 #ifdef JFRIEDL_DEBUG
2829 case 'S':
2830 S_arg = 0;
2831 break;
2832 #endif
2833 }
2834 continue;
2835 }
2836
2837 /* Otherwise, find the data string for the option. */
2838
2839 if (*option_data == 0)
2840 {
2841 if (i >= argc - 1 || longopwasequals)
2842 {
2843 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2844 pcregrep_exit(usage(2));
2845 }
2846 option_data = argv[++i];
2847 }
2848
2849 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2850 added to a chain of numbers. */
2851
2852 if (op->type == OP_OP_NUMBERS)
2853 {
2854 unsigned long int n = decode_number(option_data, op, longop);
2855 omdatastr *omd = (omdatastr *)op->dataptr;
2856 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2857 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2858 }
2859
2860 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2861 include/exclude options, which can be called multiple times to create lists
2862 of patterns. */
2863
2864 else if (op->type == OP_PATLIST)
2865 {
2866 patdatastr *pd = (patdatastr *)op->dataptr;
2867 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2868 if (*(pd->lastptr) == NULL) goto EXIT2;
2869 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2870 }
2871
2872 /* If the option type is OP_FILELIST, it's one of the options that names a
2873 file. */
2874
2875 else if (op->type == OP_FILELIST)
2876 {
2877 fndatastr *fd = (fndatastr *)op->dataptr;
2878 fn = (fnstr *)malloc(sizeof(fnstr));
2879 if (fn == NULL)
2880 {
2881 fprintf(stderr, "pcregrep: malloc failed\n");
2882 goto EXIT2;
2883 }
2884 fn->next = NULL;
2885 fn->name = option_data;
2886 if (*(fd->anchor) == NULL)
2887 *(fd->anchor) = fn;
2888 else
2889 (*(fd->lastptr))->next = fn;
2890 *(fd->lastptr) = fn;
2891 }
2892
2893 /* Handle OP_BINARY_FILES */
2894
2895 else if (op->type == OP_BINFILES)
2896 {
2897 if (strcmp(option_data, "binary") == 0)
2898 binary_files = BIN_BINARY;
2899 else if (strcmp(option_data, "without-match") == 0)
2900 binary_files = BIN_NOMATCH;
2901 else if (strcmp(option_data, "text") == 0)
2902 binary_files = BIN_TEXT;
2903 else
2904 {
2905 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2906 option_data);
2907 pcregrep_exit(usage(2));
2908 }
2909 }
2910
2911 /* Otherwise, deal with a single string or numeric data value. */
2912
2913 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2914 op->type != OP_OP_NUMBER)
2915 {
2916 *((char **)op->dataptr) = option_data;
2917 }
2918 else
2919 {
2920 unsigned long int n = decode_number(option_data, op, longop);
2921 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
2922 else *((int *)op->dataptr) = n;
2923 }
2924 }
2925
2926 /* Options have been decoded. If -C was used, its value is used as a default
2927 for -A and -B. */
2928
2929 if (both_context > 0)
2930 {
2931 if (after_context == 0) after_context = both_context;
2932 if (before_context == 0) before_context = both_context;
2933 }
2934
2935 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2936 However, all three set show_only_matching because they display, each in their
2937 own way, only the data that has matched. */
2938
2939 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
2940 (file_offsets && line_offsets))
2941 {
2942 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2943 "and/or --line-offsets\n");
2944 pcregrep_exit(usage(2));
2945 }
2946
2947 if (only_matching != NULL || file_offsets || line_offsets)
2948 show_only_matching = TRUE;
2949
2950 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2951 LC_ALL environment variable is set, and if so, use it. */
2952
2953 if (locale == NULL)
2954 {
2955 locale = getenv("LC_ALL");
2956 locale_from = "LCC_ALL";
2957 }
2958
2959 if (locale == NULL)
2960 {
2961 locale = getenv("LC_CTYPE");
2962 locale_from = "LC_CTYPE";
2963 }
2964
2965 /* If a locale has been provided, set it, and generate the tables the PCRE
2966 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2967
2968 if (locale != NULL)
2969 {
2970 if (setlocale(LC_CTYPE, locale) == NULL)
2971 {
2972 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2973 locale, locale_from);
2974 return 2;
2975 }
2976 pcretables = pcre_maketables();
2977 }
2978
2979 /* Sort out colouring */
2980
2981 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2982 {
2983 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2984 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2985 else
2986 {
2987 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2988 colour_option);
2989 return 2;
2990 }
2991 if (do_colour)
2992 {
2993 char *cs = getenv("PCREGREP_COLOUR");
2994 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2995 if (cs != NULL) colour_string = cs;
2996 }
2997 }
2998
2999 /* Interpret the newline type; the default settings are Unix-like. */
3000
3001 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3002 {
3003 pcre_options |= PCRE_NEWLINE_CR;
3004 endlinetype = EL_CR;
3005 }
3006 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3007 {
3008 pcre_options |= PCRE_NEWLINE_LF;
3009 endlinetype = EL_LF;
3010 }
3011 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3012 {
3013 pcre_options |= PCRE_NEWLINE_CRLF;
3014 endlinetype = EL_CRLF;
3015 }
3016 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3017 {
3018 pcre_options |= PCRE_NEWLINE_ANY;
3019 endlinetype = EL_ANY;
3020 }
3021 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3022 {
3023 pcre_options |= PCRE_NEWLINE_ANYCRLF;
3024 endlinetype = EL_ANYCRLF;
3025 }
3026 else
3027 {
3028 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3029 return 2;
3030 }
3031
3032 /* Interpret the text values for -d and -D */
3033
3034 if (dee_option != NULL)
3035 {
3036 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3037 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3038 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3039 else
3040 {
3041 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3042 return 2;
3043 }
3044 }
3045
3046 if (DEE_option != NULL)
3047 {
3048 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3049 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3050 else
3051 {
3052 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3053 return 2;
3054 }
3055 }
3056
3057 /* Check the values for Jeffrey Friedl's debugging options. */
3058
3059 #ifdef JFRIEDL_DEBUG
3060 if (S_arg > 9)
3061 {
3062 fprintf(stderr, "pcregrep: bad value for -S option\n");
3063 return 2;
3064 }
3065 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3066 {
3067 if (jfriedl_XT == 0) jfriedl_XT = 1;
3068 if (jfriedl_XR == 0) jfriedl_XR = 1;
3069 }
3070 #endif
3071
3072 /* Get memory for the main buffer. */
3073
3074 bufsize = 3*bufthird;
3075 main_buffer = (char *)malloc(bufsize);
3076
3077 if (main_buffer == NULL)
3078 {
3079 fprintf(stderr, "pcregrep: malloc failed\n");
3080 goto EXIT2;
3081 }
3082
3083 /* If no patterns were provided by -e, and there are no files provided by -f,
3084 the first argument is the one and only pattern, and it must exist. */
3085
3086 if (patterns == NULL && pattern_files == NULL)
3087 {
3088 if (i >= argc) return usage(2);
3089 patterns = patterns_last = add_pattern(argv[i++], NULL);
3090 if (patterns == NULL) goto EXIT2;
3091 }
3092
3093 /* Compile the patterns that were provided on the command line, either by
3094 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3095 after all the command-line options are read so that we know which PCRE options
3096 to use. When -F is used, compile_pattern() may add another block into the
3097 chain, so we must not access the next pointer till after the compile. */
3098
3099 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3100 {
3101 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3102 (j == 1 && patterns->next == NULL)? 0 : j))
3103 goto EXIT2;
3104 }
3105
3106 /* Read and compile the regular expressions that are provided in files. */
3107
3108 for (fn = pattern_files; fn != NULL; fn = fn->next)
3109 {
3110 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3111 goto EXIT2;
3112 }
3113
3114 /* Study the regular expressions, as we will be running them many times. If an
3115 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3116 returned, even if studying produces no data. */
3117
3118 if (match_limit > 0 || match_limit_recursion > 0)
3119 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3120
3121 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3122
3123 #ifdef SUPPORT_PCREGREP_JIT
3124 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3125 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3126 #endif
3127
3128 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3129 {
3130 cp->hint = pcre_study(cp->compiled, study_options, &error);
3131 if (error != NULL)
3132 {
3133 char s[16];
3134 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
3135 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
3136 goto EXIT2;
3137 }
3138 #ifdef SUPPORT_PCREGREP_JIT
3139 if (jit_stack != NULL && cp->hint != NULL)
3140 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3141 #endif
3142 }
3143
3144 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3145 pcre_extra block for each pattern. There will always be an extra block because
3146 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3147
3148 for (cp = patterns; cp != NULL; cp = cp->next)
3149 {
3150 if (match_limit > 0)
3151 {
3152 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3153 cp->hint->match_limit = match_limit;
3154 }
3155
3156 if (match_limit_recursion > 0)
3157 {
3158 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3159 cp->hint->match_limit_recursion = match_limit_recursion;
3160 }
3161 }
3162
3163 /* If there are include or exclude patterns read from the command line, compile
3164 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3165 0. */
3166
3167 for (j = 0; j < 4; j++)
3168 {
3169 int k;
3170 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3171 {
3172 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3173 (k == 1 && cp->next == NULL)? 0 : k))
3174 goto EXIT2;
3175 }
3176 }
3177
3178 /* Read and compile include/exclude patterns from files. */
3179
3180 for (fn = include_from; fn != NULL; fn = fn->next)
3181 {
3182 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3183 goto EXIT2;
3184 }
3185
3186 for (fn = exclude_from; fn != NULL; fn = fn->next)
3187 {
3188 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3189 goto EXIT2;
3190 }
3191
3192 /* If there are no files that contain lists of files to search, and there are
3193 no file arguments, search stdin, and then exit. */
3194
3195 if (file_lists == NULL && i >= argc)
3196 {
3197 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3198 (filenames > FN_DEFAULT)? stdin_name : NULL);
3199 goto EXIT;
3200 }
3201
3202 /* If any files that contain a list of files to search have been specified,
3203 read them line by line and search the given files. */
3204
3205 for (fn = file_lists; fn != NULL; fn = fn->next)
3206 {
3207 char buffer[PATBUFSIZE];
3208 FILE *fl;
3209 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3210 {
3211 fl = fopen(fn->name, "rb");
3212 if (fl == NULL)
3213 {
3214 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3215 strerror(errno));
3216 goto EXIT2;
3217 }
3218 }
3219 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3220 {
3221 int frc;
3222 char *end = buffer + (int)strlen(buffer);
3223 while (end > buffer && isspace(end[-1])) end--;
3224 *end = 0;
3225 if (*buffer != 0)
3226 {
3227 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3228 if (frc > 1) rc = frc;
3229 else if (frc == 0 && rc == 1) rc = 0;
3230 }
3231 }
3232 if (fl != stdin) fclose(fl);
3233 }
3234
3235 /* After handling file-list, work through remaining arguments. Pass in the fact
3236 that there is only one argument at top level - this suppresses the file name if
3237 the argument is not a directory and filenames are not otherwise forced. */
3238
3239 only_one_at_top = i == argc - 1 && file_lists == NULL;
3240
3241 for (; i < argc; i++)
3242 {
3243 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3244 only_one_at_top);
3245 if (frc > 1) rc = frc;
3246 else if (frc == 0 && rc == 1) rc = 0;
3247 }
3248
3249 EXIT:
3250 #ifdef SUPPORT_PCREGREP_JIT
3251 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3252 #endif
3253
3254 if (main_buffer != NULL) free(main_buffer);
3255
3256 free_pattern_chain(patterns);
3257 free_pattern_chain(include_patterns);
3258 free_pattern_chain(include_dir_patterns);
3259 free_pattern_chain(exclude_patterns);
3260 free_pattern_chain(exclude_dir_patterns);
3261
3262 free_file_chain(exclude_from);
3263 free_file_chain(include_from);
3264 free_file_chain(pattern_files);
3265 free_file_chain(file_lists);
3266
3267 while (only_matching != NULL)
3268 {
3269 omstr *this = only_matching;
3270 only_matching = this->next;
3271 free(this);
3272 }
3273
3274 pcregrep_exit(rc);
3275
3276 EXIT2:
3277 rc = 2;
3278 goto EXIT;
3279 }
3280
3281 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12