/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1003 - (show annotations) (download)
Wed Aug 15 16:45:36 2012 UTC (23 months, 2 weeks ago) by ph10
File MIME type: text/plain
File size: 88701 byte(s)
Fix a number of issues in pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2012 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean values and the type used to hold them throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): presumably the size of the offsets vector handed to
pcre_exec(); the point of use is outside this section - confirm. */

#define OFFSET_SIZE 99

/* The longest pattern that is accepted is BUFSIZ bytes, but never less than
8192 bytes. */

#if BUFSIZ > 8192
#define MAXPATLEN BUFSIZ
#else
#define MAXPATLEN 8192
#endif

/* Size of a buffer that can hold a maximum-length pattern together with the
longest prefix and suffix that may be wrapped round it. */

#define PATBUFSIZE (MAXPATLEN + 10) /* Allows for prefix+suffix */

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };

/* Binary file options */

enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };

/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that FWRITE(...); is a single
statement. A bare "if (...) {}" would capture a following "else", or fail to
compile, when the macro is used as the body of an if/else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
119
120
121
122 /*************************************************
123 * Global variables *
124 *************************************************/
125
126 /* Jeffrey Friedl has some debugging requirements that are not part of the
127 regular code. */
128
129 #ifdef JFRIEDL_DEBUG
130 static int S_arg = -1;
131 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
132 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
133 static const char *jfriedl_prefix = "";
134 static const char *jfriedl_postfix = "";
135 #endif
136
137 static int endlinetype;
138
139 static char *colour_string = (char *)"1;31";
140 static char *colour_option = NULL;
141 static char *dee_option = NULL;
142 static char *DEE_option = NULL;
143 static char *locale = NULL;
144 static char *main_buffer = NULL;
145 static char *newline = NULL;
146 static char *stdin_name = (char *)"(standard input)";
147
148 static const unsigned char *pcretables = NULL;
149
150 static int after_context = 0;
151 static int before_context = 0;
152 static int binary_files = BIN_BINARY;
153 static int both_context = 0;
154 static int bufthird = PCREGREP_BUFSIZE;
155 static int bufsize = 3*PCREGREP_BUFSIZE;
156
157 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
158 static int dee_action = dee_SKIP;
159 #else
160 static int dee_action = dee_READ;
161 #endif
162
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int pcre_options = 0;
168 static int process_options = 0;
169
170 #ifdef SUPPORT_PCREGREP_JIT
171 static int study_options = PCRE_STUDY_JIT_COMPILE;
172 #else
173 static int study_options = 0;
174 #endif
175
176 static unsigned long int match_limit = 0;
177 static unsigned long int match_limit_recursion = 0;
178
179 static BOOL count_only = FALSE;
180 static BOOL do_colour = FALSE;
181 static BOOL file_offsets = FALSE;
182 static BOOL hyphenpending = FALSE;
183 static BOOL invert = FALSE;
184 static BOOL line_buffered = FALSE;
185 static BOOL line_offsets = FALSE;
186 static BOOL multiline = FALSE;
187 static BOOL number = FALSE;
188 static BOOL omit_zero_count = FALSE;
189 static BOOL resource_error = FALSE;
190 static BOOL quiet = FALSE;
191 static BOOL silent = FALSE;
192 static BOOL utf8 = FALSE;
193
/* One entry in a singly linked list of file names (used for -f, --file-list
and --{in,ex}clude-from). */

typedef struct fnstr {
  struct fnstr *next;   /* following entry, or NULL at the end of the list */
  char *name;           /* the file name */
} fnstr;

/* Each list is described by a pair of variables: the first entry and the
last entry. All lists start out empty. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;

static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;

static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;

/* Addresses of the two variables that describe one file name list, so that a
single pointer (as stored in the option table) identifies a whole list. */

typedef struct fndatastr {
  fnstr **anchor;    /* address of the variable holding the first entry */
  fnstr **lastptr;   /* address of the variable holding the last entry */
} fndatastr;

static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
static fndatastr include_from_data = { &include_from, &include_from_last };
static fndatastr file_lists_data = { &file_lists, &file_lists_last };
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
222
223 /* Structure for pattern and its compiled form; used for matching patterns and
224 also for include/exclude patterns. */
225
226 typedef struct patstr {
227 struct patstr *next;
228 char *string;
229 pcre *compiled;
230 pcre_extra *hint;
231 } patstr;
232
233 static patstr *patterns = NULL;
234 static patstr *patterns_last = NULL;
235 static patstr *include_patterns = NULL;
236 static patstr *include_patterns_last = NULL;
237 static patstr *exclude_patterns = NULL;
238 static patstr *exclude_patterns_last = NULL;
239 static patstr *include_dir_patterns = NULL;
240 static patstr *include_dir_patterns_last = NULL;
241 static patstr *exclude_dir_patterns = NULL;
242 static patstr *exclude_dir_patterns_last = NULL;
243
244 /* Structure holding the two variables that describe a pattern chain. A pointer
245 to such structures is used for each appropriate option. */
246
247 typedef struct patdatastr {
248 patstr **anchor;
249 patstr **lastptr;
250 } patdatastr;
251
252 static patdatastr match_patdata = { &patterns, &patterns_last };
253 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
254 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
255 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
256 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
257
258 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
259 &include_dir_patterns, &exclude_dir_patterns };
260
261 static const char *incexname[4] = { "--include", "--exclude",
262 "--include-dir", "--exclude-dir" };
263
264 /* Structure for options and list of them */
265
266 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
267 OP_OP_NUMBER, OP_PATLIST, OP_FILELIST, OP_BINFILES };
268
269 typedef struct option_item {
270 int type;
271 int one_char;
272 void *dataptr;
273 const char *long_name;
274 const char *help_text;
275 } option_item;
276
277 /* Options without a single-letter equivalent get a negative value. This can be
278 used to identify them. */
279
280 #define N_COLOUR (-1)
281 #define N_EXCLUDE (-2)
282 #define N_EXCLUDE_DIR (-3)
283 #define N_HELP (-4)
284 #define N_INCLUDE (-5)
285 #define N_INCLUDE_DIR (-6)
286 #define N_LABEL (-7)
287 #define N_LOCALE (-8)
288 #define N_NULL (-9)
289 #define N_LOFFSETS (-10)
290 #define N_FOFFSETS (-11)
291 #define N_LBUFFER (-12)
292 #define N_M_LIMIT (-13)
293 #define N_M_LIMIT_REC (-14)
294 #define N_BUFSIZE (-15)
295 #define N_NOJIT (-16)
296 #define N_FILE_LIST (-17)
297 #define N_BINARY_FILES (-18)
298 #define N_EXCLUDE_FROM (-19)
299 #define N_INCLUDE_FROM (-20)
300
301 static option_item optionlist[] = {
302 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
303 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
304 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
305 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
306 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
307 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
308 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
309 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
310 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
311 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
312 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
313 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
314 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
315 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
316 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
317 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
318 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
319 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
320 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
321 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
322 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
323 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
324 #ifdef SUPPORT_PCREGREP_JIT
325 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
326 #else
327 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
328 #endif
329 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
330 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
331 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
332 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
333 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
334 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
335 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
336 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
337 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
338 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
339 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
340 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
341 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
342 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
343 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
344 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
345 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
346 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
347 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
348 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
349
350 /* These two were accidentally implemented with underscores instead of
351 hyphens in the option names. As this was not discovered for several releases,
352 the incorrect versions are left in the table for compatibility. However, the
353 --help function misses out any option that has an underscore in its name. */
354
355 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
356 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
357
358 #ifdef JFRIEDL_DEBUG
359 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
360 #endif
361 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
362 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
363 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
364 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
365 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
366 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
367 { OP_NODATA, 0, NULL, NULL, NULL }
368 };
369
/* Text placed before and after each pattern to implement the -w, -x, and -F
options. The tables are indexed by process_options, in which those options
set the 1, 2, and 4 bits respectively. Combining -w with -x has the same
effect as -x on its own, so entries 2 and 3 are identical, as are 6 and 7.
The longest prefix+suffix pair is 10 characters, which is the allowance made
by PATBUFSIZE on top of MAXPATLEN; if anything longer is added, that allowance
must be adjusted. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};

/* UTF-8 tables - used only when the newline setting is "any". */

/* Mask for the data bits of a lead byte, indexed by the number of additional
bytes in the character. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Number of additional bytes that follow a lead byte, indexed by the bottom
six bits of that byte. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xc0 - 0xcf */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xd0 - 0xdf */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    /* 0xe0 - 0xef */
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5     /* 0xf0 - 0xff */
};
392
393
394
395 /*************************************************
396 * Add item to chain of patterns *
397 *************************************************/
398
399 /* Used to add an item onto a chain, or just return an unconnected item if the
400 "after" argument is NULL.
401
402 Arguments:
403 s pattern string to add
404 after if not NULL points to item to insert after
405
406 Returns: new pattern block, or NULL after malloc failure
407 */
408
409 static patstr *
410 add_pattern(char *s, patstr *after)
411 {
412 patstr *p = (patstr *)malloc(sizeof(patstr));
413 if (p == NULL)
414 {
415 fprintf(stderr, "pcregrep: malloc failed\n");
416 return NULL;
417 }
418 if (strlen(s) > MAXPATLEN)
419 {
420 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
421 MAXPATLEN);
422 return NULL;
423 }
424 p->next = NULL;
425 p->string = s;
426 p->compiled = NULL;
427 p->hint = NULL;
428
429 if (after != NULL)
430 {
431 p->next = after->next;
432 after->next = p;
433 }
434 return p;
435 }
436
437
438 /*************************************************
439 * Free chain of patterns *
440 *************************************************/
441
442 /* Used for several chains of patterns.
443
444 Argument: pointer to start of chain
445 Returns: nothing
446 */
447
448 static void
449 free_pattern_chain(patstr *pc)
450 {
451 while (pc != NULL)
452 {
453 patstr *p = pc;
454 pc = p->next;
455 if (p->hint != NULL) pcre_free_study(p->hint);
456 if (p->compiled != NULL) pcre_free(p->compiled);
457 free(p);
458 }
459 }
460
461
462 /*************************************************
463 * Free chain of file names *
464 *************************************************/
465
466 /*
467 Argument: pointer to start of chain
468 Returns: nothing
469 */
470
471 static void
472 free_file_chain(fnstr *fn)
473 {
474 while (fn != NULL)
475 {
476 fnstr *f = fn;
477 fn = f->next;
478 free(f);
479 }
480 }
481
482
483 /*************************************************
484 * Exit from the program *
485 *************************************************/
486
487 /* If there has been a resource error, give a suitable message.
488
489 Argument: the return code
490 Returns: does not return
491 */
492
493 static void
494 pcregrep_exit(int rc)
495 {
496 if (resource_error)
497 {
498 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
499 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
500 PCRE_ERROR_JIT_STACKLIMIT);
501 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
502 }
503
504 exit(rc);
505 }
506
507
508 /*************************************************
509 * OS-specific functions *
510 *************************************************/
511
512 /* These functions are defined so that they can be made system specific,
513 although at present the only ones are for Unix, Win32, and for "no support". */
514
515
516 /************* Directory scanning in Unix ***********/
517
518 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
519 #include <sys/types.h>
520 #include <sys/stat.h>
521 #include <dirent.h>
522
/* In Unix a directory scan is represented directly by the DIR stream. */
typedef DIR directory_type;

/* The character that separates the components of a path. */
#define FILESEP '/'
525
/* Test whether a path names a directory.

Argument:    the path to test
Returns:     1 if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* When stat() fails, report "not a directory", in the expectation that
  opening as a file will fail. */

  if (stat(filename, &info) >= 0)
    return (info.st_mode & S_IFMT) == S_IFDIR;
  return 0;
}
534
535 static directory_type *
536 opendirectory(char *filename)
537 {
538 return opendir(filename);
539 }
540
541 static char *
542 readdirectory(directory_type *dir)
543 {
544 for (;;)
545 {
546 struct dirent *dent = readdir(dir);
547 if (dent == NULL) return NULL;
548 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
549 return dent->d_name;
550 }
551 /* Control never reaches here */
552 }
553
554 static void
555 closedirectory(directory_type *dir)
556 {
557 closedir(dir);
558 }
559
560
561 /************* Test for regular file in Unix **********/
562
/* Test whether a path names a regular file.

Argument:    the path to test
Returns:     1 if stat() reports a regular file or if stat() fails;
             0 if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* When stat() fails, claim that the file is regular, in the expectation
  that opening as a file will fail. */

  if (stat(filename, &info) >= 0)
    return (info.st_mode & S_IFMT) == S_IFREG;
  return 1;
}
571
572
573 /************* Test for a terminal in Unix **********/
574
575 static BOOL
576 is_stdout_tty(void)
577 {
578 return isatty(fileno(stdout));
579 }
580
581 static BOOL
582 is_file_tty(FILE *f)
583 {
584 return isatty(fileno(f));
585 }
586
587
588 /************* Directory scanning in Win32 ***********/
589
590 /* I (Philip Hazel) have no means of testing this code. It was contributed by
591 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
592 when it did not exist. David Byron added a patch that moved the #include of
593 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
594 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
595 undefined when it is indeed undefined. */
596
597 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
598
599 #ifndef STRICT
600 # define STRICT
601 #endif
602 #ifndef WIN32_LEAN_AND_MEAN
603 # define WIN32_LEAN_AND_MEAN
604 #endif
605
606 #include <windows.h>
607
608 #ifndef INVALID_FILE_ATTRIBUTES
609 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
610 #endif
611
612 typedef struct directory_type
613 {
614 HANDLE handle;
615 BOOL first;
616 WIN32_FIND_DATA data;
617 } directory_type;
618
619 #DEFINE FILESEP '/'
620
621 int
622 isdirectory(char *filename)
623 {
624 DWORD attr = GetFileAttributes(filename);
625 if (attr == INVALID_FILE_ATTRIBUTES)
626 return 0;
627 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
628 }
629
630 directory_type *
631 opendirectory(char *filename)
632 {
633 size_t len;
634 char *pattern;
635 directory_type *dir;
636 DWORD err;
637 len = strlen(filename);
638 pattern = (char *)malloc(len + 3);
639 dir = (directory_type *)malloc(sizeof(*dir));
640 if ((pattern == NULL) || (dir == NULL))
641 {
642 fprintf(stderr, "pcregrep: malloc failed\n");
643 pcregrep_exit(2);
644 }
645 memcpy(pattern, filename, len);
646 memcpy(&(pattern[len]), "\\*", 3);
647 dir->handle = FindFirstFile(pattern, &(dir->data));
648 if (dir->handle != INVALID_HANDLE_VALUE)
649 {
650 free(pattern);
651 dir->first = TRUE;
652 return dir;
653 }
654 err = GetLastError();
655 free(pattern);
656 free(dir);
657 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
658 return NULL;
659 }
660
661 char *
662 readdirectory(directory_type *dir)
663 {
664 for (;;)
665 {
666 if (!dir->first)
667 {
668 if (!FindNextFile(dir->handle, &(dir->data)))
669 return NULL;
670 }
671 else
672 {
673 dir->first = FALSE;
674 }
675 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
676 return dir->data.cFileName;
677 }
678 #ifndef _MSC_VER
679 return NULL; /* Keep compiler happy; never executed */
680 #endif
681 }
682
683 void
684 closedirectory(directory_type *dir)
685 {
686 FindClose(dir->handle);
687 free(dir);
688 }
689
690
691 /************* Test for regular file in Win32 **********/
692
693 /* I don't know how to do this, or if it can be done; assume all paths are
694 regular if they are not directories. */
695
/* Anything that is not a directory is taken to be a regular file.

Argument:    the path to test
Returns:     1 if isdirectory() returns zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
700
701
702 /************* Test for a terminal in Win32 **********/
703
704 /* I don't know how to do this; assume never */
705
706 static BOOL
707 is_stdout_tty(void)
708 {
709 return FALSE;
710 }
711
712 static BOOL
713 is_file_tty(FILE *f)
714 {
715 return FALSE;
716 }
717
718
719 /************* Directory scanning when we can't do it ***********/
720
721 /* The type is void, and apart from isdirectory(), the functions do nothing. */
722
723 #else
724
/* There is no path separator when directories are not supported. The
directive has to be spelt "#define" (directive names are case-sensitive), and
the replacement must not end with a semicolon, because the macro is used
inside expressions. */

#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported as being a directory. */
int isdirectory(char *filename) { (void)filename; return 0; }

/* A directory scan can never be started; always returns NULL. */
directory_type * opendirectory(char *filename) { (void)filename; return (directory_type*)0; }

/* There is never a next entry; always returns NULL. */
char *readdirectory(directory_type *dir) { (void)dir; return (char*)0; }

/* There is nothing to close. */
void closedirectory(directory_type *dir) { (void)dir; }
732
733
734 /************* Test for regular when we can't do it **********/
735
736 /* Assume all files are regular. */
737
/* Every path is reported as a regular file; the argument is not examined. */
int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
739
740
741 /************* Test for a terminal when we can't do it **********/
742
743 static BOOL
744 is_stdout_tty(void)
745 {
746 return FALSE;
747 }
748
749 static BOOL
750 is_file_tty(FILE *f)
751 {
752 return FALSE;
753 }
754
755 #endif
756
757
758
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) have no strerror() in
their libraries. On those systems the messages can be obtained from the
sys_errlist[] table, which has sys_nerr entries.

Argument:    an error number
Returns:     the corresponding entry in sys_errlist[], or a fixed string if
             the number is outside the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
778
779
780
781 /*************************************************
782 * Test exclude/includes *
783 *************************************************/
784
785 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
786 there are no includes, the path must match an include pattern.
787
788 Arguments:
789 path the path to be matched
790 ip the chain of include patterns
791 ep the chain of exclude patterns
792
793 Returns: TRUE if the path is not excluded
794 */
795
796 static BOOL
797 test_incexc(char *path, patstr *ip, patstr *ep)
798 {
799 int plen = strlen(path);
800
801 for (; ep != NULL; ep = ep->next)
802 {
803 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
804 return FALSE;
805 }
806
807 if (ip == NULL) return TRUE;
808
809 for (; ip != NULL; ip = ip->next)
810 {
811 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
812 return TRUE;
813 }
814
815 return FALSE;
816 }
817
818
819
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it gives no way of telling how
many characters it has read when there are binary zeros embedded in the data.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first. If "length" is zero or
negative there is no room to store anything, so nothing is read from the file
and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
             (or when length is not positive)
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  if (length <= 0) return 0;   /* No room: do not store even one character */

  while ((c = fgetc(f)) != EOF)
    {
    buffer[yield++] = (char)c;
    if (c == '\n' || yield >= length) break;
    }

  return (unsigned int)yield;
}
851
852
853
854 /*************************************************
855 * Find end of line *
856 *************************************************/
857
858 /* The length of the endline sequence that is found is set via lenptr. This may
859 be zero at the very end of the file if there is no line-ending sequence there.
860
861 Arguments:
862 p current position in line
863 endptr end of available data
864 lenptr where to put the length of the eol sequence
865
866 Returns: pointer after the last byte of the line,
867 including the newline byte(s)
868 */
869
870 static char *
871 end_of_line(char *p, char *endptr, int *lenptr)
872 {
873 switch(endlinetype)
874 {
875 default: /* Just in case */
876 case EL_LF:
877 while (p < endptr && *p != '\n') p++;
878 if (p < endptr)
879 {
880 *lenptr = 1;
881 return p + 1;
882 }
883 *lenptr = 0;
884 return endptr;
885
886 case EL_CR:
887 while (p < endptr && *p != '\r') p++;
888 if (p < endptr)
889 {
890 *lenptr = 1;
891 return p + 1;
892 }
893 *lenptr = 0;
894 return endptr;
895
896 case EL_CRLF:
897 for (;;)
898 {
899 while (p < endptr && *p != '\r') p++;
900 if (++p >= endptr)
901 {
902 *lenptr = 0;
903 return endptr;
904 }
905 if (*p == '\n')
906 {
907 *lenptr = 2;
908 return p + 1;
909 }
910 }
911 break;
912
913 case EL_ANYCRLF:
914 while (p < endptr)
915 {
916 int extra = 0;
917 register int c = *((unsigned char *)p);
918
919 if (utf8 && c >= 0xc0)
920 {
921 int gcii, gcss;
922 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
923 gcss = 6*extra;
924 c = (c & utf8_table3[extra]) << gcss;
925 for (gcii = 1; gcii <= extra; gcii++)
926 {
927 gcss -= 6;
928 c |= (p[gcii] & 0x3f) << gcss;
929 }
930 }
931
932 p += 1 + extra;
933
934 switch (c)
935 {
936 case 0x0a: /* LF */
937 *lenptr = 1;
938 return p;
939
940 case 0x0d: /* CR */
941 if (p < endptr && *p == 0x0a)
942 {
943 *lenptr = 2;
944 p++;
945 }
946 else *lenptr = 1;
947 return p;
948
949 default:
950 break;
951 }
952 } /* End of loop for ANYCRLF case */
953
954 *lenptr = 0; /* Must have hit the end */
955 return endptr;
956
957 case EL_ANY:
958 while (p < endptr)
959 {
960 int extra = 0;
961 register int c = *((unsigned char *)p);
962
963 if (utf8 && c >= 0xc0)
964 {
965 int gcii, gcss;
966 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
967 gcss = 6*extra;
968 c = (c & utf8_table3[extra]) << gcss;
969 for (gcii = 1; gcii <= extra; gcii++)
970 {
971 gcss -= 6;
972 c |= (p[gcii] & 0x3f) << gcss;
973 }
974 }
975
976 p += 1 + extra;
977
978 switch (c)
979 {
980 case 0x0a: /* LF */
981 case 0x0b: /* VT */
982 case 0x0c: /* FF */
983 *lenptr = 1;
984 return p;
985
986 case 0x0d: /* CR */
987 if (p < endptr && *p == 0x0a)
988 {
989 *lenptr = 2;
990 p++;
991 }
992 else *lenptr = 1;
993 return p;
994
995 case 0x85: /* NEL */
996 *lenptr = utf8? 2 : 1;
997 return p;
998
999 case 0x2028: /* LS */
1000 case 0x2029: /* PS */
1001 *lenptr = 3;
1002 return p;
1003
1004 default:
1005 break;
1006 }
1007 } /* End of loop for ANY case */
1008
1009 *lenptr = 0; /* Must have hit the end */
1010 return endptr;
1011 } /* End of overall switch */
1012 }
1013
1014
1015
1016 /*************************************************
1017 * Find start of previous line *
1018 *************************************************/
1019
1020 /* This is called when looking back for before lines to print.
1021
1022 Arguments:
1023 p start of the subsequent line
1024 startptr start of available data
1025
1026 Returns: pointer to the start of the previous line
1027 */
1028
1029 static char *
1030 previous_line(char *p, char *startptr)
1031 {
1032 switch(endlinetype)
1033 {
1034 default: /* Just in case */
1035 case EL_LF:
1036 p--;
1037 while (p > startptr && p[-1] != '\n') p--;
1038 return p;
1039
1040 case EL_CR:
1041 p--;
1042 while (p > startptr && p[-1] != '\n') p--;
1043 return p;
1044
1045 case EL_CRLF:
1046 for (;;)
1047 {
1048 p -= 2;
1049 while (p > startptr && p[-1] != '\n') p--;
1050 if (p <= startptr + 1 || p[-2] == '\r') return p;
1051 }
1052 return p; /* But control should never get here */
1053
1054 case EL_ANY:
1055 case EL_ANYCRLF:
1056 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1057 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1058
1059 while (p > startptr)
1060 {
1061 register int c;
1062 char *pp = p - 1;
1063
1064 if (utf8)
1065 {
1066 int extra = 0;
1067 while ((*pp & 0xc0) == 0x80) pp--;
1068 c = *((unsigned char *)pp);
1069 if (c >= 0xc0)
1070 {
1071 int gcii, gcss;
1072 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1073 gcss = 6*extra;
1074 c = (c & utf8_table3[extra]) << gcss;
1075 for (gcii = 1; gcii <= extra; gcii++)
1076 {
1077 gcss -= 6;
1078 c |= (pp[gcii] & 0x3f) << gcss;
1079 }
1080 }
1081 }
1082 else c = *((unsigned char *)pp);
1083
1084 if (endlinetype == EL_ANYCRLF) switch (c)
1085 {
1086 case 0x0a: /* LF */
1087 case 0x0d: /* CR */
1088 return p;
1089
1090 default:
1091 break;
1092 }
1093
1094 else switch (c)
1095 {
1096 case 0x0a: /* LF */
1097 case 0x0b: /* VT */
1098 case 0x0c: /* FF */
1099 case 0x0d: /* CR */
1100 case 0x85: /* NEL */
1101 case 0x2028: /* LS */
1102 case 0x2029: /* PS */
1103 return p;
1104
1105 default:
1106 break;
1107 }
1108
1109 p = pp; /* Back one character */
1110 } /* End of loop for ANY case */
1111
1112 return startptr; /* Hit start of data */
1113 } /* End of overall switch */
1114 }
1115
1116
1117
1118
1119
1120 /*************************************************
1121 * Print the previous "after" lines *
1122 *************************************************/
1123
1124 /* This is called if we are about to lose said lines because of buffer filling,
1125 and at the end of the file. The data in the line is written using fwrite() so
1126 that a binary zero does not terminate it.
1127
1128 Arguments:
1129 lastmatchnumber the number of the last matching line, plus one
1130 lastmatchrestart where we restarted after the last match
1131 endptr end of available data
1132 printname filename for printing
1133
1134 Returns: nothing
1135 */
1136
1137 static void
1138 do_after_lines(int lastmatchnumber, char *lastmatchrestart, char *endptr,
1139 char *printname)
1140 {
1141 if (after_context > 0 && lastmatchnumber > 0)
1142 {
1143 int count = 0;
1144 while (lastmatchrestart < endptr && count++ < after_context)
1145 {
1146 int ellength;
1147 char *pp = lastmatchrestart;
1148 if (printname != NULL) fprintf(stdout, "%s-", printname);
1149 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1150 pp = end_of_line(pp, endptr, &ellength);
1151 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1152 lastmatchrestart = pp;
1153 }
1154 hyphenpending = TRUE;
1155 }
1156 }
1157
1158
1159
1160 /*************************************************
1161 * Apply patterns to subject till one matches *
1162 *************************************************/
1163
1164 /* This function is called to run through all patterns, looking for a match. It
1165 is used multiple times for the same subject when colouring is enabled, in order
1166 to find all possible matches.
1167
1168 Arguments:
1169 matchptr the start of the subject
1170 length the length of the subject to match
1171 startoffset where to start matching
1172 offsets the offets vector to fill in
1173 mrc address of where to put the result of pcre_exec()
1174
1175 Returns: TRUE if there was a match
1176 FALSE if there was no match
1177 invert if there was a non-fatal error
1178 */
1179
1180 static BOOL
1181 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
1182 int *mrc)
1183 {
1184 int i;
1185 size_t slen = length;
1186 patstr *p = patterns;
1187 const char *msg = "this text:\n\n";
1188
1189 if (slen > 200)
1190 {
1191 slen = 200;
1192 msg = "text that starts:\n\n";
1193 }
1194 for (i = 1; p != NULL; p = p->next, i++)
1195 {
1196 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1197 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
1198 if (*mrc >= 0) return TRUE;
1199 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1200 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1201 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1202 fprintf(stderr, "%s", msg);
1203 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1204 fprintf(stderr, "\n\n");
1205 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1206 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1207 resource_error = TRUE;
1208 if (error_count++ > 20)
1209 {
1210 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1211 pcregrep_exit(2);
1212 }
1213 return invert; /* No more matching; don't show the line again */
1214 }
1215
1216 return FALSE; /* No match, no errors */
1217 }
1218
1219
1220
1221 /*************************************************
1222 * Grep an individual file *
1223 *************************************************/
1224
1225 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1226 times the value of bufthird. The matching point is never allowed to stray into
1227 the top third of the buffer, thus keeping more of the file available for
1228 context printing or for multiline scanning. For large files, the pointer will
1229 be in the middle third most of the time, so the bottom third is available for
1230 "before" context printing.
1231
1232 Arguments:
1233 handle the fopened FILE stream for a normal file
1234 the gzFile pointer when reading is via libz
1235 the BZFILE pointer when reading is via libbz2
1236 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1237 filename the file name or NULL (for errors)
1238 printname the file name if it is to be printed for each match
1239 or NULL if the file name is not to be printed
1240 it cannot be NULL if filenames[_nomatch]_only is set
1241
1242 Returns: 0 if there was at least one match
1243 1 otherwise (no matches)
1244 2 if an overlong line is encountered
1245 3 if there is a read error on a .bz2 file
1246 */
1247
1248 static int
1249 pcregrep(void *handle, int frtype, char *filename, char *printname)
1250 {
1251 int rc = 1;
1252 int linenumber = 1;
1253 int lastmatchnumber = 0;
1254 int count = 0;
1255 int filepos = 0;
1256 int offsets[OFFSET_SIZE];
1257 char *lastmatchrestart = NULL;
1258 char *ptr = main_buffer;
1259 char *endptr;
1260 size_t bufflength;
1261 BOOL binary = FALSE;
1262 BOOL endhyphenpending = FALSE;
1263 BOOL input_line_buffered = line_buffered;
1264 FILE *in = NULL; /* Ensure initialized */
1265
1266 #ifdef SUPPORT_LIBZ
1267 gzFile ingz = NULL;
1268 #endif
1269
1270 #ifdef SUPPORT_LIBBZ2
1271 BZFILE *inbz2 = NULL;
1272 #endif
1273
1274
1275 /* Do the first read into the start of the buffer and set up the pointer to end
1276 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1277 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1278 fail. */
1279
1280 #ifdef SUPPORT_LIBZ
1281 if (frtype == FR_LIBZ)
1282 {
1283 ingz = (gzFile)handle;
1284 bufflength = gzread (ingz, main_buffer, bufsize);
1285 }
1286 else
1287 #endif
1288
1289 #ifdef SUPPORT_LIBBZ2
1290 if (frtype == FR_LIBBZ2)
1291 {
1292 inbz2 = (BZFILE *)handle;
1293 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1294 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1295 } /* without the cast it is unsigned. */
1296 else
1297 #endif
1298
1299 {
1300 in = (FILE *)handle;
1301 if (is_file_tty(in)) input_line_buffered = TRUE;
1302 bufflength = input_line_buffered?
1303 read_one_line(main_buffer, bufsize, in) :
1304 fread(main_buffer, 1, bufsize, in);
1305 }
1306
1307 endptr = main_buffer + bufflength;
1308
1309 /* Unless binary-files=text, see if we have a binary file. This uses the same
1310 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1311 file. */
1312
1313 if (binary_files != BIN_TEXT)
1314 {
1315 binary =
1316 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1317 if (binary && binary_files == BIN_NOMATCH) return 1;
1318 }
1319
1320 /* Loop while the current pointer is not at the end of the file. For large
1321 files, endptr will be at the end of the buffer when we are in the middle of the
1322 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1323 way, the buffer is shifted left and re-filled. */
1324
1325 while (ptr < endptr)
1326 {
1327 int endlinelength;
1328 int mrc = 0;
1329 int startoffset = 0;
1330 BOOL match;
1331 char *matchptr = ptr;
1332 char *t = ptr;
1333 size_t length, linelength;
1334
1335 /* At this point, ptr is at the start of a line. We need to find the length
1336 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1337 length remainder of the data in the buffer. Otherwise, it is the length of
1338 the next line, excluding the terminating newline. After matching, we always
1339 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1340 option is used for compiling, so that any match is constrained to be in the
1341 first line. */
1342
1343 t = end_of_line(t, endptr, &endlinelength);
1344 linelength = t - ptr - endlinelength;
1345 length = multiline? (size_t)(endptr - ptr) : linelength;
1346
1347 /* Check to see if the line we are looking at extends right to the very end
1348 of the buffer without a line terminator. This means the line is too long to
1349 handle. */
1350
1351 if (endlinelength == 0 && t == main_buffer + bufsize)
1352 {
1353 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1354 "pcregrep: check the --buffer-size option\n",
1355 linenumber,
1356 (filename == NULL)? "" : " of file ",
1357 (filename == NULL)? "" : filename);
1358 return 2;
1359 }
1360
1361 /* Extra processing for Jeffrey Friedl's debugging. */
1362
1363 #ifdef JFRIEDL_DEBUG
1364 if (jfriedl_XT || jfriedl_XR)
1365 {
1366 #include <sys/time.h>
1367 #include <time.h>
1368 struct timeval start_time, end_time;
1369 struct timezone dummy;
1370 int i;
1371
1372 if (jfriedl_XT)
1373 {
1374 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1375 const char *orig = ptr;
1376 ptr = malloc(newlen + 1);
1377 if (!ptr) {
1378 printf("out of memory");
1379 pcregrep_exit(2);
1380 }
1381 endptr = ptr;
1382 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1383 for (i = 0; i < jfriedl_XT; i++) {
1384 strncpy(endptr, orig, length);
1385 endptr += length;
1386 }
1387 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1388 length = newlen;
1389 }
1390
1391 if (gettimeofday(&start_time, &dummy) != 0)
1392 perror("bad gettimeofday");
1393
1394
1395 for (i = 0; i < jfriedl_XR; i++)
1396 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1397 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1398
1399 if (gettimeofday(&end_time, &dummy) != 0)
1400 perror("bad gettimeofday");
1401
1402 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1403 -
1404 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1405
1406 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1407 return 0;
1408 }
1409 #endif
1410
1411 /* We come back here after a match when the -o option (only_matching) is set,
1412 in order to find any further matches in the same line. */
1413
1414 ONLY_MATCHING_RESTART:
1415
1416 /* Run through all the patterns until one matches or there is an error other
1417 than NOMATCH. This code is in a subroutine so that it can be re-used for
1418 finding subsequent matches when colouring matched lines. */
1419
1420 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1421
1422 /* If it's a match or a not-match (as required), do what's wanted. */
1423
1424 if (match != invert)
1425 {
1426 BOOL hyphenprinted = FALSE;
1427
1428 /* We've failed if we want a file that doesn't have any matches. */
1429
1430 if (filenames == FN_NOMATCH_ONLY) return 1;
1431
1432 /* Just count if just counting is wanted. */
1433
1434 if (count_only) count++;
1435
1436 /* When handling a binary file and binary-files==binary, the "binary"
1437 variable will be set true (it's false in all other cases). In this
1438 situation we just want to output the file name. No need to scan further. */
1439
1440 else if (binary)
1441 {
1442 fprintf(stdout, "Binary file %s matches\n", filename);
1443 return 0;
1444 }
1445
1446 /* If all we want is a file name, there is no need to scan any more lines
1447 in the file. */
1448
1449 else if (filenames == FN_MATCH_ONLY)
1450 {
1451 fprintf(stdout, "%s\n", printname);
1452 return 0;
1453 }
1454
1455 /* Likewise, if all we want is a yes/no answer. */
1456
1457 else if (quiet) return 0;
1458
1459 /* The --only-matching option prints just the substring that matched, or a
1460 captured portion of it, as long as this string is not empty, and the
1461 --file-offsets and --line-offsets options output offsets for the matching
1462 substring (they both force --only-matching = 0). None of these options
1463 prints any context. Afterwards, adjust the start and then jump back to look
1464 for further matches in the same line. If we are in invert mode, however,
1465 nothing is printed and we do not restart - this could still be useful
1466 because the return code is set. */
1467
1468 else if (only_matching >= 0)
1469 {
1470 if (!invert)
1471 {
1472 if (printname != NULL) fprintf(stdout, "%s:", printname);
1473 if (number) fprintf(stdout, "%d:", linenumber);
1474 if (line_offsets)
1475 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1476 offsets[1] - offsets[0]);
1477 else if (file_offsets)
1478 fprintf(stdout, "%d,%d\n",
1479 (int)(filepos + matchptr + offsets[0] - ptr),
1480 offsets[1] - offsets[0]);
1481 else if (only_matching < mrc)
1482 {
1483 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1484 if (plen > 0)
1485 {
1486 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1487 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1488 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1489 fprintf(stdout, "\n");
1490 }
1491 }
1492 else if (printname != NULL || number) fprintf(stdout, "\n");
1493 match = FALSE;
1494 if (line_buffered) fflush(stdout);
1495 rc = 0; /* Had some success */
1496 startoffset = offsets[1]; /* Restart after the match */
1497 goto ONLY_MATCHING_RESTART;
1498 }
1499 }
1500
1501 /* This is the default case when none of the above options is set. We print
1502 the matching lines(s), possibly preceded and/or followed by other lines of
1503 context. */
1504
1505 else
1506 {
1507 /* See if there is a requirement to print some "after" lines from a
1508 previous match. We never print any overlaps. */
1509
1510 if (after_context > 0 && lastmatchnumber > 0)
1511 {
1512 int ellength;
1513 int linecount = 0;
1514 char *p = lastmatchrestart;
1515
1516 while (p < ptr && linecount < after_context)
1517 {
1518 p = end_of_line(p, ptr, &ellength);
1519 linecount++;
1520 }
1521
1522 /* It is important to advance lastmatchrestart during this printing so
1523 that it interacts correctly with any "before" printing below. Print
1524 each line's data using fwrite() in case there are binary zeroes. */
1525
1526 while (lastmatchrestart < p)
1527 {
1528 char *pp = lastmatchrestart;
1529 if (printname != NULL) fprintf(stdout, "%s-", printname);
1530 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1531 pp = end_of_line(pp, endptr, &ellength);
1532 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1533 lastmatchrestart = pp;
1534 }
1535 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1536 }
1537
1538 /* If there were non-contiguous lines printed above, insert hyphens. */
1539
1540 if (hyphenpending)
1541 {
1542 fprintf(stdout, "--\n");
1543 hyphenpending = FALSE;
1544 hyphenprinted = TRUE;
1545 }
1546
1547 /* See if there is a requirement to print some "before" lines for this
1548 match. Again, don't print overlaps. */
1549
1550 if (before_context > 0)
1551 {
1552 int linecount = 0;
1553 char *p = ptr;
1554
1555 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1556 linecount < before_context)
1557 {
1558 linecount++;
1559 p = previous_line(p, main_buffer);
1560 }
1561
1562 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1563 fprintf(stdout, "--\n");
1564
1565 while (p < ptr)
1566 {
1567 int ellength;
1568 char *pp = p;
1569 if (printname != NULL) fprintf(stdout, "%s-", printname);
1570 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1571 pp = end_of_line(pp, endptr, &ellength);
1572 FWRITE(p, 1, pp - p, stdout);
1573 p = pp;
1574 }
1575 }
1576
1577 /* Now print the matching line(s); ensure we set hyphenpending at the end
1578 of the file if any context lines are being output. */
1579
1580 if (after_context > 0 || before_context > 0)
1581 endhyphenpending = TRUE;
1582
1583 if (printname != NULL) fprintf(stdout, "%s:", printname);
1584 if (number) fprintf(stdout, "%d:", linenumber);
1585
1586 /* In multiline mode, we want to print to the end of the line in which
1587 the end of the matched string is found, so we adjust linelength and the
1588 line number appropriately, but only when there actually was a match
1589 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1590 the match will always be before the first newline sequence. */
1591
1592 if (multiline & !invert)
1593 {
1594 char *endmatch = ptr + offsets[1];
1595 t = ptr;
1596 while (t < endmatch)
1597 {
1598 t = end_of_line(t, endptr, &endlinelength);
1599 if (t < endmatch) linenumber++; else break;
1600 }
1601 linelength = t - ptr - endlinelength;
1602 }
1603
1604 /*** NOTE: Use only fwrite() to output the data line, so that binary
1605 zeroes are treated as just another data character. */
1606
1607 /* This extra option, for Jeffrey Friedl's debugging requirements,
1608 replaces the matched string, or a specific captured string if it exists,
1609 with X. When this happens, colouring is ignored. */
1610
1611 #ifdef JFRIEDL_DEBUG
1612 if (S_arg >= 0 && S_arg < mrc)
1613 {
1614 int first = S_arg * 2;
1615 int last = first + 1;
1616 FWRITE(ptr, 1, offsets[first], stdout);
1617 fprintf(stdout, "X");
1618 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1619 }
1620 else
1621 #endif
1622
1623 /* We have to split the line(s) up if colouring, and search for further
1624 matches, but not of course if the line is a non-match. */
1625
1626 if (do_colour && !invert)
1627 {
1628 int plength;
1629 FWRITE(ptr, 1, offsets[0], stdout);
1630 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1631 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1632 fprintf(stdout, "%c[00m", 0x1b);
1633 for (;;)
1634 {
1635 startoffset = offsets[1];
1636 if (startoffset >= (int)linelength + endlinelength ||
1637 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1638 break;
1639 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1640 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1641 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1642 fprintf(stdout, "%c[00m", 0x1b);
1643 }
1644
1645 /* In multiline mode, we may have already printed the complete line
1646 and its line-ending characters (if they matched the pattern), so there
1647 may be no more to print. */
1648
1649 plength = (int)((linelength + endlinelength) - startoffset);
1650 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1651 }
1652
1653 /* Not colouring; no need to search for further matches */
1654
1655 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1656 }
1657
1658 /* End of doing what has to be done for a match. If --line-buffered was
1659 given, flush the output. */
1660
1661 if (line_buffered) fflush(stdout);
1662 rc = 0; /* Had some success */
1663
1664 /* Remember where the last match happened for after_context. We remember
1665 where we are about to restart, and that line's number. */
1666
1667 lastmatchrestart = ptr + linelength + endlinelength;
1668 lastmatchnumber = linenumber + 1;
1669 }
1670
1671 /* For a match in multiline inverted mode (which of course did not cause
1672 anything to be printed), we have to move on to the end of the match before
1673 proceeding. */
1674
1675 if (multiline && invert && match)
1676 {
1677 int ellength;
1678 char *endmatch = ptr + offsets[1];
1679 t = ptr;
1680 while (t < endmatch)
1681 {
1682 t = end_of_line(t, endptr, &ellength);
1683 if (t <= endmatch) linenumber++; else break;
1684 }
1685 endmatch = end_of_line(endmatch, endptr, &ellength);
1686 linelength = endmatch - ptr - ellength;
1687 }
1688
1689 /* Advance to after the newline and increment the line number. The file
1690 offset to the current line is maintained in filepos. */
1691
1692 ptr += linelength + endlinelength;
1693 filepos += (int)(linelength + endlinelength);
1694 linenumber++;
1695
1696 /* If input is line buffered, and the buffer is not yet full, read another
1697 line and add it into the buffer. */
1698
1699 if (input_line_buffered && bufflength < (size_t)bufsize)
1700 {
1701 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1702 bufflength += add;
1703 endptr += add;
1704 }
1705
1706 /* If we haven't yet reached the end of the file (the buffer is full), and
1707 the current point is in the top 1/3 of the buffer, slide the buffer down by
1708 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1709 about to be lost, print them. */
1710
1711 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1712 {
1713 if (after_context > 0 &&
1714 lastmatchnumber > 0 &&
1715 lastmatchrestart < main_buffer + bufthird)
1716 {
1717 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1718 lastmatchnumber = 0;
1719 }
1720
1721 /* Now do the shuffle */
1722
1723 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1724 ptr -= bufthird;
1725
1726 #ifdef SUPPORT_LIBZ
1727 if (frtype == FR_LIBZ)
1728 bufflength = 2*bufthird +
1729 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1730 else
1731 #endif
1732
1733 #ifdef SUPPORT_LIBBZ2
1734 if (frtype == FR_LIBBZ2)
1735 bufflength = 2*bufthird +
1736 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1737 else
1738 #endif
1739
1740 bufflength = 2*bufthird +
1741 (input_line_buffered?
1742 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1743 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1744 endptr = main_buffer + bufflength;
1745
1746 /* Adjust any last match point */
1747
1748 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1749 }
1750 } /* Loop through the whole file */
1751
1752 /* End of file; print final "after" lines if wanted; do_after_lines sets
1753 hyphenpending if it prints something. */
1754
1755 if (only_matching < 0 && !count_only)
1756 {
1757 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1758 hyphenpending |= endhyphenpending;
1759 }
1760
1761 /* Print the file name if we are looking for those without matches and there
1762 were none. If we found a match, we won't have got this far. */
1763
1764 if (filenames == FN_NOMATCH_ONLY)
1765 {
1766 fprintf(stdout, "%s\n", printname);
1767 return 0;
1768 }
1769
1770 /* Print the match count if wanted */
1771
1772 if (count_only)
1773 {
1774 if (count > 0 || !omit_zero_count)
1775 {
1776 if (printname != NULL && filenames != FN_NONE)
1777 fprintf(stdout, "%s:", printname);
1778 fprintf(stdout, "%d\n", count);
1779 }
1780 }
1781
1782 return rc;
1783 }
1784
1785
1786
1787 /*************************************************
1788 * Grep a file or recurse into a directory *
1789 *************************************************/
1790
1791 /* Given a path name, if it's a directory, scan all the files if we are
1792 recursing; if it's a file, grep it.
1793
1794 Arguments:
1795 pathname the path to investigate
1796 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1797 only_one_at_top TRUE if the path is the only one at toplevel
1798
1799 Returns: -1 the file/directory was skipped
1800 0 if there was at least one match
1801 1 if there were no matches
1802 2 there was some kind of error
1803
1804 However, file opening failures are suppressed if "silent" is set.
1805 */
1806
1807 static int
1808 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1809 {
1810 int rc = 1;
1811 int frtype;
1812 void *handle;
1813 char *lastcomp;
1814 FILE *in = NULL; /* Ensure initialized */
1815
1816 #ifdef SUPPORT_LIBZ
1817 gzFile ingz = NULL;
1818 #endif
1819
1820 #ifdef SUPPORT_LIBBZ2
1821 BZFILE *inbz2 = NULL;
1822 #endif
1823
1824 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1825 int pathlen;
1826 #endif
1827
1828 /* If the file name is "-" we scan stdin */
1829
1830 if (strcmp(pathname, "-") == 0)
1831 {
1832 return pcregrep(stdin, FR_PLAIN, stdin_name,
1833 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1834 stdin_name : NULL);
1835 }
1836
1837 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
1838 directories, whereas --include and --exclude apply to everything else. The test
1839 is against the final component of the path. */
1840
1841 lastcomp = strrchr(pathname, FILESEP);
1842 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
1843
1844 /* If the file is a directory, skip if not recursing or if explicitly excluded.
1845 Otherwise, scan the directory and recurse for each path within it. The scanning
1846 code is localized so it can be made system-specific. */
1847
1848 if (isdirectory(pathname))
1849 {
1850 if (dee_action == dee_SKIP ||
1851 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
1852 return -1;
1853
1854 if (dee_action == dee_RECURSE)
1855 {
1856 char buffer[1024];
1857 char *nextfile;
1858 directory_type *dir = opendirectory(pathname);
1859
1860 if (dir == NULL)
1861 {
1862 if (!silent)
1863 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1864 strerror(errno));
1865 return 2;
1866 }
1867
1868 while ((nextfile = readdirectory(dir)) != NULL)
1869 {
1870 int frc;
1871 sprintf(buffer, "%.512s%c%.128s", pathname, FILESEP, nextfile);
1872 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1873 if (frc > 1) rc = frc;
1874 else if (frc == 0 && rc == 1) rc = 0;
1875 }
1876
1877 closedirectory(dir);
1878 return rc;
1879 }
1880 }
1881
1882 /* If the file is not a directory and not a regular file, skip it if that's
1883 been requested. Otherwise, check for explicit include/exclude. */
1884
1885 else if ((!isregfile(pathname) && DEE_action == DEE_SKIP) ||
1886 !test_incexc(lastcomp, include_patterns, exclude_patterns))
1887 return -1;
1888
1889 /* Control reaches here if we have a regular file, or if we have a directory
1890 and recursion or skipping was not requested, or if we have anything else and
1891 skipping was not requested. The scan proceeds. If this is the first and only
1892 argument at top level, we don't show the file name, unless we are only showing
1893 the file name, or the filename was forced (-H). */
1894
1895 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1896 pathlen = (int)(strlen(pathname));
1897 #endif
1898
1899 /* Open using zlib if it is supported and the file name ends with .gz. */
1900
1901 #ifdef SUPPORT_LIBZ
1902 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1903 {
1904 ingz = gzopen(pathname, "rb");
1905 if (ingz == NULL)
1906 {
1907 if (!silent)
1908 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1909 strerror(errno));
1910 return 2;
1911 }
1912 handle = (void *)ingz;
1913 frtype = FR_LIBZ;
1914 }
1915 else
1916 #endif
1917
1918 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1919
1920 #ifdef SUPPORT_LIBBZ2
1921 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1922 {
1923 inbz2 = BZ2_bzopen(pathname, "rb");
1924 handle = (void *)inbz2;
1925 frtype = FR_LIBBZ2;
1926 }
1927 else
1928 #endif
1929
1930 /* Otherwise use plain fopen(). The label is so that we can come back here if
1931 an attempt to read a .bz2 file indicates that it really is a plain file. */
1932
1933 #ifdef SUPPORT_LIBBZ2
1934 PLAIN_FILE:
1935 #endif
1936 {
1937 in = fopen(pathname, "rb");
1938 handle = (void *)in;
1939 frtype = FR_PLAIN;
1940 }
1941
1942 /* All the opening methods return errno when they fail. */
1943
1944 if (handle == NULL)
1945 {
1946 if (!silent)
1947 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1948 strerror(errno));
1949 return 2;
1950 }
1951
1952 /* Now grep the file */
1953
1954 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1955 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1956
1957 /* Close in an appropriate manner. */
1958
1959 #ifdef SUPPORT_LIBZ
1960 if (frtype == FR_LIBZ)
1961 gzclose(ingz);
1962 else
1963 #endif
1964
1965 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1966 read failed. If the error indicates that the file isn't in fact bzipped, try
1967 again as a normal file. */
1968
1969 #ifdef SUPPORT_LIBBZ2
1970 if (frtype == FR_LIBBZ2)
1971 {
1972 if (rc == 3)
1973 {
1974 int errnum;
1975 const char *err = BZ2_bzerror(inbz2, &errnum);
1976 if (errnum == BZ_DATA_ERROR_MAGIC)
1977 {
1978 BZ2_bzclose(inbz2);
1979 goto PLAIN_FILE;
1980 }
1981 else if (!silent)
1982 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1983 pathname, err);
1984 rc = 2; /* The normal "something went wrong" code */
1985 }
1986 BZ2_bzclose(inbz2);
1987 }
1988 else
1989 #endif
1990
1991 /* Normal file close */
1992
1993 fclose(in);
1994
1995 /* Pass back the yield from pcregrep(). */
1996
1997 return rc;
1998 }
1999
2000
2001
2002
2003 /*************************************************
2004 * Usage function *
2005 *************************************************/
2006
2007 static int
2008 usage(int rc)
2009 {
2010 option_item *op;
2011 fprintf(stderr, "Usage: pcregrep [-");
2012 for (op = optionlist; op->one_char != 0; op++)
2013 {
2014 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
2015 }
2016 fprintf(stderr, "] [long options] [pattern] [files]\n");
2017 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
2018 "options.\n");
2019 return rc;
2020 }
2021
2022
2023
2024
2025 /*************************************************
2026 * Help function *
2027 *************************************************/
2028
2029 static void
2030 help(void)
2031 {
2032 option_item *op;
2033
2034 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
2035 printf("Search for PATTERN in each FILE or standard input.\n");
2036 printf("PATTERN must be present if neither -e nor -f is used.\n");
2037 printf("\"-\" can be used as a file name to mean STDIN.\n");
2038
2039 #ifdef SUPPORT_LIBZ
2040 printf("Files whose names end in .gz are read using zlib.\n");
2041 #endif
2042
2043 #ifdef SUPPORT_LIBBZ2
2044 printf("Files whose names end in .bz2 are read using bzlib2.\n");
2045 #endif
2046
2047 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2048 printf("Other files and the standard input are read as plain files.\n\n");
2049 #else
2050 printf("All files are read as plain files, without any interpretation.\n\n");
2051 #endif
2052
2053 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
2054 printf("Options:\n");
2055
2056 for (op = optionlist; op->one_char != 0; op++)
2057 {
2058 int n;
2059 char s[4];
2060
2061 /* Two options were accidentally implemented and documented with underscores
2062 instead of hyphens in their names, something that was not noticed for quite a
2063 few releases. When fixing this, I left the underscored versions in the list
2064 in case people were using them. However, we don't want to display them in the
2065 help data. There are no other options that contain underscores, and we do not
2066 expect ever to implement such options. Therefore, just omit any option that
2067 contains an underscore. */
2068
2069 if (strchr(op->long_name, '_') != NULL) continue;
2070
2071 if (op->one_char > 0 && (op->long_name)[0] == 0)
2072 n = 31 - printf(" -%c", op->one_char);
2073 else
2074 {
2075 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
2076 else strcpy(s, " ");
2077 n = 31 - printf(" %s --%s", s, op->long_name);
2078 }
2079
2080 if (n < 1) n = 1;
2081 printf("%.*s%s\n", n, " ", op->help_text);
2082 }
2083
2084 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
2085 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
2086 printf("When reading patterns or file names from a file, trailing white\n");
2087 printf("space is removed and blank lines are ignored.\n");
2088 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
2089
2090 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
2091 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
2092 }
2093
2094
2095
2096
2097 /*************************************************
2098 * Handle a single-letter, no data option *
2099 *************************************************/
2100
2101 static int
2102 handle_option(int letter, int options)
2103 {
2104 switch(letter)
2105 {
2106 case N_FOFFSETS: file_offsets = TRUE; break;
2107 case N_HELP: help(); pcregrep_exit(0);
2108 case N_LBUFFER: line_buffered = TRUE; break;
2109 case N_LOFFSETS: line_offsets = number = TRUE; break;
2110 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2111 case 'a': binary_files = BIN_TEXT; break;
2112 case 'c': count_only = TRUE; break;
2113 case 'F': process_options |= PO_FIXED_STRINGS; break;
2114 case 'H': filenames = FN_FORCE; break;
2115 case 'I': binary_files = BIN_NOMATCH; break;
2116 case 'h': filenames = FN_NONE; break;
2117 case 'i': options |= PCRE_CASELESS; break;
2118 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2119 case 'L': filenames = FN_NOMATCH_ONLY; break;
2120 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2121 case 'n': number = TRUE; break;
2122 case 'o': only_matching = 0; break;
2123 case 'q': quiet = TRUE; break;
2124 case 'r': dee_action = dee_RECURSE; break;
2125 case 's': silent = TRUE; break;
2126 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2127 case 'v': invert = TRUE; break;
2128 case 'w': process_options |= PO_WORD_MATCH; break;
2129 case 'x': process_options |= PO_LINE_MATCH; break;
2130
2131 case 'V':
2132 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2133 pcregrep_exit(0);
2134 break;
2135
2136 default:
2137 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2138 pcregrep_exit(usage(2));
2139 }
2140
2141 return options;
2142 }
2143
2144
2145
2146
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The result is built in
a static buffer, so it is overwritten by the next call.

Argument:   n   the number
Returns:    pointer to the static buffer holding the text
*/

static char *
ordin(int n)
{
static char buffer[24];    /* Large enough for any int plus a two-letter ending */
const char *ending = "th";
int lasttwo = n % 100;

/* Numbers ending in 11, 12, or 13 take "th", not "st", "nd", or "rd". For
anything else the last digit decides. A negative number has a remainder that
is zero or negative, so it keeps "th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
2169
2170
2171
2172 /*************************************************
2173 * Compile a single pattern *
2174 *************************************************/
2175
2176 /* Do nothing if the pattern has already been compiled. This is the case for
2177 include/exclude patterns read from a file.
2178
2179 When the -F option has been used, each "pattern" may be a list of strings,
2180 separated by line breaks. They will be matched literally. We split such a
2181 string and compile the first substring, inserting an additional block into the
2182 pattern chain.
2183
2184 Arguments:
2185 p points to the pattern block
2186 options the PCRE options
2187 popts the processing options
2188 fromfile TRUE if the pattern was read from a file
2189 fromtext file name or identifying text (e.g. "include")
2190 count 0 if this is the only command line pattern, or
2191 number of the command line pattern, or
2192 linenumber for a pattern from a file
2193
2194 Returns: TRUE on success, FALSE after an error
2195 */
2196
2197 static BOOL
2198 compile_pattern(patstr *p, int options, int popts, int fromfile,
2199 const char *fromtext, int count)
2200 {
2201 char buffer[PATBUFSIZE];
2202 const char *error;
2203 char *ps = p->string;
2204 int patlen = strlen(ps);
2205 int errptr;
2206
2207 if (p->compiled != NULL) return TRUE;
2208
2209 if ((popts & PO_FIXED_STRINGS) != 0)
2210 {
2211 int ellength;
2212 char *eop = ps + patlen;
2213 char *pe = end_of_line(ps, eop, &ellength);
2214
2215 if (ellength != 0)
2216 {
2217 if (add_pattern(pe, p) == NULL) return FALSE;
2218 patlen = (int)(pe - ps - ellength);
2219 }
2220 }
2221
2222 sprintf(buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
2223 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2224 if (p->compiled != NULL) return TRUE;
2225
2226 /* Handle compile errors */
2227
2228 errptr -= (int)strlen(prefix[popts]);
2229 if (errptr > patlen) errptr = patlen;
2230
2231 if (fromfile)
2232 {
2233 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2234 "at offset %d: %s\n", count, fromtext, errptr, error);
2235 }
2236 else
2237 {
2238 if (count == 0)
2239 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2240 fromtext, errptr, error);
2241 else
2242 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2243 ordin(count), fromtext, errptr, error);
2244 }
2245
2246 return FALSE;
2247 }
2248
2249
2250
2251 /*************************************************
2252 * Read and compile a file of patterns *
2253 *************************************************/
2254
2255 /* This is used for --filelist, --include-from, and --exclude-from.
2256
2257 Arguments:
2258 name the name of the file; "-" is stdin
2259 patptr pointer to the pattern chain anchor
2260 patlastptr pointer to the last pattern pointer
2261 popts the process options to pass to pattern_compile()
2262
2263 Returns: TRUE if all went well
2264 */
2265
2266 static BOOL
2267 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2268 {
2269 int linenumber = 0;
2270 FILE *f;
2271 char *filename;
2272 char buffer[PATBUFSIZE];
2273
2274 if (strcmp(name, "-") == 0)
2275 {
2276 f = stdin;
2277 filename = stdin_name;
2278 }
2279 else
2280 {
2281 f = fopen(name, "r");
2282 if (f == NULL)
2283 {
2284 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2285 return FALSE;
2286 }
2287 filename = name;
2288 }
2289
2290 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2291 {
2292 char *s = buffer + (int)strlen(buffer);
2293 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2294 *s = 0;
2295 linenumber++;
2296 if (buffer[0] == 0) continue; /* Skip blank lines */
2297
2298 /* Note: this call to add_pattern() puts a pointer to the local variable
2299 "buffer" into the pattern chain. However, that pointer is used only when
2300 compiling the pattern, which happens immediately below, so we flatten it
2301 afterwards, as a precaution against any later code trying to use it. */
2302
2303 *patlastptr = add_pattern(buffer, *patlastptr);
2304 if (*patlastptr == NULL) return FALSE;
2305 if (*patptr == NULL) *patptr = *patlastptr;
2306
2307 /* This loop is needed because compiling a "pattern" when -F is set may add
2308 on additional literal patterns if the original contains a newline. In the
2309 common case, it never will, because fgets() stops at a newline. However,
2310 the -N option can be used to give pcregrep a different newline setting. */
2311
2312 for(;;)
2313 {
2314 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2315 linenumber))
2316 return FALSE;
2317 (*patlastptr)->string = NULL; /* Insurance */
2318 if ((*patlastptr)->next == NULL) break;
2319 *patlastptr = (*patlastptr)->next;
2320 }
2321 }
2322
2323 if (f != stdin) fclose(f);
2324 return TRUE;
2325 }
2326
2327
2328
2329 /*************************************************
2330 * Main program *
2331 *************************************************/
2332
2333 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2334
2335 int
2336 main(int argc, char **argv)
2337 {
2338 int i, j;
2339 int rc = 1;
2340 BOOL only_one_at_top;
2341 patstr *cp;
2342 fnstr *fn;
2343 const char *locale_from = "--locale";
2344 const char *error;
2345
2346 #ifdef SUPPORT_PCREGREP_JIT
2347 pcre_jit_stack *jit_stack = NULL;
2348 #endif
2349
2350 /* Set the default line ending value from the default in the PCRE library;
2351 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2352 Note that the return values from pcre_config(), though derived from the ASCII
2353 codes, are the same in EBCDIC environments, so we must use the actual values
2354 rather than escapes such as as '\r'. */
2355
2356 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2357 switch(i)
2358 {
2359 default: newline = (char *)"lf"; break;
2360 case 13: newline = (char *)"cr"; break;
2361 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2362 case -1: newline = (char *)"any"; break;
2363 case -2: newline = (char *)"anycrlf"; break;
2364 }
2365
2366 /* Process the options */
2367
2368 for (i = 1; i < argc; i++)
2369 {
2370 option_item *op = NULL;
2371 char *option_data = (char *)""; /* default to keep compiler happy */
2372 BOOL longop;
2373 BOOL longopwasequals = FALSE;
2374
2375 if (argv[i][0] != '-') break;
2376
2377 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2378 but only if we have previously had -e or -f to define the patterns. */
2379
2380 if (argv[i][1] == 0)
2381 {
2382 if (pattern_files != NULL || patterns != NULL) break;
2383 else pcregrep_exit(usage(2));
2384 }
2385
2386 /* Handle a long name option, or -- to terminate the options */
2387
2388 if (argv[i][1] == '-')
2389 {
2390 char *arg = argv[i] + 2;
2391 char *argequals = strchr(arg, '=');
2392
2393 if (*arg == 0) /* -- terminates options */
2394 {
2395 i++;
2396 break; /* out of the options-handling loop */
2397 }
2398
2399 longop = TRUE;
2400
2401 /* Some long options have data that follows after =, for example file=name.
2402 Some options have variations in the long name spelling: specifically, we
2403 allow "regexp" because GNU grep allows it, though I personally go along
2404 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2405 These options are entered in the table as "regex(p)". Options can be in
2406 both these categories. */
2407
2408 for (op = optionlist; op->one_char != 0; op++)
2409 {
2410 char *opbra = strchr(op->long_name, '(');
2411 char *equals = strchr(op->long_name, '=');
2412
2413 /* Handle options with only one spelling of the name */
2414
2415 if (opbra == NULL) /* Does not contain '(' */
2416 {
2417 if (equals == NULL) /* Not thing=data case */
2418 {
2419 if (strcmp(arg, op->long_name) == 0) break;
2420 }
2421 else /* Special case xxx=data */
2422 {
2423 int oplen = (int)(equals - op->long_name);
2424 int arglen = (argequals == NULL)?
2425 (int)strlen(arg) : (int)(argequals - arg);
2426 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2427 {
2428 option_data = arg + arglen;
2429 if (*option_data == '=')
2430 {
2431 option_data++;
2432 longopwasequals = TRUE;
2433 }
2434 break;
2435 }
2436 }
2437 }
2438
2439 /* Handle options with an alternate spelling of the name */
2440
2441 else
2442 {
2443 char buff1[24];
2444 char buff2[24];
2445
2446 int baselen = (int)(opbra - op->long_name);
2447 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2448 int arglen = (argequals == NULL || equals == NULL)?
2449 (int)strlen(arg) : (int)(argequals - arg);
2450
2451 sprintf(buff1, "%.*s", baselen, op->long_name);
2452 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2453
2454 if (strncmp(arg, buff1, arglen) == 0 ||
2455 strncmp(arg, buff2, arglen) == 0)
2456 {
2457 if (equals != NULL && argequals != NULL)
2458 {
2459 option_data = argequals;
2460 if (*option_data == '=')
2461 {
2462 option_data++;
2463 longopwasequals = TRUE;
2464 }
2465 }
2466 break;
2467 }
2468 }
2469 }
2470
2471 if (op->one_char == 0)
2472 {
2473 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2474 pcregrep_exit(usage(2));
2475 }
2476 }
2477
2478 /* Jeffrey Friedl's debugging harness uses these additional options which
2479 are not in the right form for putting in the option table because they use
2480 only one hyphen, yet are more than one character long. By putting them
2481 separately here, they will not get displayed as part of the help() output,
2482 but I don't think Jeffrey will care about that. */
2483
2484 #ifdef JFRIEDL_DEBUG
2485 else if (strcmp(argv[i], "-pre") == 0) {
2486 jfriedl_prefix = argv[++i];
2487 continue;
2488 } else if (strcmp(argv[i], "-post") == 0) {
2489 jfriedl_postfix = argv[++i];
2490 continue;
2491 } else if (strcmp(argv[i], "-XT") == 0) {
2492 sscanf(argv[++i], "%d", &jfriedl_XT);
2493 continue;
2494 } else if (strcmp(argv[i], "-XR") == 0) {
2495 sscanf(argv[++i], "%d", &jfriedl_XR);
2496 continue;
2497 }
2498 #endif
2499
2500
2501 /* One-char options; many that have no data may be in a single argument; we
2502 continue till we hit the last one or one that needs data. */
2503
2504 else
2505 {
2506 char *s = argv[i] + 1;
2507 longop = FALSE;
2508 while (*s != 0)
2509 {
2510 for (op = optionlist; op->one_char != 0; op++)
2511 {
2512 if (*s == op->one_char) break;
2513 }
2514 if (op->one_char == 0)
2515 {
2516 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2517 *s, argv[i]);
2518 pcregrep_exit(usage(2));
2519 }
2520
2521 /* Check for a single-character option that has data: OP_OP_NUMBER
2522 is used for one that either has a numerical number or defaults, i.e. the
2523 data is optional. If a digit follows, there is data; if not, carry on
2524 with other single-character options in the same string. */
2525
2526 option_data = s+1;
2527 if (op->type == OP_OP_NUMBER)
2528 {
2529 if (isdigit((unsigned char)s[1])) break;
2530 }
2531 else /* Check for end or a dataless option */
2532 {
2533 if (op->type != OP_NODATA || s[1] == 0) break;
2534 }
2535
2536 /* Handle a single-character option with no data, then loop for the
2537 next character in the string. */
2538
2539 pcre_options = handle_option(*s++, pcre_options);
2540 }
2541 }
2542
2543 /* At this point we should have op pointing to a matched option. If the type
2544 is NO_DATA, it means that there is no data, and the option might set
2545 something in the PCRE options. */
2546
2547 if (op->type == OP_NODATA)
2548 {
2549 pcre_options = handle_option(op->one_char, pcre_options);
2550 continue;
2551 }
2552
2553 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2554 either has a value or defaults to something. It cannot have data in a
2555 separate item. At the moment, the only such options are "colo(u)r",
2556 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2557
2558 if (*option_data == 0 &&
2559 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2560 {
2561 switch (op->one_char)
2562 {
2563 case N_COLOUR:
2564 colour_option = (char *)"auto";
2565 break;
2566
2567 case 'o':
2568 only_matching = 0;
2569 break;
2570
2571 #ifdef JFRIEDL_DEBUG
2572 case 'S':
2573 S_arg = 0;
2574 break;
2575 #endif
2576 }
2577 continue;
2578 }
2579
2580 /* Otherwise, find the data string for the option. */
2581
2582 if (*option_data == 0)
2583 {
2584 if (i >= argc - 1 || longopwasequals)
2585 {
2586 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2587 pcregrep_exit(usage(2));
2588 }
2589 option_data = argv[++i];
2590 }
2591
2592 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2593 include/exclude options, which can be called multiple times to create lists
2594 of patterns. */
2595
2596 if (op->type == OP_PATLIST)
2597 {
2598 patdatastr *pd = (patdatastr *)op->dataptr;
2599 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2600 if (*(pd->lastptr) == NULL) goto EXIT2;
2601 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2602 }
2603
2604 /* If the option type is OP_FILELIST, it's one of the options that names a
2605 file. */
2606
2607 else if (op->type == OP_FILELIST)
2608 {
2609 fndatastr *fd = (fndatastr *)op->dataptr;
2610 fn = (fnstr *)malloc(sizeof(fnstr));
2611 if (fn == NULL)
2612 {
2613 fprintf(stderr, "pcregrep: malloc failed\n");
2614 goto EXIT2;
2615 }
2616 fn->next = NULL;
2617 fn->name = option_data;
2618 if (*(fd->anchor) == NULL)
2619 *(fd->anchor) = fn;
2620 else
2621 (*(fd->lastptr))->next = fn;
2622 *(fd->lastptr) = fn;
2623 }
2624
2625 /* Handle OP_BINARY_FILES */
2626
2627 else if (op->type == OP_BINFILES)
2628 {
2629 if (strcmp(option_data, "binary") == 0)
2630 binary_files = BIN_BINARY;
2631 else if (strcmp(option_data, "without-match") == 0)
2632 binary_files = BIN_NOMATCH;
2633 else if (strcmp(option_data, "text") == 0)
2634 binary_files = BIN_TEXT;
2635 else
2636 {
2637 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2638 option_data);
2639 pcregrep_exit(usage(2));
2640 }
2641 }
2642
2643 /* Otherwise, deal with single string or numeric data values. */
2644
2645 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2646 op->type != OP_OP_NUMBER)
2647 {
2648 *((char **)op->dataptr) = option_data;
2649 }
2650
2651 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2652 only for unpicking arguments, so just keep it simple. */
2653
2654 else
2655 {
2656 unsigned long int n = 0;
2657 char *endptr = option_data;
2658 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2659 while (isdigit((unsigned char)(*endptr)))
2660 n = n * 10 + (int)(*endptr++ - '0');
2661 if (toupper(*endptr) == 'K')
2662 {
2663 n *= 1024;
2664 endptr++;
2665 }
2666 else if (toupper(*endptr) == 'M')
2667 {
2668 n *= 1024*1024;
2669 endptr++;
2670 }
2671 if (*endptr != 0)
2672 {
2673 if (longop)
2674 {
2675 char *equals = strchr(op->long_name, '=');
2676 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2677 (int)(equals - op->long_name);
2678 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2679 option_data, nlen, op->long_name);
2680 }
2681 else
2682 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2683 option_data, op->one_char);
2684 pcregrep_exit(usage(2));
2685 }
2686 if (op->type == OP_LONGNUMBER)
2687 *((unsigned long int *)op->dataptr) = n;
2688 else
2689 *((int *)op->dataptr) = n;
2690 }
2691 }
2692
2693 /* Options have been decoded. If -C was used, its value is used as a default
2694 for -A and -B. */
2695
2696 if (both_context > 0)
2697 {
2698 if (after_context == 0) after_context = both_context;
2699 if (before_context == 0) before_context = both_context;
2700 }
2701
2702 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2703 However, the latter two set only_matching. */
2704
2705 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2706 (file_offsets && line_offsets))
2707 {
2708 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2709 "and/or --line-offsets\n");
2710 pcregrep_exit(usage(2));
2711 }
2712
2713 if (file_offsets || line_offsets) only_matching = 0;
2714
2715 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2716 LC_ALL environment variable is set, and if so, use it. */
2717
2718 if (locale == NULL)
2719 {
2720 locale = getenv("LC_ALL");
2721 locale_from = "LCC_ALL";
2722 }
2723
2724 if (locale == NULL)
2725 {
2726 locale = getenv("LC_CTYPE");
2727 locale_from = "LC_CTYPE";
2728 }
2729
2730 /* If a locale has been provided, set it, and generate the tables the PCRE
2731 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2732
2733 if (locale != NULL)
2734 {
2735 if (setlocale(LC_CTYPE, locale) == NULL)
2736 {
2737 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2738 locale, locale_from);
2739 return 2;
2740 }
2741 pcretables = pcre_maketables();
2742 }
2743
2744 /* Sort out colouring */
2745
2746 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2747 {
2748 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2749 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2750 else
2751 {
2752 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2753 colour_option);
2754 return 2;
2755 }
2756 if (do_colour)
2757 {
2758 char *cs = getenv("PCREGREP_COLOUR");
2759 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2760 if (cs != NULL) colour_string = cs;
2761 }
2762 }
2763
2764 /* Interpret the newline type; the default settings are Unix-like. */
2765
2766 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2767 {
2768 pcre_options |= PCRE_NEWLINE_CR;
2769 endlinetype = EL_CR;
2770 }
2771 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2772 {
2773 pcre_options |= PCRE_NEWLINE_LF;
2774 endlinetype = EL_LF;
2775 }
2776 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2777 {
2778 pcre_options |= PCRE_NEWLINE_CRLF;
2779 endlinetype = EL_CRLF;
2780 }
2781 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2782 {
2783 pcre_options |= PCRE_NEWLINE_ANY;
2784 endlinetype = EL_ANY;
2785 }
2786 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2787 {
2788 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2789 endlinetype = EL_ANYCRLF;
2790 }
2791 else
2792 {
2793 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2794 return 2;
2795 }
2796
2797 /* Interpret the text values for -d and -D */
2798
2799 if (dee_option != NULL)
2800 {
2801 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2802 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2803 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2804 else
2805 {
2806 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2807 return 2;
2808 }
2809 }
2810
2811 if (DEE_option != NULL)
2812 {
2813 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2814 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2815 else
2816 {
2817 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2818 return 2;
2819 }
2820 }
2821
2822 /* Check the values for Jeffrey Friedl's debugging options. */
2823
2824 #ifdef JFRIEDL_DEBUG
2825 if (S_arg > 9)
2826 {
2827 fprintf(stderr, "pcregrep: bad value for -S option\n");
2828 return 2;
2829 }
2830 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2831 {
2832 if (jfriedl_XT == 0) jfriedl_XT = 1;
2833 if (jfriedl_XR == 0) jfriedl_XR = 1;
2834 }
2835 #endif
2836
2837 /* Get memory for the main buffer. */
2838
2839 bufsize = 3*bufthird;
2840 main_buffer = (char *)malloc(bufsize);
2841
2842 if (main_buffer == NULL)
2843 {
2844 fprintf(stderr, "pcregrep: malloc failed\n");
2845 goto EXIT2;
2846 }
2847
2848 /* If no patterns were provided by -e, and there are no files provided by -f,
2849 the first argument is the one and only pattern, and it must exist. */
2850
2851 if (patterns == NULL && pattern_files == NULL)
2852 {
2853 if (i >= argc) return usage(2);
2854 patterns = patterns_last = add_pattern(argv[i++], NULL);
2855 if (patterns == NULL) goto EXIT2;
2856 }
2857
2858 /* Compile the patterns that were provided on the command line, either by
2859 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
2860 after all the command-line options are read so that we know which PCRE options
2861 to use. When -F is used, compile_pattern() may add another block into the
2862 chain, so we must not access the next pointer till after the compile. */
2863
2864 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2865 {
2866 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
2867 (j == 1 && patterns->next == NULL)? 0 : j))
2868 goto EXIT2;
2869 }
2870
2871 /* Read and compile the regular expressions that are provided in files. */
2872
2873 for (fn = pattern_files; fn != NULL; fn = fn->next)
2874 {
2875 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
2876 goto EXIT2;
2877 }
2878
2879 /* Study the regular expressions, as we will be running them many times. Unless
2880 JIT has been explicitly disabled, arrange a stack for it to use. */
2881
2882 #ifdef SUPPORT_PCREGREP_JIT
2883 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2884 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2885 #endif
2886
2887 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
2888 {
2889 cp->hint = pcre_study(cp->compiled, study_options, &error);
2890 if (error != NULL)
2891 {
2892 char s[16];
2893 if (patterns->next == NULL) s[0] = 0; else sprintf(s, " number %d", j);
2894 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2895 goto EXIT2;
2896 }
2897 #ifdef SUPPORT_PCREGREP_JIT
2898 if (jit_stack != NULL && cp->hint != NULL)
2899 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
2900 #endif
2901 }
2902
2903 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2904 pcre_extra block for each pattern. */
2905
2906 if (match_limit > 0 || match_limit_recursion > 0)
2907 {
2908 for (cp = patterns; cp != NULL; cp = cp->next)
2909 {
2910 if (cp->hint == NULL)
2911 {
2912 cp->hint = (pcre_extra *)malloc(sizeof(pcre_extra));
2913 if (cp->hint == NULL)
2914 {
2915 fprintf(stderr, "pcregrep: malloc failed\n");
2916 pcregrep_exit(2);
2917 }
2918 }
2919 if (match_limit > 0)
2920 {
2921 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
2922 cp->hint->match_limit = match_limit;
2923 }
2924 if (match_limit_recursion > 0)
2925 {
2926 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2927 cp->hint->match_limit_recursion = match_limit_recursion;
2928 }
2929 }
2930 }
2931
2932 /* If there are include or exclude patterns read from the command line, compile
2933 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
2934 0. */
2935
2936 for (j = 0; j < 4; j++)
2937 {
2938 int k;
2939 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
2940 {
2941 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
2942 (k == 1 && cp->next == NULL)? 0 : k))
2943 goto EXIT2;
2944 }
2945 }
2946
2947 /* Read and compile include/exclude patterns from files. */
2948
2949 for (fn = include_from; fn != NULL; fn = fn->next)
2950 {
2951 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
2952 goto EXIT2;
2953 }
2954
2955 for (fn = exclude_from; fn != NULL; fn = fn->next)
2956 {
2957 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
2958 goto EXIT2;
2959 }
2960
2961 /* If there are no files that contain lists of files to search, and there are
2962 no file arguments, search stdin, and then exit. */
2963
2964 if (file_lists == NULL && i >= argc)
2965 {
2966 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2967 (filenames > FN_DEFAULT)? stdin_name : NULL);
2968 goto EXIT;
2969 }
2970
2971 /* If any files that contains a list of files to search have been specified,
2972 read them line by line and search the given files. */
2973
2974 for (fn = file_lists; fn != NULL; fn = fn->next)
2975 {
2976 char buffer[PATBUFSIZE];
2977 FILE *fl;
2978 if (strcmp(fn->name, "-") == 0) fl = stdin; else
2979 {
2980 fl = fopen(fn->name, "rb");
2981 if (fl == NULL)
2982 {
2983 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
2984 strerror(errno));
2985 goto EXIT2;
2986 }
2987 }
2988 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2989 {
2990 int frc;
2991 char *end = buffer + (int)strlen(buffer);
2992 while (end > buffer && isspace(end[-1])) end--;
2993 *end = 0;
2994 if (*buffer != 0)
2995 {
2996 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2997 if (frc > 1) rc = frc;
2998 else if (frc == 0 && rc == 1) rc = 0;
2999 }
3000 }
3001 if (fl != stdin) fclose(fl);
3002 }
3003
3004 /* After handling file-list, work through remaining arguments. Pass in the fact
3005 that there is only one argument at top level - this suppresses the file name if
3006 the argument is not a directory and filenames are not otherwise forced. */
3007
3008 only_one_at_top = i == argc - 1 && file_lists == NULL;
3009
3010 for (; i < argc; i++)
3011 {
3012 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3013 only_one_at_top);
3014 if (frc > 1) rc = frc;
3015 else if (frc == 0 && rc == 1) rc = 0;
3016 }
3017
3018 EXIT:
3019 #ifdef SUPPORT_PCREGREP_JIT
3020 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3021 #endif
3022
3023 if (main_buffer != NULL) free(main_buffer);
3024
3025 free_pattern_chain(patterns);
3026 free_pattern_chain(include_patterns);
3027 free_pattern_chain(include_dir_patterns);
3028 free_pattern_chain(exclude_patterns);
3029 free_pattern_chain(exclude_dir_patterns);
3030
3031 free_file_chain(exclude_from);
3032 free_file_chain(include_from);
3033 free_file_chain(pattern_files);
3034 free_file_chain(file_lists);
3035
3036 pcregrep_exit(rc);
3037
3038 EXIT2:
3039 rc = 2;
3040 goto EXIT;
3041 }
3042
3043 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12