/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 324 - (show annotations) (download)
Fri Mar 7 19:48:32 2008 UTC (6 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 65711 byte(s)
Fix bugs with --include and --exclude in pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2008 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Values for the BOOL type defined below. */
68 #define FALSE 0
69 #define TRUE 1
70
/* Type used for the yes/no flag variables in this file. */
71 typedef int BOOL;
72
/* NOTE(review): presumably the maximum number of patterns that can be held;
its use is not visible in this part of the file - confirm. */
73 #define MAX_PATTERN_COUNT 100
74
/* One third of the size of the data buffer used by pcregrep(), which is
declared as 3*MBUFTHIRD bytes. The value is BUFSIZ when that is greater than
8192, and 8192 otherwise. */
75 #if BUFSIZ > 8192
76 #define MBUFTHIRD BUFSIZ
77 #else
78 #define MBUFTHIRD 8192
79 #endif
80
81 /* Values for the "filenames" variable, which specifies options for file name
82 output. The order is important; it is assumed that a file name is wanted for
83 all values greater than FN_DEFAULT. */
84
85 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86
87 /* File reading styles */
88
/* These are the values passed as the frtype argument of pcregrep(). */
89 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90
91 /* Actions for the -d and -D options */
92
/* The dee_xxx values are held in dee_action (-d, directories) and the DEE_xxx
values in DEE_action (-D, devices). */
93 enum { dee_READ, dee_SKIP, dee_RECURSE };
94 enum { DEE_READ, DEE_SKIP };
95
96 /* Actions for special processing options (flag bits) */
97
/* These are the 1, 2, and 4 bits of process_options, which correspond to the
-w, -x, and -F options respectively. */
98 #define PO_WORD_MATCH 0x0001
99 #define PO_LINE_MATCH 0x0002
100 #define PO_FIXED_STRINGS 0x0004
101
102 /* Line ending types */
103
/* The current setting is held in the endlinetype variable. */
104 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105
106
107
108 /*************************************************
109 * Global variables *
110 *************************************************/
111
112 /* Jeffrey Friedl has some debugging requirements that are not part of the
113 regular code. */
114
/* Variables that exist only in Jeffrey Friedl's debugging build. */
115 #ifdef JFRIEDL_DEBUG
116 static int S_arg = -1;
117 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119 static const char *jfriedl_prefix = "";
120 static const char *jfriedl_postfix = "";
121 #endif
122
/* The line ending convention in force: one of the EL_xxx values. It selects
the scanning code in end_of_line() and previous_line(). */
123 static int endlinetype;
124
/* String-valued settings. Those whose addresses appear in optionlist[] are
the data for the corresponding command line options. */
125 static char *colour_string = (char *)"1;31";
126 static char *colour_option = NULL;
127 static char *dee_option = NULL;
128 static char *DEE_option = NULL;
129 static char *newline = NULL;
130 static char *pattern_filename = NULL;
131 static char *stdin_name = (char *)"(standard input)";
132 static char *locale = NULL;
133
/* NOTE(review): presumably PCRE character tables for a non-default locale;
the code that sets this is not visible here - confirm. */
134 static const unsigned char *pcretables = NULL;
135
/* The patterns to be matched: pattern_list[i] and hints_list[i] are passed
together to pcre_exec() for each i below pattern_count. */
136 static int pattern_count = 0;
137 static pcre **pattern_list = NULL;
138 static pcre_extra **hints_list = NULL;
139
/* Data for the --include and --exclude options. */
140 static char *include_pattern = NULL;
141 static char *exclude_pattern = NULL;
142
143 static pcre *include_compiled = NULL;
144 static pcre *exclude_compiled = NULL;
145
/* Numeric settings. The context counts are the data for -A, -B, and -C;
filenames holds one of the FN_xxx values and process_options a combination of
the PO_xxx bits. */
146 static int after_context = 0;
147 static int before_context = 0;
148 static int both_context = 0;
149 static int dee_action = dee_READ;
150 static int DEE_action = DEE_READ;
151 static int error_count = 0;
152 static int filenames = FN_DEFAULT;
153 static int process_options = 0;
154
/* Yes/no flags. hyphenpending records that a "--" separator line is due
before the next block of output lines. */
155 static BOOL count_only = FALSE;
156 static BOOL do_colour = FALSE;
157 static BOOL file_offsets = FALSE;
158 static BOOL hyphenpending = FALSE;
159 static BOOL invert = FALSE;
160 static BOOL line_offsets = FALSE;
161 static BOOL multiline = FALSE;
162 static BOOL number = FALSE;
163 static BOOL only_matching = FALSE;
164 static BOOL quiet = FALSE;
165 static BOOL silent = FALSE;
166 static BOOL utf8 = FALSE;
167
168 /* Structure for options and list of them */
169
/* The kinds of option, used in the "type" field of an option_item.
NOTE(review): the OP_OP_xxx names look like "optional data" variants of
OP_STRING and OP_NUMBER - confirm against the option decoding code. */
170 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
171 OP_PATLIST };
172
/* One entry in the table of command line options:
  type       one of the OP_xxx values above
  one_char   the single-letter form, or a negative N_xxx code if there is none
  dataptr    address of the variable associated with the option, or NULL
  long_name  the long form of the option, as listed in optionlist[]
  help_text  one-line description of the option */
173 typedef struct option_item {
174 int type;
175 int one_char;
176 void *dataptr;
177 const char *long_name;
178 const char *help_text;
179 } option_item;
180
181 /* Options without a single-letter equivalent get a negative value. This can be
182 used to identify them. */
183
184 #define N_COLOUR (-1)
185 #define N_EXCLUDE (-2)
186 #define N_HELP (-3)
187 #define N_INCLUDE (-4)
188 #define N_LABEL (-5)
189 #define N_LOCALE (-6)
190 #define N_NULL (-7)
191 #define N_LOFFSETS (-8)
192 #define N_FOFFSETS (-9)
193
194 static option_item optionlist[] = {
195 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
196 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
197 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
198 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
199 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
200 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
201 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
202 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
203 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
204 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
205 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
206 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
207 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
208 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
209 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
210 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
211 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
212 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
213 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
214 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
215 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
216 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
217 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
218 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
219 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
220 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
221 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
222 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
223 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
224 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
225 #ifdef JFRIEDL_DEBUG
226 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
227 #endif
228 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
229 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
230 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
231 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
232 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
233 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
234 { OP_NODATA, 0, NULL, NULL, NULL }
235 };
236
237 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
238 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
239 that the combination of -w and -x has the same effect as -x on its own, so we
240 can treat them as the same. */
241
/* Text to put before a pattern. There is one entry for each combination of
the PO_WORD_MATCH (1), PO_LINE_MATCH (2), and PO_FIXED_STRINGS (4) bits, in
numerical order of the combined value. */
242 static const char *prefix[] = {
243 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
244
/* Text to put after a pattern; each entry closes whatever the prefix entry at
the same position opened. */
245 static const char *suffix[] = {
246 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
247
248 /* UTF-8 tables - used only when the newline setting is "any". */
249
/* Masks for extracting the data bits from the first byte of a UTF-8 character,
indexed by the number of additional bytes that follow it. The table is private
to this file, so it is given internal linkage. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* The number of additional bytes that follow the first byte of a multibyte
UTF-8 character, indexed by the bottom six bits of that first byte. Also
private to this file. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
257
258
259
260 /*************************************************
261 * OS-specific functions *
262 *************************************************/
263
264 /* These functions are defined so that they can be made system specific,
265 although at present the only ones are for Unix, Win32, and for "no support". */
266
267
268 /************* Directory scanning in Unix ***********/
269
270 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
271 #include <sys/types.h>
272 #include <sys/stat.h>
273 #include <dirent.h>
274
275 typedef DIR directory_type;
276
/* Test whether a path names a directory.

Argument:  filename   the path to test
Returns:   '/' if stat() succeeds and reports a directory
           0 otherwise, including when stat() fails, in the expectation that
             a later attempt to open the path as a file will fail
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
285
286 static directory_type *
287 opendirectory(char *filename)
288 {
289 return opendir(filename);
290 }
291
292 static char *
293 readdirectory(directory_type *dir)
294 {
295 for (;;)
296 {
297 struct dirent *dent = readdir(dir);
298 if (dent == NULL) return NULL;
299 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
300 return dent->d_name;
301 }
302 /* Control never reaches here */
303 }
304
305 static void
306 closedirectory(directory_type *dir)
307 {
308 closedir(dir);
309 }
310
311
312 /************* Test for regular file in Unix **********/
313
/* Test whether a path names a regular file.

Argument:  filename   the path to test
Returns:   1 if stat() fails, in the expectation that a later attempt to
             open the path as a file will fail
           otherwise non-zero for a regular file and 0 for anything else
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISREG(info.st_mode) ? 1 : 0;
  return 1;
}
322
323
324 /************* Test stdout for being a terminal in Unix **********/
325
326 static BOOL
327 is_stdout_tty(void)
328 {
329 return isatty(fileno(stdout));
330 }
331
332
333 /************* Directory scanning in Win32 ***********/
334
335 /* I (Philip Hazel) have no means of testing this code. It was contributed by
336 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
337 when it did not exist. David Byron added a patch that moved the #include of
338 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
339 */
340
341 #elif HAVE_WINDOWS_H
342
343 #ifndef STRICT
344 # define STRICT
345 #endif
346 #ifndef WIN32_LEAN_AND_MEAN
347 # define WIN32_LEAN_AND_MEAN
348 #endif
349
350 #include <windows.h>
351
352 #ifndef INVALID_FILE_ATTRIBUTES
353 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
354 #endif
355
/* The state of a directory scan in Win32. */
356 typedef struct directory_type
357 {
358 HANDLE handle; /* search handle returned by FindFirstFile() */
359 BOOL first; /* TRUE until the entry found by FindFirstFile() has been used */
360 WIN32_FIND_DATA data; /* the most recently found entry */
361 } directory_type;
362
363 int
364 isdirectory(char *filename)
365 {
366 DWORD attr = GetFileAttributes(filename);
367 if (attr == INVALID_FILE_ATTRIBUTES)
368 return 0;
369 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
370 }
371
372 directory_type *
373 opendirectory(char *filename)
374 {
375 size_t len;
376 char *pattern;
377 directory_type *dir;
378 DWORD err;
379 len = strlen(filename);
380 pattern = (char *) malloc(len + 3);
381 dir = (directory_type *) malloc(sizeof(*dir));
382 if ((pattern == NULL) || (dir == NULL))
383 {
384 fprintf(stderr, "pcregrep: malloc failed\n");
385 exit(2);
386 }
387 memcpy(pattern, filename, len);
388 memcpy(&(pattern[len]), "\\*", 3);
389 dir->handle = FindFirstFile(pattern, &(dir->data));
390 if (dir->handle != INVALID_HANDLE_VALUE)
391 {
392 free(pattern);
393 dir->first = TRUE;
394 return dir;
395 }
396 err = GetLastError();
397 free(pattern);
398 free(dir);
399 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
400 return NULL;
401 }
402
403 char *
404 readdirectory(directory_type *dir)
405 {
406 for (;;)
407 {
408 if (!dir->first)
409 {
410 if (!FindNextFile(dir->handle, &(dir->data)))
411 return NULL;
412 }
413 else
414 {
415 dir->first = FALSE;
416 }
417 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
418 return dir->data.cFileName;
419 }
420 #ifndef _MSC_VER
421 return NULL; /* Keep compiler happy; never executed */
422 #endif
423 }
424
425 void
426 closedirectory(directory_type *dir)
427 {
428 FindClose(dir->handle);
429 free(dir);
430 }
431
432
433 /************* Test for regular file in Win32 **********/
434
435 /* I don't know how to do this, or if it can be done; assume all paths are
436 regular if they are not directories. */
437
/* Test for a regular file (Win32 version): anything that isdirectory() does
not accept is treated as regular.

Argument:  filename   the path to test
Returns:   1 if isdirectory() returns zero for the path, otherwise 0
*/

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
442
443
444 /************* Test stdout for being a terminal in Win32 **********/
445
446 /* I don't know how to do this; assume never */
447
448 static BOOL
449 is_stdout_tty(void)
450 {
451 return FALSE;
452 }
453
454
455 /************* Directory scanning when we can't do it ***********/
456
457 /* The type is void, and apart from isdirectory(), the functions do nothing. */
458
459 #else
460
/* With no directory support the scan type is just void. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A scan can never be started; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any names to return; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
467
468
469 /************* Test for regular when we can't do it **********/
470
471 /* Assume all files are regular. */
472
/* With no way of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
474
475
476 /************* Test stdout for being a terminal when we can't do it **********/
477
478 static BOOL
479 is_stdout_tty(void)
480 {
481 return FALSE;
482 }
483
484
485 #endif
486
487
488
489 #ifndef HAVE_STRERROR
490 /*************************************************
491 * Provide strerror() for non-ANSI libraries *
492 *************************************************/
493
494 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
495 in their libraries, but can provide the same facility by this simple
496 alternative function. */
497
/* The system's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), compiled only when HAVE_STRERROR is not defined.

Argument:  n   an error number
Returns:   the sys_errlist entry for n, or a fixed "unknown" text if n is
           negative or not less than sys_nerr
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
507 #endif /* HAVE_STRERROR */
508
509
510
511 /*************************************************
512 * Find end of line *
513 *************************************************/
514
515 /* The length of the endline sequence that is found is set via lenptr. This may
516 be zero at the very end of the file if there is no line-ending sequence there.
517
518 Arguments:
519 p current position in line
520 endptr end of available data
521 lenptr where to put the length of the eol sequence
522
523 Returns: pointer to the byte after the end-of-line sequence (the start of the next line), or endptr if no line ending was found
524 */
525
526 static char *
527 end_of_line(char *p, char *endptr, int *lenptr)
528 {
529 switch(endlinetype)
530 {
531 default: /* Just in case */
532 case EL_LF:
533 while (p < endptr && *p != '\n') p++;
534 if (p < endptr)
535 {
536 *lenptr = 1;
537 return p + 1;
538 }
539 *lenptr = 0;
540 return endptr;
541
542 case EL_CR:
543 while (p < endptr && *p != '\r') p++;
544 if (p < endptr)
545 {
546 *lenptr = 1;
547 return p + 1;
548 }
549 *lenptr = 0;
550 return endptr;
551
552 case EL_CRLF:
553 for (;;)
554 {
555 while (p < endptr && *p != '\r') p++;
556 if (++p >= endptr)
557 {
558 *lenptr = 0;
559 return endptr;
560 }
561 if (*p == '\n')
562 {
563 *lenptr = 2;
564 return p + 1;
565 }
566 }
567 break;
568
569 case EL_ANYCRLF:
570 while (p < endptr)
571 {
572 int extra = 0;
573 register int c = *((unsigned char *)p);
574
575 if (utf8 && c >= 0xc0)
576 {
577 int gcii, gcss;
578 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
579 gcss = 6*extra;
580 c = (c & utf8_table3[extra]) << gcss;
581 for (gcii = 1; gcii <= extra; gcii++)
582 {
583 gcss -= 6;
584 c |= (p[gcii] & 0x3f) << gcss;
585 }
586 }
587
588 p += 1 + extra;
589
590 switch (c)
591 {
592 case 0x0a: /* LF */
593 *lenptr = 1;
594 return p;
595
596 case 0x0d: /* CR */
597 if (p < endptr && *p == 0x0a)
598 {
599 *lenptr = 2;
600 p++;
601 }
602 else *lenptr = 1;
603 return p;
604
605 default:
606 break;
607 }
608 } /* End of loop for ANYCRLF case */
609
610 *lenptr = 0; /* Must have hit the end */
611 return endptr;
612
613 case EL_ANY:
614 while (p < endptr)
615 {
616 int extra = 0;
617 register int c = *((unsigned char *)p);
618
619 if (utf8 && c >= 0xc0)
620 {
621 int gcii, gcss;
622 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
623 gcss = 6*extra;
624 c = (c & utf8_table3[extra]) << gcss;
625 for (gcii = 1; gcii <= extra; gcii++)
626 {
627 gcss -= 6;
628 c |= (p[gcii] & 0x3f) << gcss;
629 }
630 }
631
632 p += 1 + extra;
633
634 switch (c)
635 {
636 case 0x0a: /* LF */
637 case 0x0b: /* VT */
638 case 0x0c: /* FF */
639 *lenptr = 1;
640 return p;
641
642 case 0x0d: /* CR */
643 if (p < endptr && *p == 0x0a)
644 {
645 *lenptr = 2;
646 p++;
647 }
648 else *lenptr = 1;
649 return p;
650
651 case 0x85: /* NEL */
652 *lenptr = utf8? 2 : 1;
653 return p;
654
655 case 0x2028: /* LS */
656 case 0x2029: /* PS */
657 *lenptr = 3;
658 return p;
659
660 default:
661 break;
662 }
663 } /* End of loop for ANY case */
664
665 *lenptr = 0; /* Must have hit the end */
666 return endptr;
667 } /* End of overall switch */
668 }
669
670
671
672 /*************************************************
673 * Find start of previous line *
674 *************************************************/
675
676 /* This is called when looking back for before lines to print.
677
678 Arguments:
679 p start of the subsequent line
680 startptr start of available data
681
682 Returns: pointer to the start of the previous line
683 */
684
685 static char *
686 previous_line(char *p, char *startptr)
687 {
688 switch(endlinetype)
689 {
690 default: /* Just in case */
691 case EL_LF:
692 p--;
693 while (p > startptr && p[-1] != '\n') p--;
694 return p;
695
696 case EL_CR:
697 p--;
698 while (p > startptr && p[-1] != '\n') p--;
699 return p;
700
701 case EL_CRLF:
702 for (;;)
703 {
704 p -= 2;
705 while (p > startptr && p[-1] != '\n') p--;
706 if (p <= startptr + 1 || p[-2] == '\r') return p;
707 }
708 return p; /* But control should never get here */
709
710 case EL_ANY:
711 case EL_ANYCRLF:
712 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
713 if (utf8) while ((*p & 0xc0) == 0x80) p--;
714
715 while (p > startptr)
716 {
717 register int c;
718 char *pp = p - 1;
719
720 if (utf8)
721 {
722 int extra = 0;
723 while ((*pp & 0xc0) == 0x80) pp--;
724 c = *((unsigned char *)pp);
725 if (c >= 0xc0)
726 {
727 int gcii, gcss;
728 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729 gcss = 6*extra;
730 c = (c & utf8_table3[extra]) << gcss;
731 for (gcii = 1; gcii <= extra; gcii++)
732 {
733 gcss -= 6;
734 c |= (pp[gcii] & 0x3f) << gcss;
735 }
736 }
737 }
738 else c = *((unsigned char *)pp);
739
740 if (endlinetype == EL_ANYCRLF) switch (c)
741 {
742 case 0x0a: /* LF */
743 case 0x0d: /* CR */
744 return p;
745
746 default:
747 break;
748 }
749
750 else switch (c)
751 {
752 case 0x0a: /* LF */
753 case 0x0b: /* VT */
754 case 0x0c: /* FF */
755 case 0x0d: /* CR */
756 case 0x85: /* NEL */
757 case 0x2028: /* LS */
758 case 0x2029: /* PS */
759 return p;
760
761 default:
762 break;
763 }
764
765 p = pp; /* Back one character */
766 } /* End of loop for ANY case */
767
768 return startptr; /* Hit start of data */
769 } /* End of overall switch */
770 }
771
772
773
774
775
776 /*************************************************
777 * Print the previous "after" lines *
778 *************************************************/
779
780 /* This is called if we are about to lose said lines because of buffer filling,
781 and at the end of the file. The data in the line is written using fwrite() so
782 that a binary zero does not terminate it.
783
784 Arguments:
785 lastmatchnumber the number of the last matching line, plus one
786 lastmatchrestart where we restarted after the last match
787 endptr end of available data
788 printname filename for printing
789
790 Returns: nothing
791 */
792
793 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
794 char *endptr, char *printname)
795 {
796 if (after_context > 0 && lastmatchnumber > 0)
797 {
798 int count = 0;
799 while (lastmatchrestart < endptr && count++ < after_context)
800 {
801 int ellength;
802 char *pp = lastmatchrestart;
803 if (printname != NULL) fprintf(stdout, "%s-", printname);
804 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
805 pp = end_of_line(pp, endptr, &ellength);
806 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
807 lastmatchrestart = pp;
808 }
809 hyphenpending = TRUE;
810 }
811 }
812
813
814
815 /*************************************************
816 * Grep an individual file *
817 *************************************************/
818
819 /* This is called from grep_or_recurse() below. It uses a buffer that is three
820 times the value of MBUFTHIRD. The matching point is never allowed to stray into
821 the top third of the buffer, thus keeping more of the file available for
822 context printing or for multiline scanning. For large files, the pointer will
823 be in the middle third most of the time, so the bottom third is available for
824 "before" context printing.
825
826 Arguments:
827 handle the fopened FILE stream for a normal file
828 the gzFile pointer when reading is via libz
829 the BZFILE pointer when reading is via libbz2
830 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
831 printname the file name if it is to be printed for each match
832 or NULL if the file name is not to be printed
833 it cannot be NULL if filenames[_nomatch]_only is set
834
835 Returns: 0 if there was at least one match
836 1 otherwise (no matches)
837 2 if there is a read error on a .bz2 file
838 */
839
840 static int
841 pcregrep(void *handle, int frtype, char *printname)
842 {
843 int rc = 1;
844 int linenumber = 1;
845 int lastmatchnumber = 0;
846 int count = 0;
847 int filepos = 0;
848 int offsets[99];
849 char *lastmatchrestart = NULL;
850 char buffer[3*MBUFTHIRD];
851 char *ptr = buffer;
852 char *endptr;
853 size_t bufflength;
854 BOOL endhyphenpending = FALSE;
855 FILE *in = NULL; /* Ensure initialized */
856
857 #ifdef SUPPORT_LIBZ
858 gzFile ingz = NULL;
859 #endif
860
861 #ifdef SUPPORT_LIBBZ2
862 BZFILE *inbz2 = NULL;
863 #endif
864
865
866 /* Do the first read into the start of the buffer and set up the pointer to end
867 of what we have. In the case of libz, a non-zipped .gz file will be read as a
868 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
869 fail. */
870
871 #ifdef SUPPORT_LIBZ
872 if (frtype == FR_LIBZ)
873 {
874 ingz = (gzFile)handle;
875 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
876 }
877 else
878 #endif
879
880 #ifdef SUPPORT_LIBBZ2
881 if (frtype == FR_LIBBZ2)
882 {
883 inbz2 = (BZFILE *)handle;
884 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
885 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
886 } /* without the cast it is unsigned. */
887 else
888 #endif
889
890 {
891 in = (FILE *)handle;
892 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
893 }
894
895 endptr = buffer + bufflength;
896
897 /* Loop while the current pointer is not at the end of the file. For large
898 files, endptr will be at the end of the buffer when we are in the middle of the
899 file, but ptr will never get there, because as soon as it gets over 2/3 of the
900 way, the buffer is shifted left and re-filled. */
901
902 while (ptr < endptr)
903 {
904 int i, endlinelength;
905 int mrc = 0;
906 BOOL match = FALSE;
907 char *matchptr = ptr;
908 char *t = ptr;
909 size_t length, linelength;
910
911 /* At this point, ptr is at the start of a line. We need to find the length
912 of the subject string to pass to pcre_exec(). In multiline mode, it is the
913 length remainder of the data in the buffer. Otherwise, it is the length of
914 the next line. After matching, we always advance by the length of the next
915 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
916 that any match is constrained to be in the first line. */
917
918 t = end_of_line(t, endptr, &endlinelength);
919 linelength = t - ptr - endlinelength;
920 length = multiline? (size_t)(endptr - ptr) : linelength;
921
922 /* Extra processing for Jeffrey Friedl's debugging. */
923
924 #ifdef JFRIEDL_DEBUG
925 if (jfriedl_XT || jfriedl_XR)
926 {
927 #include <sys/time.h>
928 #include <time.h>
929 struct timeval start_time, end_time;
930 struct timezone dummy;
931
932 if (jfriedl_XT)
933 {
934 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
935 const char *orig = ptr;
936 ptr = malloc(newlen + 1);
937 if (!ptr) {
938 printf("out of memory");
939 exit(2);
940 }
941 endptr = ptr;
942 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
943 for (i = 0; i < jfriedl_XT; i++) {
944 strncpy(endptr, orig, length);
945 endptr += length;
946 }
947 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
948 length = newlen;
949 }
950
951 if (gettimeofday(&start_time, &dummy) != 0)
952 perror("bad gettimeofday");
953
954
955 for (i = 0; i < jfriedl_XR; i++)
956 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
957
958 if (gettimeofday(&end_time, &dummy) != 0)
959 perror("bad gettimeofday");
960
961 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
962 -
963 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
964
965 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
966 return 0;
967 }
968 #endif
969
970 /* We come back here after a match when the -o option (only_matching) is set,
971 in order to find any further matches in the same line. */
972
973 ONLY_MATCHING_RESTART:
974
975 /* Run through all the patterns until one matches. Note that we don't include
976 the final newline in the subject string. */
977
978 for (i = 0; i < pattern_count; i++)
979 {
980 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
981 offsets, 99);
982 if (mrc >= 0) { match = TRUE; break; }
983 if (mrc != PCRE_ERROR_NOMATCH)
984 {
985 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
986 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
987 fprintf(stderr, "this line:\n");
988 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
989 fprintf(stderr, "\n");
990 if (error_count == 0 &&
991 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
992 {
993 fprintf(stderr, "pcregrep: error %d means that a resource limit "
994 "was exceeded\n", mrc);
995 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
996 }
997 if (error_count++ > 20)
998 {
999 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1000 exit(2);
1001 }
1002 match = invert; /* No more matching; don't show the line again */
1003 break;
1004 }
1005 }
1006
1007 /* If it's a match or a not-match (as required), do what's wanted. */
1008
1009 if (match != invert)
1010 {
1011 BOOL hyphenprinted = FALSE;
1012
1013 /* We've failed if we want a file that doesn't have any matches. */
1014
1015 if (filenames == FN_NOMATCH_ONLY) return 1;
1016
1017 /* Just count if just counting is wanted. */
1018
1019 if (count_only) count++;
1020
1021 /* If all we want is a file name, there is no need to scan any more lines
1022 in the file. */
1023
1024 else if (filenames == FN_ONLY)
1025 {
1026 fprintf(stdout, "%s\n", printname);
1027 return 0;
1028 }
1029
1030 /* Likewise, if all we want is a yes/no answer. */
1031
1032 else if (quiet) return 0;
1033
1034 /* The --only-matching option prints just the substring that matched, and
1035 the --file-offsets and --line-offsets options output offsets for the
1036 matching substring (they both force --only-matching). None of these options
1037 prints any context. Afterwards, adjust the start and length, and then jump
1038 back to look for further matches in the same line. If we are in invert
1039 mode, however, nothing is printed - this could be still useful because the
1040 return code is set. */
1041
1042 else if (only_matching)
1043 {
1044 if (!invert)
1045 {
1046 if (printname != NULL) fprintf(stdout, "%s:", printname);
1047 if (number) fprintf(stdout, "%d:", linenumber);
1048 if (line_offsets)
1049 fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1050 offsets[1] - offsets[0]);
1051 else if (file_offsets)
1052 fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1053 offsets[1] - offsets[0]);
1054 else
1055 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1056 fprintf(stdout, "\n");
1057 matchptr += offsets[1];
1058 length -= offsets[1];
1059 match = FALSE;
1060 goto ONLY_MATCHING_RESTART;
1061 }
1062 }
1063
1064 /* This is the default case when none of the above options is set. We print
1065 the matching lines(s), possibly preceded and/or followed by other lines of
1066 context. */
1067
1068 else
1069 {
1070 /* See if there is a requirement to print some "after" lines from a
1071 previous match. We never print any overlaps. */
1072
1073 if (after_context > 0 && lastmatchnumber > 0)
1074 {
1075 int ellength;
1076 int linecount = 0;
1077 char *p = lastmatchrestart;
1078
1079 while (p < ptr && linecount < after_context)
1080 {
1081 p = end_of_line(p, ptr, &ellength);
1082 linecount++;
1083 }
1084
1085 /* It is important to advance lastmatchrestart during this printing so
1086 that it interacts correctly with any "before" printing below. Print
1087 each line's data using fwrite() in case there are binary zeroes. */
1088
1089 while (lastmatchrestart < p)
1090 {
1091 char *pp = lastmatchrestart;
1092 if (printname != NULL) fprintf(stdout, "%s-", printname);
1093 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1094 pp = end_of_line(pp, endptr, &ellength);
1095 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1096 lastmatchrestart = pp;
1097 }
1098 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1099 }
1100
1101 /* If there were non-contiguous lines printed above, insert hyphens. */
1102
1103 if (hyphenpending)
1104 {
1105 fprintf(stdout, "--\n");
1106 hyphenpending = FALSE;
1107 hyphenprinted = TRUE;
1108 }
1109
1110 /* See if there is a requirement to print some "before" lines for this
1111 match. Again, don't print overlaps. */
1112
1113 if (before_context > 0)
1114 {
1115 int linecount = 0;
1116 char *p = ptr;
1117
1118 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1119 linecount < before_context)
1120 {
1121 linecount++;
1122 p = previous_line(p, buffer);
1123 }
1124
1125 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1126 fprintf(stdout, "--\n");
1127
1128 while (p < ptr)
1129 {
1130 int ellength;
1131 char *pp = p;
1132 if (printname != NULL) fprintf(stdout, "%s-", printname);
1133 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1134 pp = end_of_line(pp, endptr, &ellength);
1135 fwrite(p, 1, pp - p, stdout);
1136 p = pp;
1137 }
1138 }
1139
1140 /* Now print the matching line(s); ensure we set hyphenpending at the end
1141 of the file if any context lines are being output. */
1142
1143 if (after_context > 0 || before_context > 0)
1144 endhyphenpending = TRUE;
1145
1146 if (printname != NULL) fprintf(stdout, "%s:", printname);
1147 if (number) fprintf(stdout, "%d:", linenumber);
1148
1149 /* In multiline mode, we want to print to the end of the line in which
1150 the end of the matched string is found, so we adjust linelength and the
1151 line number appropriately, but only when there actually was a match
1152 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1153 the match will always be before the first newline sequence. */
1154
1155 if (multiline)
1156 {
1157 int ellength;
1158 char *endmatch = ptr;
1159 if (!invert)
1160 {
1161 endmatch += offsets[1];
1162 t = ptr;
1163 while (t < endmatch)
1164 {
1165 t = end_of_line(t, endptr, &ellength);
1166 if (t <= endmatch) linenumber++; else break;
1167 }
1168 }
1169 endmatch = end_of_line(endmatch, endptr, &ellength);
1170 linelength = endmatch - ptr - ellength;
1171 }
1172
1173 /*** NOTE: Use only fwrite() to output the data line, so that binary
1174 zeroes are treated as just another data character. */
1175
1176 /* This extra option, for Jeffrey Friedl's debugging requirements,
1177 replaces the matched string, or a specific captured string if it exists,
1178 with X. When this happens, colouring is ignored. */
1179
1180 #ifdef JFRIEDL_DEBUG
1181 if (S_arg >= 0 && S_arg < mrc)
1182 {
1183 int first = S_arg * 2;
1184 int last = first + 1;
1185 fwrite(ptr, 1, offsets[first], stdout);
1186 fprintf(stdout, "X");
1187 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1188 }
1189 else
1190 #endif
1191
1192 /* We have to split the line(s) up if colouring. */
1193
1194 if (do_colour)
1195 {
1196 fwrite(ptr, 1, offsets[0], stdout);
1197 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1198 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1199 fprintf(stdout, "%c[00m", 0x1b);
1200 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1201 stdout);
1202 }
1203 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1204 }
1205
1206 /* End of doing what has to be done for a match */
1207
1208 rc = 0; /* Had some success */
1209
1210 /* Remember where the last match happened for after_context. We remember
1211 where we are about to restart, and that line's number. */
1212
1213 lastmatchrestart = ptr + linelength + endlinelength;
1214 lastmatchnumber = linenumber + 1;
1215 }
1216
1217 /* For a match in multiline inverted mode (which of course did not cause
1218 anything to be printed), we have to move on to the end of the match before
1219 proceeding. */
1220
1221 if (multiline && invert && match)
1222 {
1223 int ellength;
1224 char *endmatch = ptr + offsets[1];
1225 t = ptr;
1226 while (t < endmatch)
1227 {
1228 t = end_of_line(t, endptr, &ellength);
1229 if (t <= endmatch) linenumber++; else break;
1230 }
1231 endmatch = end_of_line(endmatch, endptr, &ellength);
1232 linelength = endmatch - ptr - ellength;
1233 }
1234
1235 /* Advance to after the newline and increment the line number. The file
1236 offset to the current line is maintained in filepos. */
1237
1238 ptr += linelength + endlinelength;
1239 filepos += linelength + endlinelength;
1240 linenumber++;
1241
1242 /* If we haven't yet reached the end of the file (the buffer is full), and
1243 the current point is in the top 1/3 of the buffer, slide the buffer down by
1244 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1245 about to be lost, print them. */
1246
1247 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1248 {
1249 if (after_context > 0 &&
1250 lastmatchnumber > 0 &&
1251 lastmatchrestart < buffer + MBUFTHIRD)
1252 {
1253 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1254 lastmatchnumber = 0;
1255 }
1256
1257 /* Now do the shuffle */
1258
1259 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1260 ptr -= MBUFTHIRD;
1261
1262 #ifdef SUPPORT_LIBZ
1263 if (frtype == FR_LIBZ)
1264 bufflength = 2*MBUFTHIRD +
1265 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1266 else
1267 #endif
1268
1269 #ifdef SUPPORT_LIBBZ2
1270 if (frtype == FR_LIBBZ2)
1271 bufflength = 2*MBUFTHIRD +
1272 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1273 else
1274 #endif
1275
1276 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1277
1278 endptr = buffer + bufflength;
1279
1280 /* Adjust any last match point */
1281
1282 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1283 }
1284 } /* Loop through the whole file */
1285
1286 /* End of file; print final "after" lines if wanted; do_after_lines sets
1287 hyphenpending if it prints something. */
1288
1289 if (!only_matching && !count_only)
1290 {
1291 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1292 hyphenpending |= endhyphenpending;
1293 }
1294
1295 /* Print the file name if we are looking for those without matches and there
1296 were none. If we found a match, we won't have got this far. */
1297
1298 if (filenames == FN_NOMATCH_ONLY)
1299 {
1300 fprintf(stdout, "%s\n", printname);
1301 return 0;
1302 }
1303
1304 /* Print the match count if wanted */
1305
1306 if (count_only)
1307 {
1308 if (printname != NULL) fprintf(stdout, "%s:", printname);
1309 fprintf(stdout, "%d\n", count);
1310 }
1311
1312 return rc;
1313 }
1314
1315
1316
1317 /*************************************************
1318 * Grep a file or recurse into a directory *
1319 *************************************************/
1320
1321 /* Given a path name, if it's a directory, scan all the files if we are
1322 recursing; if it's a file, grep it.
1323
1324 Arguments:
1325 pathname the path to investigate
1326 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1327 only_one_at_top TRUE if the path is the only one at toplevel
1328
1329 Returns: 0 if there was at least one match
1330 1 if there were no matches
1331 2 there was some kind of error
1332
1333 However, file opening failures are suppressed if "silent" is set.
1334 */
1335
1336 static int
1337 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1338 {
1339 int rc = 1;
1340 int sep;
1341 int frtype;
1342 int pathlen;
1343 void *handle;
1344 FILE *in = NULL; /* Ensure initialized */
1345
1346 #ifdef SUPPORT_LIBZ
1347 gzFile ingz = NULL;
1348 #endif
1349
1350 #ifdef SUPPORT_LIBBZ2
1351 BZFILE *inbz2 = NULL;
1352 #endif
1353
1354 /* If the file name is "-" we scan stdin */
1355
1356 if (strcmp(pathname, "-") == 0)
1357 {
1358 return pcregrep(stdin, FR_PLAIN,
1359 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1360 stdin_name : NULL);
1361 }
1362
1363 /* If the file is a directory, skip if skipping or if we are recursing, scan
1364 each file within it, subject to any include or exclude patterns that were set.
1365 The scanning code is localized so it can be made system-specific. */
1366
1367 if ((sep = isdirectory(pathname)) != 0)
1368 {
1369 if (dee_action == dee_SKIP) return 1;
1370 if (dee_action == dee_RECURSE)
1371 {
1372 char buffer[1024];
1373 char *nextfile;
1374 directory_type *dir = opendirectory(pathname);
1375
1376 if (dir == NULL)
1377 {
1378 if (!silent)
1379 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1380 strerror(errno));
1381 return 2;
1382 }
1383
1384 while ((nextfile = readdirectory(dir)) != NULL)
1385 {
1386 int frc, nflen;
1387 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1388 nflen = strlen(nextfile);
1389
1390 if (!isdirectory(buffer))
1391 {
1392 if (exclude_compiled != NULL &&
1393 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1394 continue;
1395
1396 if (include_compiled != NULL &&
1397 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1398 continue;
1399 }
1400
1401 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1402 if (frc > 1) rc = frc;
1403 else if (frc == 0 && rc == 1) rc = 0;
1404 }
1405
1406 closedirectory(dir);
1407 return rc;
1408 }
1409 }
1410
1411 /* If the file is not a directory and not a regular file, skip it if that's
1412 been requested. */
1413
1414 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1415
1416 /* Control reaches here if we have a regular file, or if we have a directory
1417 and recursion or skipping was not requested, or if we have anything else and
1418 skipping was not requested. The scan proceeds. If this is the first and only
1419 argument at top level, we don't show the file name, unless we are only showing
1420 the file name, or the filename was forced (-H). */
1421
1422 pathlen = strlen(pathname);
1423
1424 /* Open using zlib if it is supported and the file name ends with .gz. */
1425
1426 #ifdef SUPPORT_LIBZ
1427 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1428 {
1429 ingz = gzopen(pathname, "rb");
1430 if (ingz == NULL)
1431 {
1432 if (!silent)
1433 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1434 strerror(errno));
1435 return 2;
1436 }
1437 handle = (void *)ingz;
1438 frtype = FR_LIBZ;
1439 }
1440 else
1441 #endif
1442
1443 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1444
1445 #ifdef SUPPORT_LIBBZ2
1446 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1447 {
1448 inbz2 = BZ2_bzopen(pathname, "rb");
1449 handle = (void *)inbz2;
1450 frtype = FR_LIBBZ2;
1451 }
1452 else
1453 #endif
1454
1455 /* Otherwise use plain fopen(). The label is so that we can come back here if
1456 an attempt to read a .bz2 file indicates that it really is a plain file. */
1457
1458 #ifdef SUPPORT_LIBBZ2
1459 PLAIN_FILE:
1460 #endif
1461 {
1462 in = fopen(pathname, "r");
1463 handle = (void *)in;
1464 frtype = FR_PLAIN;
1465 }
1466
1467 /* All the opening methods return errno when they fail. */
1468
1469 if (handle == NULL)
1470 {
1471 if (!silent)
1472 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1473 strerror(errno));
1474 return 2;
1475 }
1476
1477 /* Now grep the file */
1478
1479 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1480 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1481
1482 /* Close in an appropriate manner. */
1483
1484 #ifdef SUPPORT_LIBZ
1485 if (frtype == FR_LIBZ)
1486 gzclose(ingz);
1487 else
1488 #endif
1489
1490 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1491 read failed. If the error indicates that the file isn't in fact bzipped, try
1492 again as a normal file. */
1493
1494 #ifdef SUPPORT_LIBBZ2
1495 if (frtype == FR_LIBBZ2)
1496 {
1497 if (rc == 2)
1498 {
1499 int errnum;
1500 const char *err = BZ2_bzerror(inbz2, &errnum);
1501 if (errnum == BZ_DATA_ERROR_MAGIC)
1502 {
1503 BZ2_bzclose(inbz2);
1504 goto PLAIN_FILE;
1505 }
1506 else if (!silent)
1507 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1508 pathname, err);
1509 }
1510 BZ2_bzclose(inbz2);
1511 }
1512 else
1513 #endif
1514
1515 /* Normal file close */
1516
1517 fclose(in);
1518
1519 /* Pass back the yield from pcregrep(). */
1520
1521 return rc;
1522 }
1523
1524
1525
1526
1527 /*************************************************
1528 * Usage function *
1529 *************************************************/
1530
1531 static int
1532 usage(int rc)
1533 {
1534 option_item *op;
1535 fprintf(stderr, "Usage: pcregrep [-");
1536 for (op = optionlist; op->one_char != 0; op++)
1537 {
1538 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1539 }
1540 fprintf(stderr, "] [long options] [pattern] [files]\n");
1541 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1542 "options.\n");
1543 return rc;
1544 }
1545
1546
1547
1548
1549 /*************************************************
1550 * Help function *
1551 *************************************************/
1552
1553 static void
1554 help(void)
1555 {
1556 option_item *op;
1557
1558 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1559 printf("Search for PATTERN in each FILE or standard input.\n");
1560 printf("PATTERN must be present if neither -e nor -f is used.\n");
1561 printf("\"-\" can be used as a file name to mean STDIN.\n");
1562
1563 #ifdef SUPPORT_LIBZ
1564 printf("Files whose names end in .gz are read using zlib.\n");
1565 #endif
1566
1567 #ifdef SUPPORT_LIBBZ2
1568 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1569 #endif
1570
1571 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1572 printf("Other files and the standard input are read as plain files.\n\n");
1573 #else
1574 printf("All files are read as plain files, without any interpretation.\n\n");
1575 #endif
1576
1577 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1578 printf("Options:\n");
1579
1580 for (op = optionlist; op->one_char != 0; op++)
1581 {
1582 int n;
1583 char s[4];
1584 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1585 n = 30 - printf(" %s --%s", s, op->long_name);
1586 if (n < 1) n = 1;
1587 printf("%.*s%s\n", n, " ", op->help_text);
1588 }
1589
1590 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1591 printf("trailing white space is removed and blank lines are ignored.\n");
1592 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1593
1594 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1595 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1596 }
1597
1598
1599
1600
1601 /*************************************************
1602 * Handle a single-letter, no data option *
1603 *************************************************/
1604
1605 static int
1606 handle_option(int letter, int options)
1607 {
1608 switch(letter)
1609 {
1610 case N_FOFFSETS: file_offsets = TRUE; break;
1611 case N_HELP: help(); exit(0);
1612 case N_LOFFSETS: line_offsets = number = TRUE; break;
1613 case 'c': count_only = TRUE; break;
1614 case 'F': process_options |= PO_FIXED_STRINGS; break;
1615 case 'H': filenames = FN_FORCE; break;
1616 case 'h': filenames = FN_NONE; break;
1617 case 'i': options |= PCRE_CASELESS; break;
1618 case 'l': filenames = FN_ONLY; break;
1619 case 'L': filenames = FN_NOMATCH_ONLY; break;
1620 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1621 case 'n': number = TRUE; break;
1622 case 'o': only_matching = TRUE; break;
1623 case 'q': quiet = TRUE; break;
1624 case 'r': dee_action = dee_RECURSE; break;
1625 case 's': silent = TRUE; break;
1626 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1627 case 'v': invert = TRUE; break;
1628 case 'w': process_options |= PO_WORD_MATCH; break;
1629 case 'x': process_options |= PO_LINE_MATCH; break;
1630
1631 case 'V':
1632 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1633 exit(0);
1634 break;
1635
1636 default:
1637 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1638 exit(usage(2));
1639 }
1640
1641 return options;
1642 }
1643
1644
1645
1646
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The ending is chosen from the
last decimal digit, except that numbers whose last two digits are 11, 12, or
13 take "th" ("11th", "112th"), as do all negative numbers.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four bytes hold a minus sign, the two-letter ending, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1669
1670
1671
1672 /*************************************************
1673 * Compile a single pattern *
1674 *************************************************/
1675
1676 /* When the -F option has been used, this is called for each substring.
1677 Otherwise it's called for each supplied pattern.
1678
1679 Arguments:
1680 pattern the pattern string
1681 options the PCRE options
1682 filename the file name, or NULL for a command-line pattern
1683 count 0 if this is the only command line pattern, or
1684 number of the command line pattern, or
1685 linenumber for a pattern from a file
1686
1687 Returns: TRUE on success, FALSE after an error
1688 */
1689
1690 static BOOL
1691 compile_single_pattern(char *pattern, int options, char *filename, int count)
1692 {
1693 char buffer[MBUFTHIRD + 16];
1694 const char *error;
1695 int errptr;
1696
1697 if (pattern_count >= MAX_PATTERN_COUNT)
1698 {
1699 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1700 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1701 return FALSE;
1702 }
1703
1704 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1705 suffix[process_options]);
1706 pattern_list[pattern_count] =
1707 pcre_compile(buffer, options, &error, &errptr, pcretables);
1708 if (pattern_list[pattern_count] != NULL)
1709 {
1710 pattern_count++;
1711 return TRUE;
1712 }
1713
1714 /* Handle compile errors */
1715
1716 errptr -= (int)strlen(prefix[process_options]);
1717 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1718
1719 if (filename == NULL)
1720 {
1721 if (count == 0)
1722 fprintf(stderr, "pcregrep: Error in command-line regex "
1723 "at offset %d: %s\n", errptr, error);
1724 else
1725 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1726 "at offset %d: %s\n", ordin(count), errptr, error);
1727 }
1728 else
1729 {
1730 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1731 "at offset %d: %s\n", count, filename, errptr, error);
1732 }
1733
1734 return FALSE;
1735 }
1736
1737
1738
1739 /*************************************************
1740 * Compile one supplied pattern *
1741 *************************************************/
1742
1743 /* When the -F option has been used, each string may be a list of strings,
1744 separated by line breaks. They will be matched literally.
1745
1746 Arguments:
1747 pattern the pattern string
1748 options the PCRE options
1749 filename the file name, or NULL for a command-line pattern
1750 count 0 if this is the only command line pattern, or
1751 number of the command line pattern, or
1752 linenumber for a pattern from a file
1753
1754 Returns: TRUE on success, FALSE after an error
1755 */
1756
1757 static BOOL
1758 compile_pattern(char *pattern, int options, char *filename, int count)
1759 {
1760 if ((process_options & PO_FIXED_STRINGS) != 0)
1761 {
1762 char *eop = pattern + strlen(pattern);
1763 char buffer[MBUFTHIRD];
1764 for(;;)
1765 {
1766 int ellength;
1767 char *p = end_of_line(pattern, eop, &ellength);
1768 if (ellength == 0)
1769 return compile_single_pattern(pattern, options, filename, count);
1770 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1771 pattern = p;
1772 if (!compile_single_pattern(buffer, options, filename, count))
1773 return FALSE;
1774 }
1775 }
1776 else return compile_single_pattern(pattern, options, filename, count);
1777 }
1778
1779
1780
1781 /*************************************************
1782 * Main program *
1783 *************************************************/
1784
1785 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1786
1787 int
1788 main(int argc, char **argv)
1789 {
1790 int i, j;
1791 int rc = 1;
1792 int pcre_options = 0;
1793 int cmd_pattern_count = 0;
1794 int hint_count = 0;
1795 int errptr;
1796 BOOL only_one_at_top;
1797 char *patterns[MAX_PATTERN_COUNT];
1798 const char *locale_from = "--locale";
1799 const char *error;
1800
1801 /* Set the default line ending value from the default in the PCRE library;
1802 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1803 */
1804
1805 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1806 switch(i)
1807 {
1808 default: newline = (char *)"lf"; break;
1809 case '\r': newline = (char *)"cr"; break;
1810 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1811 case -1: newline = (char *)"any"; break;
1812 case -2: newline = (char *)"anycrlf"; break;
1813 }
1814
1815 /* Process the options */
1816
1817 for (i = 1; i < argc; i++)
1818 {
1819 option_item *op = NULL;
1820 char *option_data = (char *)""; /* default to keep compiler happy */
1821 BOOL longop;
1822 BOOL longopwasequals = FALSE;
1823
1824 if (argv[i][0] != '-') break;
1825
1826 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1827 but only if we have previously had -e or -f to define the patterns. */
1828
1829 if (argv[i][1] == 0)
1830 {
1831 if (pattern_filename != NULL || pattern_count > 0) break;
1832 else exit(usage(2));
1833 }
1834
1835 /* Handle a long name option, or -- to terminate the options */
1836
1837 if (argv[i][1] == '-')
1838 {
1839 char *arg = argv[i] + 2;
1840 char *argequals = strchr(arg, '=');
1841
1842 if (*arg == 0) /* -- terminates options */
1843 {
1844 i++;
1845 break; /* out of the options-handling loop */
1846 }
1847
1848 longop = TRUE;
1849
1850 /* Some long options have data that follows after =, for example file=name.
1851 Some options have variations in the long name spelling: specifically, we
1852 allow "regexp" because GNU grep allows it, though I personally go along
1853 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1854 These options are entered in the table as "regex(p)". No option is in both
1855 these categories, fortunately. */
1856
1857 for (op = optionlist; op->one_char != 0; op++)
1858 {
1859 char *opbra = strchr(op->long_name, '(');
1860 char *equals = strchr(op->long_name, '=');
1861 if (opbra == NULL) /* Not a (p) case */
1862 {
1863 if (equals == NULL) /* Not thing=data case */
1864 {
1865 if (strcmp(arg, op->long_name) == 0) break;
1866 }
1867 else /* Special case xxx=data */
1868 {
1869 int oplen = equals - op->long_name;
1870 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1871 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1872 {
1873 option_data = arg + arglen;
1874 if (*option_data == '=')
1875 {
1876 option_data++;
1877 longopwasequals = TRUE;
1878 }
1879 break;
1880 }
1881 }
1882 }
1883 else /* Special case xxxx(p) */
1884 {
1885 char buff1[24];
1886 char buff2[24];
1887 int baselen = opbra - op->long_name;
1888 sprintf(buff1, "%.*s", baselen, op->long_name);
1889 sprintf(buff2, "%s%.*s", buff1,
1890 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1891 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1892 break;
1893 }
1894 }
1895
1896 if (op->one_char == 0)
1897 {
1898 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1899 exit(usage(2));
1900 }
1901 }
1902
1903
1904 /* Jeffrey Friedl's debugging harness uses these additional options which
1905 are not in the right form for putting in the option table because they use
1906 only one hyphen, yet are more than one character long. By putting them
1907 separately here, they will not get displayed as part of the help() output,
1908 but I don't think Jeffrey will care about that. */
1909
1910 #ifdef JFRIEDL_DEBUG
1911 else if (strcmp(argv[i], "-pre") == 0) {
1912 jfriedl_prefix = argv[++i];
1913 continue;
1914 } else if (strcmp(argv[i], "-post") == 0) {
1915 jfriedl_postfix = argv[++i];
1916 continue;
1917 } else if (strcmp(argv[i], "-XT") == 0) {
1918 sscanf(argv[++i], "%d", &jfriedl_XT);
1919 continue;
1920 } else if (strcmp(argv[i], "-XR") == 0) {
1921 sscanf(argv[++i], "%d", &jfriedl_XR);
1922 continue;
1923 }
1924 #endif
1925
1926
1927 /* One-char options; many that have no data may be in a single argument; we
1928 continue till we hit the last one or one that needs data. */
1929
1930 else
1931 {
1932 char *s = argv[i] + 1;
1933 longop = FALSE;
1934 while (*s != 0)
1935 {
1936 for (op = optionlist; op->one_char != 0; op++)
1937 { if (*s == op->one_char) break; }
1938 if (op->one_char == 0)
1939 {
1940 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1941 *s, argv[i]);
1942 exit(usage(2));
1943 }
1944 if (op->type != OP_NODATA || s[1] == 0)
1945 {
1946 option_data = s+1;
1947 break;
1948 }
1949 pcre_options = handle_option(*s++, pcre_options);
1950 }
1951 }
1952
1953 /* At this point we should have op pointing to a matched option. If the type
1954 is OP_NODATA, it means that there is no data, and the option might set
1955 something in the PCRE options. */
1956
1957 if (op->type == OP_NODATA)
1958 {
1959 pcre_options = handle_option(op->one_char, pcre_options);
1960 continue;
1961 }
1962
1963 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1964 either has a value or defaults to something. It cannot have data in a
1965 separate item. At the moment, the only such options are "colo(u)r" and
1966 Jeffrey Friedl's special -S debugging option. */
1967
1968 if (*option_data == 0 &&
1969 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1970 {
1971 switch (op->one_char)
1972 {
1973 case N_COLOUR:
1974 colour_option = (char *)"auto";
1975 break;
1976 #ifdef JFRIEDL_DEBUG
1977 case 'S':
1978 S_arg = 0;
1979 break;
1980 #endif
1981 }
1982 continue;
1983 }
1984
1985 /* Otherwise, find the data string for the option. */
1986
1987 if (*option_data == 0)
1988 {
1989 if (i >= argc - 1 || longopwasequals)
1990 {
1991 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1992 exit(usage(2));
1993 }
1994 option_data = argv[++i];
1995 }
1996
1997 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1998 multiple times to create a list of patterns. */
1999
2000 if (op->type == OP_PATLIST)
2001 {
2002 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2003 {
2004 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2005 MAX_PATTERN_COUNT);
2006 return 2;
2007 }
2008 patterns[cmd_pattern_count++] = option_data;
2009 }
2010
2011 /* Otherwise, deal with single string or numeric data values. */
2012
2013 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2014 {
2015 *((char **)op->dataptr) = option_data;
2016 }
2017 else
2018 {
2019 char *endptr;
2020 int n = strtoul(option_data, &endptr, 10);
2021 if (*endptr != 0)
2022 {
2023 if (longop)
2024 {
2025 char *equals = strchr(op->long_name, '=');
2026 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2027 equals - op->long_name;
2028 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2029 option_data, nlen, op->long_name);
2030 }
2031 else
2032 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2033 option_data, op->one_char);
2034 exit(usage(2));
2035 }
2036 *((int *)op->dataptr) = n;
2037 }
2038 }
2039
2040 /* Options have been decoded. If -C was used, its value is used as a default
2041 for -A and -B. */
2042
2043 if (both_context > 0)
2044 {
2045 if (after_context == 0) after_context = both_context;
2046 if (before_context == 0) before_context = both_context;
2047 }
2048
2049 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2050 However, the latter two set the only_matching flag. */
2051
2052 if ((only_matching && (file_offsets || line_offsets)) ||
2053 (file_offsets && line_offsets))
2054 {
2055 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2056 "and/or --line-offsets\n");
2057 exit(usage(2));
2058 }
2059
2060 if (file_offsets || line_offsets) only_matching = TRUE;
2061
2062 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2063 LC_ALL environment variable is set, and if so, use it. */
2064
2065 if (locale == NULL)
2066 {
2067 locale = getenv("LC_ALL");
2068 locale_from = "LCC_ALL";
2069 }
2070
2071 if (locale == NULL)
2072 {
2073 locale = getenv("LC_CTYPE");
2074 locale_from = "LC_CTYPE";
2075 }
2076
2077 /* If a locale has been provided, set it, and generate the tables the PCRE
2078 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2079
2080 if (locale != NULL)
2081 {
2082 if (setlocale(LC_CTYPE, locale) == NULL)
2083 {
2084 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2085 locale, locale_from);
2086 return 2;
2087 }
2088 pcretables = pcre_maketables();
2089 }
2090
2091 /* Sort out colouring */
2092
2093 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2094 {
2095 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2096 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2097 else
2098 {
2099 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2100 colour_option);
2101 return 2;
2102 }
2103 if (do_colour)
2104 {
2105 char *cs = getenv("PCREGREP_COLOUR");
2106 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2107 if (cs != NULL) colour_string = cs;
2108 }
2109 }
2110
2111 /* Interpret the newline type; the default settings are Unix-like. */
2112
2113 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2114 {
2115 pcre_options |= PCRE_NEWLINE_CR;
2116 endlinetype = EL_CR;
2117 }
2118 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2119 {
2120 pcre_options |= PCRE_NEWLINE_LF;
2121 endlinetype = EL_LF;
2122 }
2123 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2124 {
2125 pcre_options |= PCRE_NEWLINE_CRLF;
2126 endlinetype = EL_CRLF;
2127 }
2128 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2129 {
2130 pcre_options |= PCRE_NEWLINE_ANY;
2131 endlinetype = EL_ANY;
2132 }
2133 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2134 {
2135 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2136 endlinetype = EL_ANYCRLF;
2137 }
2138 else
2139 {
2140 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2141 return 2;
2142 }
2143
2144 /* Interpret the text values for -d and -D */
2145
2146 if (dee_option != NULL)
2147 {
2148 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2149 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2150 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2151 else
2152 {
2153 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2154 return 2;
2155 }
2156 }
2157
2158 if (DEE_option != NULL)
2159 {
2160 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2161 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2162 else
2163 {
2164 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2165 return 2;
2166 }
2167 }
2168
2169 /* Check the values for Jeffrey Friedl's debugging options. */
2170
2171 #ifdef JFRIEDL_DEBUG
2172 if (S_arg > 9)
2173 {
2174 fprintf(stderr, "pcregrep: bad value for -S option\n");
2175 return 2;
2176 }
2177 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2178 {
2179 if (jfriedl_XT == 0) jfriedl_XT = 1;
2180 if (jfriedl_XR == 0) jfriedl_XR = 1;
2181 }
2182 #endif
2183
2184 /* Get memory to store the pattern and hints lists. */
2185
2186 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2187 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2188
2189 if (pattern_list == NULL || hints_list == NULL)
2190 {
2191 fprintf(stderr, "pcregrep: malloc failed\n");
2192 goto EXIT2;
2193 }
2194
2195 /* If no patterns were provided by -e, and there is no file provided by -f,
2196 the first argument is the one and only pattern, and it must exist. */
2197
2198 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2199 {
2200 if (i >= argc) return usage(2);
2201 patterns[cmd_pattern_count++] = argv[i++];
2202 }
2203
2204 /* Compile the patterns that were provided on the command line, either by
2205 multiple uses of -e or as a single unkeyed pattern. */
2206
2207 for (j = 0; j < cmd_pattern_count; j++)
2208 {
2209 if (!compile_pattern(patterns[j], pcre_options, NULL,
2210 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2211 goto EXIT2;
2212 }
2213
2214 /* Compile the regular expressions that are provided in a file. */
2215
2216 if (pattern_filename != NULL)
2217 {
2218 int linenumber = 0;
2219 FILE *f;
2220 char *filename;
2221 char buffer[MBUFTHIRD];
2222
2223 if (strcmp(pattern_filename, "-") == 0)
2224 {
2225 f = stdin;
2226 filename = stdin_name;
2227 }
2228 else
2229 {
2230 f = fopen(pattern_filename, "r");
2231 if (f == NULL)
2232 {
2233 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2234 strerror(errno));
2235 goto EXIT2;
2236 }
2237 filename = pattern_filename;
2238 }
2239
2240 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2241 {
2242 char *s = buffer + (int)strlen(buffer);
2243 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2244 *s = 0;
2245 linenumber++;
2246 if (buffer[0] == 0) continue; /* Skip blank lines */
2247 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2248 goto EXIT2;
2249 }
2250
2251 if (f != stdin) fclose(f);
2252 }
2253
2254 /* Study the regular expressions, as we will be running them many times. */
2255
2256 for (j = 0; j < pattern_count; j++)
2257 {
2258 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2259 if (error != NULL)
2260 {
2261 char s[16];
2262 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2263 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2264 goto EXIT2;
2265 }
2266 hint_count++;
2267 }
2268
2269 /* If there are include or exclude patterns, compile them. */
2270
2271 if (exclude_pattern != NULL)
2272 {
2273 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2274 pcretables);
2275 if (exclude_compiled == NULL)
2276 {
2277 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2278 errptr, error);
2279 goto EXIT2;
2280 }
2281 }
2282
2283 if (include_pattern != NULL)
2284 {
2285 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2286 pcretables);
2287 if (include_compiled == NULL)
2288 {
2289 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2290 errptr, error);
2291 goto EXIT2;
2292 }
2293 }
2294
2295 /* If there are no further arguments, do the business on stdin and exit. */
2296
2297 if (i >= argc)
2298 {
2299 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2300 goto EXIT;
2301 }
2302
2303 /* Otherwise, work through the remaining arguments as files or directories.
2304 Pass in the fact that there is only one argument at top level - this suppresses
2305 the file name if the argument is not a directory and filenames are not
2306 otherwise forced. */
2307
2308 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2309
2310 for (; i < argc; i++)
2311 {
2312 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2313 only_one_at_top);
2314 if (frc > 1) rc = frc;
2315 else if (frc == 0 && rc == 1) rc = 0;
2316 }
2317
2318 EXIT:
2319 if (pattern_list != NULL)
2320 {
2321 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2322 free(pattern_list);
2323 }
2324 if (hints_list != NULL)
2325 {
2326 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2327 free(hints_list);
2328 }
2329 return rc;
2330
2331 EXIT2:
2332 rc = 2;
2333 goto EXIT;
2334 }
2335
2336 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12