/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 345 - (show annotations) (download)
Mon Apr 28 15:10:02 2008 UTC (6 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 67152 byte(s)
Tidies for the 7.7-RC1 distribution.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2008 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74
75 #if BUFSIZ > 8192
76 #define MBUFTHIRD BUFSIZ
77 #else
78 #define MBUFTHIRD 8192
79 #endif
80
81 /* Values for the "filenames" variable, which specifies options for file name
82 output. The order is important; it is assumed that a file name is wanted for
83 all values greater than FN_DEFAULT. */
84
85 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86
87 /* File reading styles */
88
89 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90
91 /* Actions for the -d and -D options */
92
93 enum { dee_READ, dee_SKIP, dee_RECURSE };
94 enum { DEE_READ, DEE_SKIP };
95
96 /* Actions for special processing options (flag bits) */
97
98 #define PO_WORD_MATCH 0x0001
99 #define PO_LINE_MATCH 0x0002
100 #define PO_FIXED_STRINGS 0x0004
101
102 /* Line ending types */
103
104 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105
106
107
108 /*************************************************
109 * Global variables *
110 *************************************************/
111
112 /* Jeffrey Friedl has some debugging requirements that are not part of the
113 regular code. */
114
115 #ifdef JFRIEDL_DEBUG
116 static int S_arg = -1;
117 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119 static const char *jfriedl_prefix = "";
120 static const char *jfriedl_postfix = "";
121 #endif
122
123 static int endlinetype;
124
125 static char *colour_string = (char *)"1;31";
126 static char *colour_option = NULL;
127 static char *dee_option = NULL;
128 static char *DEE_option = NULL;
129 static char *newline = NULL;
130 static char *pattern_filename = NULL;
131 static char *stdin_name = (char *)"(standard input)";
132 static char *locale = NULL;
133
134 static const unsigned char *pcretables = NULL;
135
136 static int pattern_count = 0;
137 static pcre **pattern_list = NULL;
138 static pcre_extra **hints_list = NULL;
139
140 static char *include_pattern = NULL;
141 static char *exclude_pattern = NULL;
142 static char *include_dir_pattern = NULL;
143 static char *exclude_dir_pattern = NULL;
144
145 static pcre *include_compiled = NULL;
146 static pcre *exclude_compiled = NULL;
147 static pcre *include_dir_compiled = NULL;
148 static pcre *exclude_dir_compiled = NULL;
149
150 static int after_context = 0;
151 static int before_context = 0;
152 static int both_context = 0;
153 static int dee_action = dee_READ;
154 static int DEE_action = DEE_READ;
155 static int error_count = 0;
156 static int filenames = FN_DEFAULT;
157 static int process_options = 0;
158
159 static BOOL count_only = FALSE;
160 static BOOL do_colour = FALSE;
161 static BOOL file_offsets = FALSE;
162 static BOOL hyphenpending = FALSE;
163 static BOOL invert = FALSE;
164 static BOOL line_offsets = FALSE;
165 static BOOL multiline = FALSE;
166 static BOOL number = FALSE;
167 static BOOL only_matching = FALSE;
168 static BOOL quiet = FALSE;
169 static BOOL silent = FALSE;
170 static BOOL utf8 = FALSE;
171
172 /* Structure for options and list of them */
173
174 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
175 OP_PATLIST };
176
177 typedef struct option_item {
178 int type;
179 int one_char;
180 void *dataptr;
181 const char *long_name;
182 const char *help_text;
183 } option_item;
184
185 /* Options without a single-letter equivalent get a negative value. This can be
186 used to identify them. */
187
188 #define N_COLOUR (-1)
189 #define N_EXCLUDE (-2)
190 #define N_EXCLUDE_DIR (-3)
191 #define N_HELP (-4)
192 #define N_INCLUDE (-5)
193 #define N_INCLUDE_DIR (-6)
194 #define N_LABEL (-7)
195 #define N_LOCALE (-8)
196 #define N_NULL (-9)
197 #define N_LOFFSETS (-10)
198 #define N_FOFFSETS (-11)
199
200 static option_item optionlist[] = {
201 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
202 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
203 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
204 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
205 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
206 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
207 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
208 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
209 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
210 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
211 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
212 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
213 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
214 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
215 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
216 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
217 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
218 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
219 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
220 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
221 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
222 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
223 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
224 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
226 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
227 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
228 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
229 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
230 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
231 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233 #ifdef JFRIEDL_DEBUG
234 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
235 #endif
236 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
237 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
238 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
239 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
240 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
241 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
242 { OP_NODATA, 0, NULL, NULL, NULL }
243 };
244
/* Text that is put before and after each pattern to implement the -w, -x, and
-F options. Those options set the 1, 2, and 4 bits in process_options,
respectively, and the resulting value is the index into these tables. The
combination of -w and -x has the same effect as -x on its own, so the entries
for those two settings are the same. */

static const char *prefix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
255
/* UTF-8 tables - used when decoding characters in UTF-8 mode while looking for
line endings with the newline setting "any" or "anycrlf". utf8_table3 is
indexed by the number of additional bytes in a character and gives the mask for
the data bits in its first byte. utf8_table4 is indexed by the bottom six bits
of a first byte whose value is 0xc0 or more, and gives the number of additional
bytes. The tables are private to this file, so they are static. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265
266
267
268 /*************************************************
269 * OS-specific functions *
270 *************************************************/
271
272 /* These functions are defined so that they can be made system specific,
273 although at present the only ones are for Unix, Win32, and for "no support". */
274
275
276 /************* Directory scanning in Unix ***********/
277
278 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279 #include <sys/types.h>
280 #include <sys/stat.h>
281 #include <dirent.h>
282
283 typedef DIR directory_type;
284
/* Test whether a path names a directory, using stat().

Argument:   filename   the path to test
Returns:    '/' if the path is a directory
            0 if it is not, or if stat() fails; in the failure case the
              expectation is that a later attempt to open the path as a file
              will fail
*/

static int
isdirectory(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 0;
if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
293
294 static directory_type *
295 opendirectory(char *filename)
296 {
297 return opendir(filename);
298 }
299
300 static char *
301 readdirectory(directory_type *dir)
302 {
303 for (;;)
304 {
305 struct dirent *dent = readdir(dir);
306 if (dent == NULL) return NULL;
307 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308 return dent->d_name;
309 }
310 /* Control never reaches here */
311 }
312
313 static void
314 closedirectory(directory_type *dir)
315 {
316 closedir(dir);
317 }
318
319
320 /************* Test for regular file in Unix **********/
321
/* Test whether a path names a regular file, using stat().

Argument:   filename   the path to test
Returns:    non-zero if the path is a regular file, or if stat() fails (the
              expectation being that a later attempt to open the path as a
              file will fail)
            0 if stat() succeeds and the path is something else
*/

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode);
}
330
331
332 /************* Test stdout for being a terminal in Unix **********/
333
334 static BOOL
335 is_stdout_tty(void)
336 {
337 return isatty(fileno(stdout));
338 }
339
340
341 /************* Directory scanning in Win32 ***********/
342
343 /* I (Philip Hazel) have no means of testing this code. It was contributed by
344 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345 when it did not exist. David Byron added a patch that moved the #include of
346 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347 */
348
349 #elif HAVE_WINDOWS_H
350
351 #ifndef STRICT
352 # define STRICT
353 #endif
354 #ifndef WIN32_LEAN_AND_MEAN
355 # define WIN32_LEAN_AND_MEAN
356 #endif
357
358 #include <windows.h>
359
360 #ifndef INVALID_FILE_ATTRIBUTES
361 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362 #endif
363
364 typedef struct directory_type
365 {
366 HANDLE handle;
367 BOOL first;
368 WIN32_FIND_DATA data;
369 } directory_type;
370
371 int
372 isdirectory(char *filename)
373 {
374 DWORD attr = GetFileAttributes(filename);
375 if (attr == INVALID_FILE_ATTRIBUTES)
376 return 0;
377 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
378 }
379
380 directory_type *
381 opendirectory(char *filename)
382 {
383 size_t len;
384 char *pattern;
385 directory_type *dir;
386 DWORD err;
387 len = strlen(filename);
388 pattern = (char *) malloc(len + 3);
389 dir = (directory_type *) malloc(sizeof(*dir));
390 if ((pattern == NULL) || (dir == NULL))
391 {
392 fprintf(stderr, "pcregrep: malloc failed\n");
393 exit(2);
394 }
395 memcpy(pattern, filename, len);
396 memcpy(&(pattern[len]), "\\*", 3);
397 dir->handle = FindFirstFile(pattern, &(dir->data));
398 if (dir->handle != INVALID_HANDLE_VALUE)
399 {
400 free(pattern);
401 dir->first = TRUE;
402 return dir;
403 }
404 err = GetLastError();
405 free(pattern);
406 free(dir);
407 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
408 return NULL;
409 }
410
411 char *
412 readdirectory(directory_type *dir)
413 {
414 for (;;)
415 {
416 if (!dir->first)
417 {
418 if (!FindNextFile(dir->handle, &(dir->data)))
419 return NULL;
420 }
421 else
422 {
423 dir->first = FALSE;
424 }
425 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
426 return dir->data.cFileName;
427 }
428 #ifndef _MSC_VER
429 return NULL; /* Keep compiler happy; never executed */
430 #endif
431 }
432
433 void
434 closedirectory(directory_type *dir)
435 {
436 FindClose(dir->handle);
437 free(dir);
438 }
439
440
441 /************* Test for regular file in Win32 **********/
442
443 /* I don't know how to do this, or if it can be done; assume all paths are
444 regular if they are not directories. */
445
/* Anything that isdirectory() does not report as a directory is treated as a
regular file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns zero for the path, 0 otherwise
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
450
451
452 /************* Test stdout for being a terminal in Win32 **********/
453
454 /* I don't know how to do this; assume never */
455
456 static BOOL
457 is_stdout_tty(void)
458 {
459 return FALSE;
460 }
461
462
463 /************* Directory scanning when we can't do it ***********/
464
465 /* The type is void, and apart from isdirectory(), the functions do nothing. */
466
467 #else
468
469 typedef void directory_type;
470
/* Without directory support, nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}
472 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
473 char *readdirectory(directory_type *dir) { return (char*)0;}
474 void closedirectory(directory_type *dir) {}
475
476
477 /************* Test for regular when we can't do it **********/
478
479 /* Assume all files are regular. */
480
/* Without a means of testing, every path is assumed to be a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
482
483
484 /************* Test stdout for being a terminal when we can't do it **********/
485
486 static BOOL
487 is_stdout_tty(void)
488 {
489 return FALSE;
490 }
491
492
493 #endif
494
495
496
497 #ifndef HAVE_STRERROR
498 /*************************************************
499 * Provide strerror() for non-ANSI libraries *
500 *************************************************/
501
502 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
503 in their libraries, but can provide the same facility by this simple
504 alternative function. */
505
/* The system's table of error messages and the number of entries in it. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:   n   an error number
Returns:    the text from sys_errlist[] for the number, or a fixed string if
              the number is negative or not less than sys_nerr
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
515 #endif /* HAVE_STRERROR */
516
517
518
519 /*************************************************
520 * Find end of line *
521 *************************************************/
522
523 /* The length of the endline sequence that is found is set via lenptr. This may
524 be zero at the very end of the file if there is no line-ending sequence there.
525
526 Arguments:
527 p current position in line
528 endptr end of available data
529 lenptr where to put the length of the eol sequence
530
531 Returns: pointer to the last byte of the line
532 */
533
534 static char *
535 end_of_line(char *p, char *endptr, int *lenptr)
536 {
537 switch(endlinetype)
538 {
539 default: /* Just in case */
540 case EL_LF:
541 while (p < endptr && *p != '\n') p++;
542 if (p < endptr)
543 {
544 *lenptr = 1;
545 return p + 1;
546 }
547 *lenptr = 0;
548 return endptr;
549
550 case EL_CR:
551 while (p < endptr && *p != '\r') p++;
552 if (p < endptr)
553 {
554 *lenptr = 1;
555 return p + 1;
556 }
557 *lenptr = 0;
558 return endptr;
559
560 case EL_CRLF:
561 for (;;)
562 {
563 while (p < endptr && *p != '\r') p++;
564 if (++p >= endptr)
565 {
566 *lenptr = 0;
567 return endptr;
568 }
569 if (*p == '\n')
570 {
571 *lenptr = 2;
572 return p + 1;
573 }
574 }
575 break;
576
577 case EL_ANYCRLF:
578 while (p < endptr)
579 {
580 int extra = 0;
581 register int c = *((unsigned char *)p);
582
583 if (utf8 && c >= 0xc0)
584 {
585 int gcii, gcss;
586 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
587 gcss = 6*extra;
588 c = (c & utf8_table3[extra]) << gcss;
589 for (gcii = 1; gcii <= extra; gcii++)
590 {
591 gcss -= 6;
592 c |= (p[gcii] & 0x3f) << gcss;
593 }
594 }
595
596 p += 1 + extra;
597
598 switch (c)
599 {
600 case 0x0a: /* LF */
601 *lenptr = 1;
602 return p;
603
604 case 0x0d: /* CR */
605 if (p < endptr && *p == 0x0a)
606 {
607 *lenptr = 2;
608 p++;
609 }
610 else *lenptr = 1;
611 return p;
612
613 default:
614 break;
615 }
616 } /* End of loop for ANYCRLF case */
617
618 *lenptr = 0; /* Must have hit the end */
619 return endptr;
620
621 case EL_ANY:
622 while (p < endptr)
623 {
624 int extra = 0;
625 register int c = *((unsigned char *)p);
626
627 if (utf8 && c >= 0xc0)
628 {
629 int gcii, gcss;
630 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
631 gcss = 6*extra;
632 c = (c & utf8_table3[extra]) << gcss;
633 for (gcii = 1; gcii <= extra; gcii++)
634 {
635 gcss -= 6;
636 c |= (p[gcii] & 0x3f) << gcss;
637 }
638 }
639
640 p += 1 + extra;
641
642 switch (c)
643 {
644 case 0x0a: /* LF */
645 case 0x0b: /* VT */
646 case 0x0c: /* FF */
647 *lenptr = 1;
648 return p;
649
650 case 0x0d: /* CR */
651 if (p < endptr && *p == 0x0a)
652 {
653 *lenptr = 2;
654 p++;
655 }
656 else *lenptr = 1;
657 return p;
658
659 case 0x85: /* NEL */
660 *lenptr = utf8? 2 : 1;
661 return p;
662
663 case 0x2028: /* LS */
664 case 0x2029: /* PS */
665 *lenptr = 3;
666 return p;
667
668 default:
669 break;
670 }
671 } /* End of loop for ANY case */
672
673 *lenptr = 0; /* Must have hit the end */
674 return endptr;
675 } /* End of overall switch */
676 }
677
678
679
680 /*************************************************
681 * Find start of previous line *
682 *************************************************/
683
684 /* This is called when looking back for before lines to print.
685
686 Arguments:
687 p start of the subsequent line
688 startptr start of available data
689
690 Returns: pointer to the start of the previous line
691 */
692
693 static char *
694 previous_line(char *p, char *startptr)
695 {
696 switch(endlinetype)
697 {
698 default: /* Just in case */
699 case EL_LF:
700 p--;
701 while (p > startptr && p[-1] != '\n') p--;
702 return p;
703
704 case EL_CR:
705 p--;
706 while (p > startptr && p[-1] != '\n') p--;
707 return p;
708
709 case EL_CRLF:
710 for (;;)
711 {
712 p -= 2;
713 while (p > startptr && p[-1] != '\n') p--;
714 if (p <= startptr + 1 || p[-2] == '\r') return p;
715 }
716 return p; /* But control should never get here */
717
718 case EL_ANY:
719 case EL_ANYCRLF:
720 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721 if (utf8) while ((*p & 0xc0) == 0x80) p--;
722
723 while (p > startptr)
724 {
725 register int c;
726 char *pp = p - 1;
727
728 if (utf8)
729 {
730 int extra = 0;
731 while ((*pp & 0xc0) == 0x80) pp--;
732 c = *((unsigned char *)pp);
733 if (c >= 0xc0)
734 {
735 int gcii, gcss;
736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737 gcss = 6*extra;
738 c = (c & utf8_table3[extra]) << gcss;
739 for (gcii = 1; gcii <= extra; gcii++)
740 {
741 gcss -= 6;
742 c |= (pp[gcii] & 0x3f) << gcss;
743 }
744 }
745 }
746 else c = *((unsigned char *)pp);
747
748 if (endlinetype == EL_ANYCRLF) switch (c)
749 {
750 case 0x0a: /* LF */
751 case 0x0d: /* CR */
752 return p;
753
754 default:
755 break;
756 }
757
758 else switch (c)
759 {
760 case 0x0a: /* LF */
761 case 0x0b: /* VT */
762 case 0x0c: /* FF */
763 case 0x0d: /* CR */
764 case 0x85: /* NEL */
765 case 0x2028: /* LS */
766 case 0x2029: /* PS */
767 return p;
768
769 default:
770 break;
771 }
772
773 p = pp; /* Back one character */
774 } /* End of loop for ANY case */
775
776 return startptr; /* Hit start of data */
777 } /* End of overall switch */
778 }
779
780
781
782
783
784 /*************************************************
785 * Print the previous "after" lines *
786 *************************************************/
787
788 /* This is called if we are about to lose said lines because of buffer filling,
789 and at the end of the file. The data in the line is written using fwrite() so
790 that a binary zero does not terminate it.
791
792 Arguments:
793 lastmatchnumber the number of the last matching line, plus one
794 lastmatchrestart where we restarted after the last match
795 endptr end of available data
796 printname filename for printing
797
798 Returns: nothing
799 */
800
801 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802 char *endptr, char *printname)
803 {
804 if (after_context > 0 && lastmatchnumber > 0)
805 {
806 int count = 0;
807 while (lastmatchrestart < endptr && count++ < after_context)
808 {
809 int ellength;
810 char *pp = lastmatchrestart;
811 if (printname != NULL) fprintf(stdout, "%s-", printname);
812 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813 pp = end_of_line(pp, endptr, &ellength);
814 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815 lastmatchrestart = pp;
816 }
817 hyphenpending = TRUE;
818 }
819 }
820
821
822
823 /*************************************************
824 * Grep an individual file *
825 *************************************************/
826
827 /* This is called from grep_or_recurse() below. It uses a buffer that is three
828 times the value of MBUFTHIRD. The matching point is never allowed to stray into
829 the top third of the buffer, thus keeping more of the file available for
830 context printing or for multiline scanning. For large files, the pointer will
831 be in the middle third most of the time, so the bottom third is available for
832 "before" context printing.
833
834 Arguments:
835 handle the fopened FILE stream for a normal file
836 the gzFile pointer when reading is via libz
837 the BZFILE pointer when reading is via libbz2
838 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839 printname the file name if it is to be printed for each match
840 or NULL if the file name is not to be printed
841 it cannot be NULL if filenames[_nomatch]_only is set
842
843 Returns: 0 if there was at least one match
844 1 otherwise (no matches)
845 2 if there is a read error on a .bz2 file
846 */
847
848 static int
849 pcregrep(void *handle, int frtype, char *printname)
850 {
851 int rc = 1;
852 int linenumber = 1;
853 int lastmatchnumber = 0;
854 int count = 0;
855 int filepos = 0;
856 int offsets[99];
857 char *lastmatchrestart = NULL;
858 char buffer[3*MBUFTHIRD];
859 char *ptr = buffer;
860 char *endptr;
861 size_t bufflength;
862 BOOL endhyphenpending = FALSE;
863 FILE *in = NULL; /* Ensure initialized */
864
865 #ifdef SUPPORT_LIBZ
866 gzFile ingz = NULL;
867 #endif
868
869 #ifdef SUPPORT_LIBBZ2
870 BZFILE *inbz2 = NULL;
871 #endif
872
873
874 /* Do the first read into the start of the buffer and set up the pointer to end
875 of what we have. In the case of libz, a non-zipped .gz file will be read as a
876 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877 fail. */
878
879 #ifdef SUPPORT_LIBZ
880 if (frtype == FR_LIBZ)
881 {
882 ingz = (gzFile)handle;
883 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884 }
885 else
886 #endif
887
888 #ifdef SUPPORT_LIBBZ2
889 if (frtype == FR_LIBBZ2)
890 {
891 inbz2 = (BZFILE *)handle;
892 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
894 } /* without the cast it is unsigned. */
895 else
896 #endif
897
898 {
899 in = (FILE *)handle;
900 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901 }
902
903 endptr = buffer + bufflength;
904
905 /* Loop while the current pointer is not at the end of the file. For large
906 files, endptr will be at the end of the buffer when we are in the middle of the
907 file, but ptr will never get there, because as soon as it gets over 2/3 of the
908 way, the buffer is shifted left and re-filled. */
909
910 while (ptr < endptr)
911 {
912 int i, endlinelength;
913 int mrc = 0;
914 BOOL match = FALSE;
915 char *matchptr = ptr;
916 char *t = ptr;
917 size_t length, linelength;
918
919 /* At this point, ptr is at the start of a line. We need to find the length
920 of the subject string to pass to pcre_exec(). In multiline mode, it is the
921 length remainder of the data in the buffer. Otherwise, it is the length of
922 the next line. After matching, we always advance by the length of the next
923 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924 that any match is constrained to be in the first line. */
925
926 t = end_of_line(t, endptr, &endlinelength);
927 linelength = t - ptr - endlinelength;
928 length = multiline? (size_t)(endptr - ptr) : linelength;
929
930 /* Extra processing for Jeffrey Friedl's debugging. */
931
932 #ifdef JFRIEDL_DEBUG
933 if (jfriedl_XT || jfriedl_XR)
934 {
935 #include <sys/time.h>
936 #include <time.h>
937 struct timeval start_time, end_time;
938 struct timezone dummy;
939
940 if (jfriedl_XT)
941 {
942 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943 const char *orig = ptr;
944 ptr = malloc(newlen + 1);
945 if (!ptr) {
946 printf("out of memory");
947 exit(2);
948 }
949 endptr = ptr;
950 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951 for (i = 0; i < jfriedl_XT; i++) {
952 strncpy(endptr, orig, length);
953 endptr += length;
954 }
955 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956 length = newlen;
957 }
958
959 if (gettimeofday(&start_time, &dummy) != 0)
960 perror("bad gettimeofday");
961
962
963 for (i = 0; i < jfriedl_XR; i++)
964 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965
966 if (gettimeofday(&end_time, &dummy) != 0)
967 perror("bad gettimeofday");
968
969 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970 -
971 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972
973 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974 return 0;
975 }
976 #endif
977
978 /* We come back here after a match when the -o option (only_matching) is set,
979 in order to find any further matches in the same line. */
980
981 ONLY_MATCHING_RESTART:
982
983 /* Run through all the patterns until one matches. Note that we don't include
984 the final newline in the subject string. */
985
986 for (i = 0; i < pattern_count; i++)
987 {
988 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989 offsets, 99);
990 if (mrc >= 0) { match = TRUE; break; }
991 if (mrc != PCRE_ERROR_NOMATCH)
992 {
993 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995 fprintf(stderr, "this line:\n");
996 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
997 fprintf(stderr, "\n");
998 if (error_count == 0 &&
999 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000 {
1001 fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002 "was exceeded\n", mrc);
1003 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004 }
1005 if (error_count++ > 20)
1006 {
1007 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008 exit(2);
1009 }
1010 match = invert; /* No more matching; don't show the line again */
1011 break;
1012 }
1013 }
1014
1015 /* If it's a match or a not-match (as required), do what's wanted. */
1016
1017 if (match != invert)
1018 {
1019 BOOL hyphenprinted = FALSE;
1020
1021 /* We've failed if we want a file that doesn't have any matches. */
1022
1023 if (filenames == FN_NOMATCH_ONLY) return 1;
1024
1025 /* Just count if just counting is wanted. */
1026
1027 if (count_only) count++;
1028
1029 /* If all we want is a file name, there is no need to scan any more lines
1030 in the file. */
1031
1032 else if (filenames == FN_ONLY)
1033 {
1034 fprintf(stdout, "%s\n", printname);
1035 return 0;
1036 }
1037
1038 /* Likewise, if all we want is a yes/no answer. */
1039
1040 else if (quiet) return 0;
1041
1042 /* The --only-matching option prints just the substring that matched, and
1043 the --file-offsets and --line-offsets options output offsets for the
1044 matching substring (they both force --only-matching). None of these options
1045 prints any context. Afterwards, adjust the start and length, and then jump
1046 back to look for further matches in the same line. If we are in invert
1047 mode, however, nothing is printed - this could be still useful because the
1048 return code is set. */
1049
1050 else if (only_matching)
1051 {
1052 if (!invert)
1053 {
1054 if (printname != NULL) fprintf(stdout, "%s:", printname);
1055 if (number) fprintf(stdout, "%d:", linenumber);
1056 if (line_offsets)
1057 fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1058 offsets[1] - offsets[0]);
1059 else if (file_offsets)
1060 fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1061 offsets[1] - offsets[0]);
1062 else
1063 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1064 fprintf(stdout, "\n");
1065 matchptr += offsets[1];
1066 length -= offsets[1];
1067 match = FALSE;
1068 goto ONLY_MATCHING_RESTART;
1069 }
1070 }
1071
1072 /* This is the default case when none of the above options is set. We print
1073 the matching lines(s), possibly preceded and/or followed by other lines of
1074 context. */
1075
1076 else
1077 {
1078 /* See if there is a requirement to print some "after" lines from a
1079 previous match. We never print any overlaps. */
1080
1081 if (after_context > 0 && lastmatchnumber > 0)
1082 {
1083 int ellength;
1084 int linecount = 0;
1085 char *p = lastmatchrestart;
1086
1087 while (p < ptr && linecount < after_context)
1088 {
1089 p = end_of_line(p, ptr, &ellength);
1090 linecount++;
1091 }
1092
1093 /* It is important to advance lastmatchrestart during this printing so
1094 that it interacts correctly with any "before" printing below. Print
1095 each line's data using fwrite() in case there are binary zeroes. */
1096
1097 while (lastmatchrestart < p)
1098 {
1099 char *pp = lastmatchrestart;
1100 if (printname != NULL) fprintf(stdout, "%s-", printname);
1101 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1102 pp = end_of_line(pp, endptr, &ellength);
1103 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1104 lastmatchrestart = pp;
1105 }
1106 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1107 }
1108
1109 /* If there were non-contiguous lines printed above, insert hyphens. */
1110
1111 if (hyphenpending)
1112 {
1113 fprintf(stdout, "--\n");
1114 hyphenpending = FALSE;
1115 hyphenprinted = TRUE;
1116 }
1117
1118 /* See if there is a requirement to print some "before" lines for this
1119 match. Again, don't print overlaps. */
1120
1121 if (before_context > 0)
1122 {
1123 int linecount = 0;
1124 char *p = ptr;
1125
1126 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1127 linecount < before_context)
1128 {
1129 linecount++;
1130 p = previous_line(p, buffer);
1131 }
1132
1133 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1134 fprintf(stdout, "--\n");
1135
1136 while (p < ptr)
1137 {
1138 int ellength;
1139 char *pp = p;
1140 if (printname != NULL) fprintf(stdout, "%s-", printname);
1141 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1142 pp = end_of_line(pp, endptr, &ellength);
1143 fwrite(p, 1, pp - p, stdout);
1144 p = pp;
1145 }
1146 }
1147
1148 /* Now print the matching line(s); ensure we set hyphenpending at the end
1149 of the file if any context lines are being output. */
1150
1151 if (after_context > 0 || before_context > 0)
1152 endhyphenpending = TRUE;
1153
1154 if (printname != NULL) fprintf(stdout, "%s:", printname);
1155 if (number) fprintf(stdout, "%d:", linenumber);
1156
1157 /* In multiline mode, we want to print to the end of the line in which
1158 the end of the matched string is found, so we adjust linelength and the
1159 line number appropriately, but only when there actually was a match
1160 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1161 the match will always be before the first newline sequence. */
1162
1163 if (multiline)
1164 {
1165 int ellength;
1166 char *endmatch = ptr;
1167 if (!invert)
1168 {
1169 endmatch += offsets[1];
1170 t = ptr;
1171 while (t < endmatch)
1172 {
1173 t = end_of_line(t, endptr, &ellength);
1174 if (t <= endmatch) linenumber++; else break;
1175 }
1176 }
1177 endmatch = end_of_line(endmatch, endptr, &ellength);
1178 linelength = endmatch - ptr - ellength;
1179 }
1180
1181 /*** NOTE: Use only fwrite() to output the data line, so that binary
1182 zeroes are treated as just another data character. */
1183
1184 /* This extra option, for Jeffrey Friedl's debugging requirements,
1185 replaces the matched string, or a specific captured string if it exists,
1186 with X. When this happens, colouring is ignored. */
1187
1188 #ifdef JFRIEDL_DEBUG
1189 if (S_arg >= 0 && S_arg < mrc)
1190 {
1191 int first = S_arg * 2;
1192 int last = first + 1;
1193 fwrite(ptr, 1, offsets[first], stdout);
1194 fprintf(stdout, "X");
1195 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1196 }
1197 else
1198 #endif
1199
1200 /* We have to split the line(s) up if colouring. */
1201
1202 if (do_colour)
1203 {
1204 fwrite(ptr, 1, offsets[0], stdout);
1205 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1206 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1207 fprintf(stdout, "%c[00m", 0x1b);
1208 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1209 stdout);
1210 }
1211 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1212 }
1213
1214 /* End of doing what has to be done for a match */
1215
1216 rc = 0; /* Had some success */
1217
1218 /* Remember where the last match happened for after_context. We remember
1219 where we are about to restart, and that line's number. */
1220
1221 lastmatchrestart = ptr + linelength + endlinelength;
1222 lastmatchnumber = linenumber + 1;
1223 }
1224
1225 /* For a match in multiline inverted mode (which of course did not cause
1226 anything to be printed), we have to move on to the end of the match before
1227 proceeding. */
1228
1229 if (multiline && invert && match)
1230 {
1231 int ellength;
1232 char *endmatch = ptr + offsets[1];
1233 t = ptr;
1234 while (t < endmatch)
1235 {
1236 t = end_of_line(t, endptr, &ellength);
1237 if (t <= endmatch) linenumber++; else break;
1238 }
1239 endmatch = end_of_line(endmatch, endptr, &ellength);
1240 linelength = endmatch - ptr - ellength;
1241 }
1242
1243 /* Advance to after the newline and increment the line number. The file
1244 offset to the current line is maintained in filepos. */
1245
1246 ptr += linelength + endlinelength;
1247 filepos += linelength + endlinelength;
1248 linenumber++;
1249
1250 /* If we haven't yet reached the end of the file (the buffer is full), and
1251 the current point is in the top 1/3 of the buffer, slide the buffer down by
1252 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1253 about to be lost, print them. */
1254
1255 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1256 {
1257 if (after_context > 0 &&
1258 lastmatchnumber > 0 &&
1259 lastmatchrestart < buffer + MBUFTHIRD)
1260 {
1261 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1262 lastmatchnumber = 0;
1263 }
1264
1265 /* Now do the shuffle */
1266
1267 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1268 ptr -= MBUFTHIRD;
1269
1270 #ifdef SUPPORT_LIBZ
1271 if (frtype == FR_LIBZ)
1272 bufflength = 2*MBUFTHIRD +
1273 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1274 else
1275 #endif
1276
1277 #ifdef SUPPORT_LIBBZ2
1278 if (frtype == FR_LIBBZ2)
1279 bufflength = 2*MBUFTHIRD +
1280 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1281 else
1282 #endif
1283
1284 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1285
1286 endptr = buffer + bufflength;
1287
1288 /* Adjust any last match point */
1289
1290 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1291 }
1292 } /* Loop through the whole file */
1293
1294 /* End of file; print final "after" lines if wanted; do_after_lines sets
1295 hyphenpending if it prints something. */
1296
1297 if (!only_matching && !count_only)
1298 {
1299 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1300 hyphenpending |= endhyphenpending;
1301 }
1302
1303 /* Print the file name if we are looking for those without matches and there
1304 were none. If we found a match, we won't have got this far. */
1305
1306 if (filenames == FN_NOMATCH_ONLY)
1307 {
1308 fprintf(stdout, "%s\n", printname);
1309 return 0;
1310 }
1311
1312 /* Print the match count if wanted */
1313
1314 if (count_only)
1315 {
1316 if (printname != NULL) fprintf(stdout, "%s:", printname);
1317 fprintf(stdout, "%d\n", count);
1318 }
1319
1320 return rc;
1321 }
1322
1323
1324
1325 /*************************************************
1326 * Grep a file or recurse into a directory *
1327 *************************************************/
1328
1329 /* Given a path name, if it's a directory, scan all the files if we are
1330 recursing; if it's a file, grep it.
1331
1332 Arguments:
1333 pathname the path to investigate
1334 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1335 only_one_at_top TRUE if the path is the only one at toplevel
1336
1337 Returns: 0 if there was at least one match
1338 1 if there were no matches
1339 2 there was some kind of error
1340
1341 However, file opening failures are suppressed if "silent" is set.
1342 */
1343
1344 static int
1345 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1346 {
1347 int rc = 1;
1348 int sep;
1349 int frtype;
1350 int pathlen;
1351 void *handle;
1352 FILE *in = NULL; /* Ensure initialized */
1353
1354 #ifdef SUPPORT_LIBZ
1355 gzFile ingz = NULL;
1356 #endif
1357
1358 #ifdef SUPPORT_LIBBZ2
1359 BZFILE *inbz2 = NULL;
1360 #endif
1361
1362 /* If the file name is "-" we scan stdin */
1363
1364 if (strcmp(pathname, "-") == 0)
1365 {
1366 return pcregrep(stdin, FR_PLAIN,
1367 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1368 stdin_name : NULL);
1369 }
1370
1371 /* If the file is a directory, skip if skipping or if we are recursing, scan
1372 each file and directory within it, subject to any include or exclude patterns
1373 that were set. The scanning code is localized so it can be made
1374 system-specific. */
1375
1376 if ((sep = isdirectory(pathname)) != 0)
1377 {
1378 if (dee_action == dee_SKIP) return 1;
1379 if (dee_action == dee_RECURSE)
1380 {
1381 char buffer[1024];
1382 char *nextfile;
1383 directory_type *dir = opendirectory(pathname);
1384
1385 if (dir == NULL)
1386 {
1387 if (!silent)
1388 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1389 strerror(errno));
1390 return 2;
1391 }
1392
1393 while ((nextfile = readdirectory(dir)) != NULL)
1394 {
1395 int frc, nflen;
1396 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1397 nflen = strlen(nextfile);
1398
1399 if (isdirectory(buffer))
1400 {
1401 if (exclude_dir_compiled != NULL &&
1402 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1403 continue;
1404
1405 if (include_dir_compiled != NULL &&
1406 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1407 continue;
1408 }
1409 else
1410 {
1411 if (exclude_compiled != NULL &&
1412 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1413 continue;
1414
1415 if (include_compiled != NULL &&
1416 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1417 continue;
1418 }
1419
1420 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1421 if (frc > 1) rc = frc;
1422 else if (frc == 0 && rc == 1) rc = 0;
1423 }
1424
1425 closedirectory(dir);
1426 return rc;
1427 }
1428 }
1429
1430 /* If the file is not a directory and not a regular file, skip it if that's
1431 been requested. */
1432
1433 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1434
1435 /* Control reaches here if we have a regular file, or if we have a directory
1436 and recursion or skipping was not requested, or if we have anything else and
1437 skipping was not requested. The scan proceeds. If this is the first and only
1438 argument at top level, we don't show the file name, unless we are only showing
1439 the file name, or the filename was forced (-H). */
1440
1441 pathlen = strlen(pathname);
1442
1443 /* Open using zlib if it is supported and the file name ends with .gz. */
1444
1445 #ifdef SUPPORT_LIBZ
1446 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1447 {
1448 ingz = gzopen(pathname, "rb");
1449 if (ingz == NULL)
1450 {
1451 if (!silent)
1452 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1453 strerror(errno));
1454 return 2;
1455 }
1456 handle = (void *)ingz;
1457 frtype = FR_LIBZ;
1458 }
1459 else
1460 #endif
1461
1462 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1463
1464 #ifdef SUPPORT_LIBBZ2
1465 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1466 {
1467 inbz2 = BZ2_bzopen(pathname, "rb");
1468 handle = (void *)inbz2;
1469 frtype = FR_LIBBZ2;
1470 }
1471 else
1472 #endif
1473
1474 /* Otherwise use plain fopen(). The label is so that we can come back here if
1475 an attempt to read a .bz2 file indicates that it really is a plain file. */
1476
1477 #ifdef SUPPORT_LIBBZ2
1478 PLAIN_FILE:
1479 #endif
1480 {
1481 in = fopen(pathname, "r");
1482 handle = (void *)in;
1483 frtype = FR_PLAIN;
1484 }
1485
1486 /* All the opening methods return errno when they fail. */
1487
1488 if (handle == NULL)
1489 {
1490 if (!silent)
1491 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1492 strerror(errno));
1493 return 2;
1494 }
1495
1496 /* Now grep the file */
1497
1498 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1499 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1500
1501 /* Close in an appropriate manner. */
1502
1503 #ifdef SUPPORT_LIBZ
1504 if (frtype == FR_LIBZ)
1505 gzclose(ingz);
1506 else
1507 #endif
1508
1509 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1510 read failed. If the error indicates that the file isn't in fact bzipped, try
1511 again as a normal file. */
1512
1513 #ifdef SUPPORT_LIBBZ2
1514 if (frtype == FR_LIBBZ2)
1515 {
1516 if (rc == 2)
1517 {
1518 int errnum;
1519 const char *err = BZ2_bzerror(inbz2, &errnum);
1520 if (errnum == BZ_DATA_ERROR_MAGIC)
1521 {
1522 BZ2_bzclose(inbz2);
1523 goto PLAIN_FILE;
1524 }
1525 else if (!silent)
1526 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1527 pathname, err);
1528 }
1529 BZ2_bzclose(inbz2);
1530 }
1531 else
1532 #endif
1533
1534 /* Normal file close */
1535
1536 fclose(in);
1537
1538 /* Pass back the yield from pcregrep(). */
1539
1540 return rc;
1541 }
1542
1543
1544
1545
1546 /*************************************************
1547 * Usage function *
1548 *************************************************/
1549
1550 static int
1551 usage(int rc)
1552 {
1553 option_item *op;
1554 fprintf(stderr, "Usage: pcregrep [-");
1555 for (op = optionlist; op->one_char != 0; op++)
1556 {
1557 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1558 }
1559 fprintf(stderr, "] [long options] [pattern] [files]\n");
1560 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1561 "options.\n");
1562 return rc;
1563 }
1564
1565
1566
1567
1568 /*************************************************
1569 * Help function *
1570 *************************************************/
1571
1572 static void
1573 help(void)
1574 {
1575 option_item *op;
1576
1577 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1578 printf("Search for PATTERN in each FILE or standard input.\n");
1579 printf("PATTERN must be present if neither -e nor -f is used.\n");
1580 printf("\"-\" can be used as a file name to mean STDIN.\n");
1581
1582 #ifdef SUPPORT_LIBZ
1583 printf("Files whose names end in .gz are read using zlib.\n");
1584 #endif
1585
1586 #ifdef SUPPORT_LIBBZ2
1587 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1588 #endif
1589
1590 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1591 printf("Other files and the standard input are read as plain files.\n\n");
1592 #else
1593 printf("All files are read as plain files, without any interpretation.\n\n");
1594 #endif
1595
1596 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1597 printf("Options:\n");
1598
1599 for (op = optionlist; op->one_char != 0; op++)
1600 {
1601 int n;
1602 char s[4];
1603 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1604 n = 30 - printf(" %s --%s", s, op->long_name);
1605 if (n < 1) n = 1;
1606 printf("%.*s%s\n", n, " ", op->help_text);
1607 }
1608
1609 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1610 printf("trailing white space is removed and blank lines are ignored.\n");
1611 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1612
1613 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1614 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1615 }
1616
1617
1618
1619
1620 /*************************************************
1621 * Handle a single-letter, no data option *
1622 *************************************************/
1623
1624 static int
1625 handle_option(int letter, int options)
1626 {
1627 switch(letter)
1628 {
1629 case N_FOFFSETS: file_offsets = TRUE; break;
1630 case N_HELP: help(); exit(0);
1631 case N_LOFFSETS: line_offsets = number = TRUE; break;
1632 case 'c': count_only = TRUE; break;
1633 case 'F': process_options |= PO_FIXED_STRINGS; break;
1634 case 'H': filenames = FN_FORCE; break;
1635 case 'h': filenames = FN_NONE; break;
1636 case 'i': options |= PCRE_CASELESS; break;
1637 case 'l': filenames = FN_ONLY; break;
1638 case 'L': filenames = FN_NOMATCH_ONLY; break;
1639 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1640 case 'n': number = TRUE; break;
1641 case 'o': only_matching = TRUE; break;
1642 case 'q': quiet = TRUE; break;
1643 case 'r': dee_action = dee_RECURSE; break;
1644 case 's': silent = TRUE; break;
1645 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1646 case 'v': invert = TRUE; break;
1647 case 'w': process_options |= PO_WORD_MATCH; break;
1648 case 'x': process_options |= PO_LINE_MATCH; break;
1649
1650 case 'V':
1651 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1652 exit(0);
1653 break;
1654
1655 default:
1656 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1657 exit(usage(2));
1658 }
1659
1660 return options;
1661 }
1662
1663
1664
1665
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), not the ending that their final
digit alone would select.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* Room for a 32-bit int, sign, ending and zero */
const char *ending = "th";
int lasttwo = n % 100;

/* Only look at the final digit when we are outside the 11-13 range. For a
negative n both remainders are non-positive, so the ending stays "th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1688
1689
1690
1691 /*************************************************
1692 * Compile a single pattern *
1693 *************************************************/
1694
1695 /* When the -F option has been used, this is called for each substring.
1696 Otherwise it's called for each supplied pattern.
1697
1698 Arguments:
1699 pattern the pattern string
1700 options the PCRE options
1701 filename the file name, or NULL for a command-line pattern
1702 count 0 if this is the only command line pattern, or
1703 number of the command line pattern, or
1704 linenumber for a pattern from a file
1705
1706 Returns: TRUE on success, FALSE after an error
1707 */
1708
1709 static BOOL
1710 compile_single_pattern(char *pattern, int options, char *filename, int count)
1711 {
1712 char buffer[MBUFTHIRD + 16];
1713 const char *error;
1714 int errptr;
1715
1716 if (pattern_count >= MAX_PATTERN_COUNT)
1717 {
1718 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1719 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1720 return FALSE;
1721 }
1722
1723 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1724 suffix[process_options]);
1725 pattern_list[pattern_count] =
1726 pcre_compile(buffer, options, &error, &errptr, pcretables);
1727 if (pattern_list[pattern_count] != NULL)
1728 {
1729 pattern_count++;
1730 return TRUE;
1731 }
1732
1733 /* Handle compile errors */
1734
1735 errptr -= (int)strlen(prefix[process_options]);
1736 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1737
1738 if (filename == NULL)
1739 {
1740 if (count == 0)
1741 fprintf(stderr, "pcregrep: Error in command-line regex "
1742 "at offset %d: %s\n", errptr, error);
1743 else
1744 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1745 "at offset %d: %s\n", ordin(count), errptr, error);
1746 }
1747 else
1748 {
1749 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1750 "at offset %d: %s\n", count, filename, errptr, error);
1751 }
1752
1753 return FALSE;
1754 }
1755
1756
1757
1758 /*************************************************
1759 * Compile one supplied pattern *
1760 *************************************************/
1761
1762 /* When the -F option has been used, each string may be a list of strings,
1763 separated by line breaks. They will be matched literally.
1764
1765 Arguments:
1766 pattern the pattern string
1767 options the PCRE options
1768 filename the file name, or NULL for a command-line pattern
1769 count 0 if this is the only command line pattern, or
1770 number of the command line pattern, or
1771 linenumber for a pattern from a file
1772
1773 Returns: TRUE on success, FALSE after an error
1774 */
1775
1776 static BOOL
1777 compile_pattern(char *pattern, int options, char *filename, int count)
1778 {
1779 if ((process_options & PO_FIXED_STRINGS) != 0)
1780 {
1781 char *eop = pattern + strlen(pattern);
1782 char buffer[MBUFTHIRD];
1783 for(;;)
1784 {
1785 int ellength;
1786 char *p = end_of_line(pattern, eop, &ellength);
1787 if (ellength == 0)
1788 return compile_single_pattern(pattern, options, filename, count);
1789 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1790 pattern = p;
1791 if (!compile_single_pattern(buffer, options, filename, count))
1792 return FALSE;
1793 }
1794 }
1795 else return compile_single_pattern(pattern, options, filename, count);
1796 }
1797
1798
1799
1800 /*************************************************
1801 * Main program *
1802 *************************************************/
1803
1804 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1805
1806 int
1807 main(int argc, char **argv)
1808 {
1809 int i, j;
1810 int rc = 1;
1811 int pcre_options = 0;
1812 int cmd_pattern_count = 0;
1813 int hint_count = 0;
1814 int errptr;
1815 BOOL only_one_at_top;
1816 char *patterns[MAX_PATTERN_COUNT];
1817 const char *locale_from = "--locale";
1818 const char *error;
1819
1820 /* Set the default line ending value from the default in the PCRE library;
1821 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1822 */
1823
1824 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1825 switch(i)
1826 {
1827 default: newline = (char *)"lf"; break;
1828 case '\r': newline = (char *)"cr"; break;
1829 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1830 case -1: newline = (char *)"any"; break;
1831 case -2: newline = (char *)"anycrlf"; break;
1832 }
1833
1834 /* Process the options */
1835
1836 for (i = 1; i < argc; i++)
1837 {
1838 option_item *op = NULL;
1839 char *option_data = (char *)""; /* default to keep compiler happy */
1840 BOOL longop;
1841 BOOL longopwasequals = FALSE;
1842
1843 if (argv[i][0] != '-') break;
1844
1845 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1846 but only if we have previously had -e or -f to define the patterns. */
1847
1848 if (argv[i][1] == 0)
1849 {
1850 if (pattern_filename != NULL || pattern_count > 0) break;
1851 else exit(usage(2));
1852 }
1853
1854 /* Handle a long name option, or -- to terminate the options */
1855
1856 if (argv[i][1] == '-')
1857 {
1858 char *arg = argv[i] + 2;
1859 char *argequals = strchr(arg, '=');
1860
1861 if (*arg == 0) /* -- terminates options */
1862 {
1863 i++;
1864 break; /* out of the options-handling loop */
1865 }
1866
1867 longop = TRUE;
1868
1869 /* Some long options have data that follows after =, for example file=name.
1870 Some options have variations in the long name spelling: specifically, we
1871 allow "regexp" because GNU grep allows it, though I personally go along
1872 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1873 These options are entered in the table as "regex(p)". No option is in both
1874 these categories, fortunately. */
1875
1876 for (op = optionlist; op->one_char != 0; op++)
1877 {
1878 char *opbra = strchr(op->long_name, '(');
1879 char *equals = strchr(op->long_name, '=');
1880 if (opbra == NULL) /* Not a (p) case */
1881 {
1882 if (equals == NULL) /* Not thing=data case */
1883 {
1884 if (strcmp(arg, op->long_name) == 0) break;
1885 }
1886 else /* Special case xxx=data */
1887 {
1888 int oplen = equals - op->long_name;
1889 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1890 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1891 {
1892 option_data = arg + arglen;
1893 if (*option_data == '=')
1894 {
1895 option_data++;
1896 longopwasequals = TRUE;
1897 }
1898 break;
1899 }
1900 }
1901 }
1902 else /* Special case xxxx(p) */
1903 {
1904 char buff1[24];
1905 char buff2[24];
1906 int baselen = opbra - op->long_name;
1907 sprintf(buff1, "%.*s", baselen, op->long_name);
1908 sprintf(buff2, "%s%.*s", buff1,
1909 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1910 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1911 break;
1912 }
1913 }
1914
1915 if (op->one_char == 0)
1916 {
1917 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1918 exit(usage(2));
1919 }
1920 }
1921
1922
1923 /* Jeffrey Friedl's debugging harness uses these additional options which
1924 are not in the right form for putting in the option table because they use
1925 only one hyphen, yet are more than one character long. By putting them
1926 separately here, they will not get displayed as part of the help() output,
1927 but I don't think Jeffrey will care about that. */
1928
1929 #ifdef JFRIEDL_DEBUG
1930 else if (strcmp(argv[i], "-pre") == 0) {
1931 jfriedl_prefix = argv[++i];
1932 continue;
1933 } else if (strcmp(argv[i], "-post") == 0) {
1934 jfriedl_postfix = argv[++i];
1935 continue;
1936 } else if (strcmp(argv[i], "-XT") == 0) {
1937 sscanf(argv[++i], "%d", &jfriedl_XT);
1938 continue;
1939 } else if (strcmp(argv[i], "-XR") == 0) {
1940 sscanf(argv[++i], "%d", &jfriedl_XR);
1941 continue;
1942 }
1943 #endif
1944
1945
1946 /* One-char options; many that have no data may be in a single argument; we
1947 continue till we hit the last one or one that needs data. */
1948
1949 else
1950 {
1951 char *s = argv[i] + 1;
1952 longop = FALSE;
1953 while (*s != 0)
1954 {
1955 for (op = optionlist; op->one_char != 0; op++)
1956 { if (*s == op->one_char) break; }
1957 if (op->one_char == 0)
1958 {
1959 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1960 *s, argv[i]);
1961 exit(usage(2));
1962 }
1963 if (op->type != OP_NODATA || s[1] == 0)
1964 {
1965 option_data = s+1;
1966 break;
1967 }
1968 pcre_options = handle_option(*s++, pcre_options);
1969 }
1970 }
1971
1972 /* At this point we should have op pointing to a matched option. If the type
1973 is OP_NODATA, it means that there is no data, and the option might set
1974 something in the PCRE options. */
1975
1976 if (op->type == OP_NODATA)
1977 {
1978 pcre_options = handle_option(op->one_char, pcre_options);
1979 continue;
1980 }
1981
1982 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1983 either has a value or defaults to something. It cannot have data in a
1984 separate item. At the moment, the only such options are "colo(u)r" and
1985 Jeffrey Friedl's special -S debugging option. */
1986
1987 if (*option_data == 0 &&
1988 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1989 {
1990 switch (op->one_char)
1991 {
1992 case N_COLOUR:
1993 colour_option = (char *)"auto";
1994 break;
1995 #ifdef JFRIEDL_DEBUG
1996 case 'S':
1997 S_arg = 0;
1998 break;
1999 #endif
2000 }
2001 continue;
2002 }
2003
2004 /* Otherwise, find the data string for the option. */
2005
2006 if (*option_data == 0)
2007 {
2008 if (i >= argc - 1 || longopwasequals)
2009 {
2010 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2011 exit(usage(2));
2012 }
2013 option_data = argv[++i];
2014 }
2015
2016 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2017 multiple times to create a list of patterns. */
2018
2019 if (op->type == OP_PATLIST)
2020 {
2021 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2022 {
2023 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2024 MAX_PATTERN_COUNT);
2025 return 2;
2026 }
2027 patterns[cmd_pattern_count++] = option_data;
2028 }
2029
2030 /* Otherwise, deal with single string or numeric data values. */
2031
2032 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2033 {
2034 *((char **)op->dataptr) = option_data;
2035 }
2036 else
2037 {
2038 char *endptr;
2039 int n = strtoul(option_data, &endptr, 10);
2040 if (*endptr != 0)
2041 {
2042 if (longop)
2043 {
2044 char *equals = strchr(op->long_name, '=');
2045 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2046 equals - op->long_name;
2047 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2048 option_data, nlen, op->long_name);
2049 }
2050 else
2051 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2052 option_data, op->one_char);
2053 exit(usage(2));
2054 }
2055 *((int *)op->dataptr) = n;
2056 }
2057 }
2058
2059 /* Options have been decoded. If -C was used, its value is used as a default
2060 for -A and -B. */
2061
2062 if (both_context > 0)
2063 {
2064 if (after_context == 0) after_context = both_context;
2065 if (before_context == 0) before_context = both_context;
2066 }
2067
2068 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2069 However, the latter two set the only_matching flag. */
2070
2071 if ((only_matching && (file_offsets || line_offsets)) ||
2072 (file_offsets && line_offsets))
2073 {
2074 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2075 "and/or --line-offsets\n");
2076 exit(usage(2));
2077 }
2078
2079 if (file_offsets || line_offsets) only_matching = TRUE;
2080
2081 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2082 LC_ALL environment variable is set, and if so, use it. */
2083
2084 if (locale == NULL)
2085 {
2086 locale = getenv("LC_ALL");
2087 locale_from = "LCC_ALL";
2088 }
2089
2090 if (locale == NULL)
2091 {
2092 locale = getenv("LC_CTYPE");
2093 locale_from = "LC_CTYPE";
2094 }
2095
2096 /* If a locale has been provided, set it, and generate the tables the PCRE
2097 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2098
2099 if (locale != NULL)
2100 {
2101 if (setlocale(LC_CTYPE, locale) == NULL)
2102 {
2103 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2104 locale, locale_from);
2105 return 2;
2106 }
2107 pcretables = pcre_maketables();
2108 }
2109
2110 /* Sort out colouring */
2111
2112 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2113 {
2114 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2115 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2116 else
2117 {
2118 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2119 colour_option);
2120 return 2;
2121 }
2122 if (do_colour)
2123 {
2124 char *cs = getenv("PCREGREP_COLOUR");
2125 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2126 if (cs != NULL) colour_string = cs;
2127 }
2128 }
2129
2130 /* Interpret the newline type; the default settings are Unix-like. */
2131
2132 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2133 {
2134 pcre_options |= PCRE_NEWLINE_CR;
2135 endlinetype = EL_CR;
2136 }
2137 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2138 {
2139 pcre_options |= PCRE_NEWLINE_LF;
2140 endlinetype = EL_LF;
2141 }
2142 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2143 {
2144 pcre_options |= PCRE_NEWLINE_CRLF;
2145 endlinetype = EL_CRLF;
2146 }
2147 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2148 {
2149 pcre_options |= PCRE_NEWLINE_ANY;
2150 endlinetype = EL_ANY;
2151 }
2152 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2153 {
2154 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2155 endlinetype = EL_ANYCRLF;
2156 }
2157 else
2158 {
2159 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2160 return 2;
2161 }
2162
2163 /* Interpret the text values for -d and -D */
2164
2165 if (dee_option != NULL)
2166 {
2167 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2168 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2169 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2170 else
2171 {
2172 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2173 return 2;
2174 }
2175 }
2176
2177 if (DEE_option != NULL)
2178 {
2179 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2180 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2181 else
2182 {
2183 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2184 return 2;
2185 }
2186 }
2187
2188 /* Check the values for Jeffrey Friedl's debugging options. */
2189
2190 #ifdef JFRIEDL_DEBUG
2191 if (S_arg > 9)
2192 {
2193 fprintf(stderr, "pcregrep: bad value for -S option\n");
2194 return 2;
2195 }
2196 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2197 {
2198 if (jfriedl_XT == 0) jfriedl_XT = 1;
2199 if (jfriedl_XR == 0) jfriedl_XR = 1;
2200 }
2201 #endif
2202
2203 /* Get memory to store the pattern and hints lists. */
2204
2205 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2206 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2207
2208 if (pattern_list == NULL || hints_list == NULL)
2209 {
2210 fprintf(stderr, "pcregrep: malloc failed\n");
2211 goto EXIT2;
2212 }
2213
2214 /* If no patterns were provided by -e, and there is no file provided by -f,
2215 the first argument is the one and only pattern, and it must exist. */
2216
2217 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2218 {
2219 if (i >= argc) return usage(2);
2220 patterns[cmd_pattern_count++] = argv[i++];
2221 }
2222
2223 /* Compile the patterns that were provided on the command line, either by
2224 multiple uses of -e or as a single unkeyed pattern. */
2225
2226 for (j = 0; j < cmd_pattern_count; j++)
2227 {
2228 if (!compile_pattern(patterns[j], pcre_options, NULL,
2229 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2230 goto EXIT2;
2231 }
2232
2233 /* Compile the regular expressions that are provided in a file. */
2234
2235 if (pattern_filename != NULL)
2236 {
2237 int linenumber = 0;
2238 FILE *f;
2239 char *filename;
2240 char buffer[MBUFTHIRD];
2241
2242 if (strcmp(pattern_filename, "-") == 0)
2243 {
2244 f = stdin;
2245 filename = stdin_name;
2246 }
2247 else
2248 {
2249 f = fopen(pattern_filename, "r");
2250 if (f == NULL)
2251 {
2252 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2253 strerror(errno));
2254 goto EXIT2;
2255 }
2256 filename = pattern_filename;
2257 }
2258
2259 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2260 {
2261 char *s = buffer + (int)strlen(buffer);
2262 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2263 *s = 0;
2264 linenumber++;
2265 if (buffer[0] == 0) continue; /* Skip blank lines */
2266 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2267 goto EXIT2;
2268 }
2269
2270 if (f != stdin) fclose(f);
2271 }
2272
2273 /* Study the regular expressions, as we will be running them many times */
2274
2275 for (j = 0; j < pattern_count; j++)
2276 {
2277 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2278 if (error != NULL)
2279 {
2280 char s[16];
2281 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2282 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2283 goto EXIT2;
2284 }
2285 hint_count++;
2286 }
2287
2288 /* If there are include or exclude patterns, compile them. */
2289
2290 if (exclude_pattern != NULL)
2291 {
2292 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2293 pcretables);
2294 if (exclude_compiled == NULL)
2295 {
2296 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2297 errptr, error);
2298 goto EXIT2;
2299 }
2300 }
2301
2302 if (include_pattern != NULL)
2303 {
2304 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2305 pcretables);
2306 if (include_compiled == NULL)
2307 {
2308 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2309 errptr, error);
2310 goto EXIT2;
2311 }
2312 }
2313
2314 if (exclude_dir_pattern != NULL)
2315 {
2316 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2317 pcretables);
2318 if (exclude_dir_compiled == NULL)
2319 {
2320 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2321 errptr, error);
2322 goto EXIT2;
2323 }
2324 }
2325
2326 if (include_dir_pattern != NULL)
2327 {
2328 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2329 pcretables);
2330 if (include_dir_compiled == NULL)
2331 {
2332 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2333 errptr, error);
2334 goto EXIT2;
2335 }
2336 }
2337
2338 /* If there are no further arguments, do the business on stdin and exit. */
2339
2340 if (i >= argc)
2341 {
2342 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2343 goto EXIT;
2344 }
2345
2346 /* Otherwise, work through the remaining arguments as files or directories.
2347 Pass in the fact that there is only one argument at top level - this suppresses
2348 the file name if the argument is not a directory and filenames are not
2349 otherwise forced. */
2350
2351 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2352
2353 for (; i < argc; i++)
2354 {
2355 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2356 only_one_at_top);
2357 if (frc > 1) rc = frc;
2358 else if (frc == 0 && rc == 1) rc = 0;
2359 }
2360
2361 EXIT:
2362 if (pattern_list != NULL)
2363 {
2364 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2365 free(pattern_list);
2366 }
2367 if (hints_list != NULL)
2368 {
2369 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2370 free(hints_list);
2371 }
2372 return rc;
2373
2374 EXIT2:
2375 rc = 2;
2376 goto EXIT;
2377 }
2378
2379 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12