/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 422 - (show annotations) (download)
Fri Aug 14 16:42:55 2009 UTC (5 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 69817 byte(s)
Fix --regex(p)=pattern not working in pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean type and its two values. */

typedef int BOOL;

#define FALSE 0
#define TRUE  1

#define MAX_PATTERN_COUNT 100

/* Size of the offsets vector that is handed to pcre_exec(). */

#define OFFSET_SIZE 99

/* One third of the size of the file buffer used by pcregrep(). It is BUFSIZ
when that is bigger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
107
108
109 /*************************************************
110 * Global variables *
111 *************************************************/
112
113 /* Jeffrey Friedl has some debugging requirements that are not part of the
114 regular code. */
115
116 #ifdef JFRIEDL_DEBUG
117 static int S_arg = -1;
118 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120 static const char *jfriedl_prefix = "";
121 static const char *jfriedl_postfix = "";
122 #endif
123
124 static int endlinetype;
125
126 static char *colour_string = (char *)"1;31";
127 static char *colour_option = NULL;
128 static char *dee_option = NULL;
129 static char *DEE_option = NULL;
130 static char *newline = NULL;
131 static char *pattern_filename = NULL;
132 static char *stdin_name = (char *)"(standard input)";
133 static char *locale = NULL;
134
135 static const unsigned char *pcretables = NULL;
136
137 static int pattern_count = 0;
138 static pcre **pattern_list = NULL;
139 static pcre_extra **hints_list = NULL;
140
141 static char *include_pattern = NULL;
142 static char *exclude_pattern = NULL;
143 static char *include_dir_pattern = NULL;
144 static char *exclude_dir_pattern = NULL;
145
146 static pcre *include_compiled = NULL;
147 static pcre *exclude_compiled = NULL;
148 static pcre *include_dir_compiled = NULL;
149 static pcre *exclude_dir_compiled = NULL;
150
151 static int after_context = 0;
152 static int before_context = 0;
153 static int both_context = 0;
154 static int dee_action = dee_READ;
155 static int DEE_action = DEE_READ;
156 static int error_count = 0;
157 static int filenames = FN_DEFAULT;
158 static int process_options = 0;
159
160 static BOOL count_only = FALSE;
161 static BOOL do_colour = FALSE;
162 static BOOL file_offsets = FALSE;
163 static BOOL hyphenpending = FALSE;
164 static BOOL invert = FALSE;
165 static BOOL line_offsets = FALSE;
166 static BOOL multiline = FALSE;
167 static BOOL number = FALSE;
168 static BOOL omit_zero_count = FALSE;
169 static BOOL only_matching = FALSE;
170 static BOOL quiet = FALSE;
171 static BOOL silent = FALSE;
172 static BOOL utf8 = FALSE;
173
/* The kinds of option that exist. The value is held in the "type" field of
an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* Description of one command line option. The list of all the options is a
vector of these structures. */

typedef struct option_item {
  int type;                 /* one of the OP_xxx values */
  int one_char;             /* the single-letter form, or a negative code */
  void *dataptr;            /* variable that receives the data, or NULL */
  const char *long_name;    /* the long name of the option */
  const char *help_text;    /* one line of descriptive text */
} option_item;
186
187 /* Options without a single-letter equivalent get a negative value. This can be
188 used to identify them. */
189
190 #define N_COLOUR (-1)
191 #define N_EXCLUDE (-2)
192 #define N_EXCLUDE_DIR (-3)
193 #define N_HELP (-4)
194 #define N_INCLUDE (-5)
195 #define N_INCLUDE_DIR (-6)
196 #define N_LABEL (-7)
197 #define N_LOCALE (-8)
198 #define N_NULL (-9)
199 #define N_LOFFSETS (-10)
200 #define N_FOFFSETS (-11)
201
202 static option_item optionlist[] = {
203 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
204 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
205 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
206 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
207 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
208 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
209 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
210 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
211 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
212 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
213 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
214 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
215 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
216 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
217 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
218 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
219 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
220 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
221 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
222 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
223 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
224 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
225 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
226 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
227 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
228 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
229 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
230 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
231 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
232 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
233 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
234 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
235 #ifdef JFRIEDL_DEBUG
236 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
237 #endif
238 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
239 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
240 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
241 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
242 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
243 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
244 { OP_NODATA, 0, NULL, NULL, NULL }
245 };
246
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, so
each table has eight entries, one for every combination of the three bits. Note
that the combination of -w and -x has the same effect as -x on its own, so the
entries for those two combinations are identical. */

static const char *prefix[] = {
  "",            /* 0: none     */
  "\\b",         /* 1: -w       */
  "^(?:",        /* 2: -x       */
  "^(?:",        /* 3: -w -x    */
  "\\Q",         /* 4: -F       */
  "\\b\\Q",      /* 5: -F -w    */
  "^(?:\\Q",     /* 6: -F -x    */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: none     */
  "\\b",         /* 1: -w       */
  ")$",          /* 2: -x       */
  ")$",          /* 3: -w -x    */
  "\\E",         /* 4: -F       */
  "\\E\\b",      /* 5: -F -w    */
  "\\E)$",       /* 6: -F -x    */
  "\\E)$"        /* 7: -F -w -x */
};
257
/* UTF-8 tables - used for decoding multi-byte characters when looking for line
endings with the newline setting "any" or "anycrlf". */

/* Masks for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* The number of additional bytes for a character, indexed by the bottom six
bits of its first byte. It is consulted only for first bytes >= 0xc0. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
267
268
269
270 /*************************************************
271 * OS-specific functions *
272 *************************************************/
273
274 /* These functions are defined so that they can be made system specific,
275 although at present the only ones are for Unix, Win32, and for "no support". */
276
277
278 /************* Directory scanning in Unix ***********/
279
280 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
281 #include <sys/types.h>
282 #include <sys/stat.h>
283 #include <dirent.h>
284
285 typedef DIR directory_type;
286
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' if the path is a directory
            0 if it is not, or if stat() fails

The file type is tested with the S_ISDIR() macro, which is part of base POSIX,
rather than by masking with S_IFMT, which is an XSI extension. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
295
296 static directory_type *
297 opendirectory(char *filename)
298 {
299 return opendir(filename);
300 }
301
302 static char *
303 readdirectory(directory_type *dir)
304 {
305 for (;;)
306 {
307 struct dirent *dent = readdir(dir);
308 if (dent == NULL) return NULL;
309 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
310 return dent->d_name;
311 }
312 /* Control never reaches here */
313 }
314
315 static void
316 closedirectory(directory_type *dir)
317 {
318 closedir(dir);
319 }
320
321
322 /************* Test for regular file in Unix **********/
323
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    1 if the path is a regular file, or if stat() fails
            0 if it is something else

The file type is tested with the S_ISREG() macro, which is part of base POSIX,
rather than by masking with S_IFMT, which is an XSI extension. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode)? 1 : 0;
}
332
333
334 /************* Test stdout for being a terminal in Unix **********/
335
336 static BOOL
337 is_stdout_tty(void)
338 {
339 return isatty(fileno(stdout));
340 }
341
342
343 /************* Directory scanning in Win32 ***********/
344
345 /* I (Philip Hazel) have no means of testing this code. It was contributed by
346 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
347 when it did not exist. David Byron added a patch that moved the #include of
348 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
349 */
350
351 #elif HAVE_WINDOWS_H
352
353 #ifndef STRICT
354 # define STRICT
355 #endif
356 #ifndef WIN32_LEAN_AND_MEAN
357 # define WIN32_LEAN_AND_MEAN
358 #endif
359
360 #include <windows.h>
361
362 #ifndef INVALID_FILE_ATTRIBUTES
363 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
364 #endif
365
366 typedef struct directory_type
367 {
368 HANDLE handle;
369 BOOL first;
370 WIN32_FIND_DATA data;
371 } directory_type;
372
373 int
374 isdirectory(char *filename)
375 {
376 DWORD attr = GetFileAttributes(filename);
377 if (attr == INVALID_FILE_ATTRIBUTES)
378 return 0;
379 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
380 }
381
382 directory_type *
383 opendirectory(char *filename)
384 {
385 size_t len;
386 char *pattern;
387 directory_type *dir;
388 DWORD err;
389 len = strlen(filename);
390 pattern = (char *) malloc(len + 3);
391 dir = (directory_type *) malloc(sizeof(*dir));
392 if ((pattern == NULL) || (dir == NULL))
393 {
394 fprintf(stderr, "pcregrep: malloc failed\n");
395 exit(2);
396 }
397 memcpy(pattern, filename, len);
398 memcpy(&(pattern[len]), "\\*", 3);
399 dir->handle = FindFirstFile(pattern, &(dir->data));
400 if (dir->handle != INVALID_HANDLE_VALUE)
401 {
402 free(pattern);
403 dir->first = TRUE;
404 return dir;
405 }
406 err = GetLastError();
407 free(pattern);
408 free(dir);
409 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
410 return NULL;
411 }
412
413 char *
414 readdirectory(directory_type *dir)
415 {
416 for (;;)
417 {
418 if (!dir->first)
419 {
420 if (!FindNextFile(dir->handle, &(dir->data)))
421 return NULL;
422 }
423 else
424 {
425 dir->first = FALSE;
426 }
427 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
428 return dir->data.cFileName;
429 }
430 #ifndef _MSC_VER
431 return NULL; /* Keep compiler happy; never executed */
432 #endif
433 }
434
435 void
436 closedirectory(directory_type *dir)
437 {
438 FindClose(dir->handle);
439 free(dir);
440 }
441
442
443 /************* Test for regular file in Win32 **********/
444
/* There is no real test for a regular file in the Win32 code. Every path that
isdirectory() does not report as a directory is assumed to be regular. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
452
453
454 /************* Test stdout for being a terminal in Win32 **********/
455
456 /* I don't know how to do this; assume never */
457
458 static BOOL
459 is_stdout_tty(void)
460 {
461 return FALSE;
462 }
463
464
465 /************* Directory scanning when we can't do it ***********/
466
467 /* The type is void, and apart from isdirectory(), the functions do nothing. */
468
469 #else
470
/* Versions for systems where directories cannot be scanned. Nothing is ever a
directory, no directory can be opened, and there are no entries to read. */

typedef void directory_type;

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type *
opendirectory(char *filename)
{
(void)filename;
return NULL;
}

char *
readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

void
closedirectory(directory_type *dir)
{
(void)dir;
}
477
478
479 /************* Test for regular when we can't do it **********/
480
/* With no means of testing, every file is assumed to be a regular one. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
484
485
486 /************* Test stdout for being a terminal when we can't do it **********/
487
488 static BOOL
489 is_stdout_tty(void)
490 {
491 return FALSE;
492 }
493
494
495 #endif
496
497
498
499 #ifndef HAVE_STRERROR
500 /*************************************************
501 * Provide strerror() for non-ANSI libraries *
502 *************************************************/
503
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries. This replacement looks the message up in sys_errlist[],
and gives a fixed text for a number that is negative or not less than
sys_nerr. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
517 #endif /* HAVE_STRERROR */
518
519
520
521 /*************************************************
522 * Find end of line *
523 *************************************************/
524
525 /* The length of the endline sequence that is found is set via lenptr. This may
526 be zero at the very end of the file if there is no line-ending sequence there.
527
528 Arguments:
529 p current position in line
530 endptr end of available data
531 lenptr where to put the length of the eol sequence
532
533 Returns: pointer to the last byte of the line
534 */
535
536 static char *
537 end_of_line(char *p, char *endptr, int *lenptr)
538 {
539 switch(endlinetype)
540 {
541 default: /* Just in case */
542 case EL_LF:
543 while (p < endptr && *p != '\n') p++;
544 if (p < endptr)
545 {
546 *lenptr = 1;
547 return p + 1;
548 }
549 *lenptr = 0;
550 return endptr;
551
552 case EL_CR:
553 while (p < endptr && *p != '\r') p++;
554 if (p < endptr)
555 {
556 *lenptr = 1;
557 return p + 1;
558 }
559 *lenptr = 0;
560 return endptr;
561
562 case EL_CRLF:
563 for (;;)
564 {
565 while (p < endptr && *p != '\r') p++;
566 if (++p >= endptr)
567 {
568 *lenptr = 0;
569 return endptr;
570 }
571 if (*p == '\n')
572 {
573 *lenptr = 2;
574 return p + 1;
575 }
576 }
577 break;
578
579 case EL_ANYCRLF:
580 while (p < endptr)
581 {
582 int extra = 0;
583 register int c = *((unsigned char *)p);
584
585 if (utf8 && c >= 0xc0)
586 {
587 int gcii, gcss;
588 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
589 gcss = 6*extra;
590 c = (c & utf8_table3[extra]) << gcss;
591 for (gcii = 1; gcii <= extra; gcii++)
592 {
593 gcss -= 6;
594 c |= (p[gcii] & 0x3f) << gcss;
595 }
596 }
597
598 p += 1 + extra;
599
600 switch (c)
601 {
602 case 0x0a: /* LF */
603 *lenptr = 1;
604 return p;
605
606 case 0x0d: /* CR */
607 if (p < endptr && *p == 0x0a)
608 {
609 *lenptr = 2;
610 p++;
611 }
612 else *lenptr = 1;
613 return p;
614
615 default:
616 break;
617 }
618 } /* End of loop for ANYCRLF case */
619
620 *lenptr = 0; /* Must have hit the end */
621 return endptr;
622
623 case EL_ANY:
624 while (p < endptr)
625 {
626 int extra = 0;
627 register int c = *((unsigned char *)p);
628
629 if (utf8 && c >= 0xc0)
630 {
631 int gcii, gcss;
632 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
633 gcss = 6*extra;
634 c = (c & utf8_table3[extra]) << gcss;
635 for (gcii = 1; gcii <= extra; gcii++)
636 {
637 gcss -= 6;
638 c |= (p[gcii] & 0x3f) << gcss;
639 }
640 }
641
642 p += 1 + extra;
643
644 switch (c)
645 {
646 case 0x0a: /* LF */
647 case 0x0b: /* VT */
648 case 0x0c: /* FF */
649 *lenptr = 1;
650 return p;
651
652 case 0x0d: /* CR */
653 if (p < endptr && *p == 0x0a)
654 {
655 *lenptr = 2;
656 p++;
657 }
658 else *lenptr = 1;
659 return p;
660
661 case 0x85: /* NEL */
662 *lenptr = utf8? 2 : 1;
663 return p;
664
665 case 0x2028: /* LS */
666 case 0x2029: /* PS */
667 *lenptr = 3;
668 return p;
669
670 default:
671 break;
672 }
673 } /* End of loop for ANY case */
674
675 *lenptr = 0; /* Must have hit the end */
676 return endptr;
677 } /* End of overall switch */
678 }
679
680
681
682 /*************************************************
683 * Find start of previous line *
684 *************************************************/
685
686 /* This is called when looking back for before lines to print.
687
688 Arguments:
689 p start of the subsequent line
690 startptr start of available data
691
692 Returns: pointer to the start of the previous line
693 */
694
695 static char *
696 previous_line(char *p, char *startptr)
697 {
698 switch(endlinetype)
699 {
700 default: /* Just in case */
701 case EL_LF:
702 p--;
703 while (p > startptr && p[-1] != '\n') p--;
704 return p;
705
706 case EL_CR:
707 p--;
708 while (p > startptr && p[-1] != '\n') p--;
709 return p;
710
711 case EL_CRLF:
712 for (;;)
713 {
714 p -= 2;
715 while (p > startptr && p[-1] != '\n') p--;
716 if (p <= startptr + 1 || p[-2] == '\r') return p;
717 }
718 return p; /* But control should never get here */
719
720 case EL_ANY:
721 case EL_ANYCRLF:
722 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
723 if (utf8) while ((*p & 0xc0) == 0x80) p--;
724
725 while (p > startptr)
726 {
727 register int c;
728 char *pp = p - 1;
729
730 if (utf8)
731 {
732 int extra = 0;
733 while ((*pp & 0xc0) == 0x80) pp--;
734 c = *((unsigned char *)pp);
735 if (c >= 0xc0)
736 {
737 int gcii, gcss;
738 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
739 gcss = 6*extra;
740 c = (c & utf8_table3[extra]) << gcss;
741 for (gcii = 1; gcii <= extra; gcii++)
742 {
743 gcss -= 6;
744 c |= (pp[gcii] & 0x3f) << gcss;
745 }
746 }
747 }
748 else c = *((unsigned char *)pp);
749
750 if (endlinetype == EL_ANYCRLF) switch (c)
751 {
752 case 0x0a: /* LF */
753 case 0x0d: /* CR */
754 return p;
755
756 default:
757 break;
758 }
759
760 else switch (c)
761 {
762 case 0x0a: /* LF */
763 case 0x0b: /* VT */
764 case 0x0c: /* FF */
765 case 0x0d: /* CR */
766 case 0x85: /* NEL */
767 case 0x2028: /* LS */
768 case 0x2029: /* PS */
769 return p;
770
771 default:
772 break;
773 }
774
775 p = pp; /* Back one character */
776 } /* End of loop for ANY case */
777
778 return startptr; /* Hit start of data */
779 } /* End of overall switch */
780 }
781
782
783
784
785
786 /*************************************************
787 * Print the previous "after" lines *
788 *************************************************/
789
790 /* This is called if we are about to lose said lines because of buffer filling,
791 and at the end of the file. The data in the line is written using fwrite() so
792 that a binary zero does not terminate it.
793
794 Arguments:
795 lastmatchnumber the number of the last matching line, plus one
796 lastmatchrestart where we restarted after the last match
797 endptr end of available data
798 printname filename for printing
799
800 Returns: nothing
801 */
802
803 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
804 char *endptr, char *printname)
805 {
806 if (after_context > 0 && lastmatchnumber > 0)
807 {
808 int count = 0;
809 while (lastmatchrestart < endptr && count++ < after_context)
810 {
811 int ellength;
812 char *pp = lastmatchrestart;
813 if (printname != NULL) fprintf(stdout, "%s-", printname);
814 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
815 pp = end_of_line(pp, endptr, &ellength);
816 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
817 lastmatchrestart = pp;
818 }
819 hyphenpending = TRUE;
820 }
821 }
822
823
824
825 /*************************************************
826 * Apply patterns to subject till one matches *
827 *************************************************/
828
829 /* This function is called to run through all patterns, looking for a match. It
830 is used multiple times for the same subject when colouring is enabled, in order
831 to find all possible matches.
832
833 Arguments:
834 matchptr the start of the subject
835 length the length of the subject to match
836 offsets the offets vector to fill in
837 mrc address of where to put the result of pcre_exec()
838
839 Returns: TRUE if there was a match
840 FALSE if there was no match
841 invert if there was a non-fatal error
842 */
843
844 static BOOL
845 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
846 {
847 int i;
848 for (i = 0; i < pattern_count; i++)
849 {
850 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
851 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
852 if (*mrc >= 0) return TRUE;
853 if (*mrc == PCRE_ERROR_NOMATCH) continue;
854 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
855 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
856 fprintf(stderr, "this text:\n");
857 fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
858 fprintf(stderr, "\n");
859 if (error_count == 0 &&
860 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
861 {
862 fprintf(stderr, "pcregrep: error %d means that a resource limit "
863 "was exceeded\n", *mrc);
864 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
865 }
866 if (error_count++ > 20)
867 {
868 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
869 exit(2);
870 }
871 return invert; /* No more matching; don't show the line again */
872 }
873
874 return FALSE; /* No match, no errors */
875 }
876
877
878
879 /*************************************************
880 * Grep an individual file *
881 *************************************************/
882
883 /* This is called from grep_or_recurse() below. It uses a buffer that is three
884 times the value of MBUFTHIRD. The matching point is never allowed to stray into
885 the top third of the buffer, thus keeping more of the file available for
886 context printing or for multiline scanning. For large files, the pointer will
887 be in the middle third most of the time, so the bottom third is available for
888 "before" context printing.
889
890 Arguments:
891 handle the fopened FILE stream for a normal file
892 the gzFile pointer when reading is via libz
893 the BZFILE pointer when reading is via libbz2
894 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
895 printname the file name if it is to be printed for each match
896 or NULL if the file name is not to be printed
897 it cannot be NULL if filenames[_nomatch]_only is set
898
899 Returns: 0 if there was at least one match
900 1 otherwise (no matches)
901 2 if there is a read error on a .bz2 file
902 */
903
904 static int
905 pcregrep(void *handle, int frtype, char *printname)
906 {
907 int rc = 1;
908 int linenumber = 1;
909 int lastmatchnumber = 0;
910 int count = 0;
911 int filepos = 0;
912 int offsets[OFFSET_SIZE];
913 char *lastmatchrestart = NULL;
914 char buffer[3*MBUFTHIRD];
915 char *ptr = buffer;
916 char *endptr;
917 size_t bufflength;
918 BOOL endhyphenpending = FALSE;
919 FILE *in = NULL; /* Ensure initialized */
920
921 #ifdef SUPPORT_LIBZ
922 gzFile ingz = NULL;
923 #endif
924
925 #ifdef SUPPORT_LIBBZ2
926 BZFILE *inbz2 = NULL;
927 #endif
928
929
930 /* Do the first read into the start of the buffer and set up the pointer to end
931 of what we have. In the case of libz, a non-zipped .gz file will be read as a
932 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
933 fail. */
934
935 #ifdef SUPPORT_LIBZ
936 if (frtype == FR_LIBZ)
937 {
938 ingz = (gzFile)handle;
939 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
940 }
941 else
942 #endif
943
944 #ifdef SUPPORT_LIBBZ2
945 if (frtype == FR_LIBBZ2)
946 {
947 inbz2 = (BZFILE *)handle;
948 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
949 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
950 } /* without the cast it is unsigned. */
951 else
952 #endif
953
954 {
955 in = (FILE *)handle;
956 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
957 }
958
959 endptr = buffer + bufflength;
960
961 /* Loop while the current pointer is not at the end of the file. For large
962 files, endptr will be at the end of the buffer when we are in the middle of the
963 file, but ptr will never get there, because as soon as it gets over 2/3 of the
964 way, the buffer is shifted left and re-filled. */
965
966 while (ptr < endptr)
967 {
968 int endlinelength;
969 int mrc = 0;
970 BOOL match;
971 char *matchptr = ptr;
972 char *t = ptr;
973 size_t length, linelength;
974
975 /* At this point, ptr is at the start of a line. We need to find the length
976 of the subject string to pass to pcre_exec(). In multiline mode, it is the
977 length remainder of the data in the buffer. Otherwise, it is the length of
978 the next line, excluding the terminating newline. After matching, we always
979 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
980 option is used for compiling, so that any match is constrained to be in the
981 first line. */
982
983 t = end_of_line(t, endptr, &endlinelength);
984 linelength = t - ptr - endlinelength;
985 length = multiline? (size_t)(endptr - ptr) : linelength;
986
987 /* Extra processing for Jeffrey Friedl's debugging. */
988
989 #ifdef JFRIEDL_DEBUG
990 if (jfriedl_XT || jfriedl_XR)
991 {
992 #include <sys/time.h>
993 #include <time.h>
994 struct timeval start_time, end_time;
995 struct timezone dummy;
996 int i;
997
998 if (jfriedl_XT)
999 {
1000 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1001 const char *orig = ptr;
1002 ptr = malloc(newlen + 1);
1003 if (!ptr) {
1004 printf("out of memory");
1005 exit(2);
1006 }
1007 endptr = ptr;
1008 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1009 for (i = 0; i < jfriedl_XT; i++) {
1010 strncpy(endptr, orig, length);
1011 endptr += length;
1012 }
1013 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1014 length = newlen;
1015 }
1016
1017 if (gettimeofday(&start_time, &dummy) != 0)
1018 perror("bad gettimeofday");
1019
1020
1021 for (i = 0; i < jfriedl_XR; i++)
1022 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1023 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1024
1025 if (gettimeofday(&end_time, &dummy) != 0)
1026 perror("bad gettimeofday");
1027
1028 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1029 -
1030 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1031
1032 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1033 return 0;
1034 }
1035 #endif
1036
1037 /* We come back here after a match when the -o option (only_matching) is set,
1038 in order to find any further matches in the same line. */
1039
1040 ONLY_MATCHING_RESTART:
1041
1042 /* Run through all the patterns until one matches or there is an error other
1043 than NOMATCH. This code is in a subroutine so that it can be re-used for
1044 finding subsequent matches when colouring matched lines. */
1045
1046 match = match_patterns(matchptr, length, offsets, &mrc);
1047
1048 /* If it's a match or a not-match (as required), do what's wanted. */
1049
1050 if (match != invert)
1051 {
1052 BOOL hyphenprinted = FALSE;
1053
1054 /* We've failed if we want a file that doesn't have any matches. */
1055
1056 if (filenames == FN_NOMATCH_ONLY) return 1;
1057
1058 /* Just count if just counting is wanted. */
1059
1060 if (count_only) count++;
1061
1062 /* If all we want is a file name, there is no need to scan any more lines
1063 in the file. */
1064
1065 else if (filenames == FN_MATCH_ONLY)
1066 {
1067 fprintf(stdout, "%s\n", printname);
1068 return 0;
1069 }
1070
1071 /* Likewise, if all we want is a yes/no answer. */
1072
1073 else if (quiet) return 0;
1074
1075 /* The --only-matching option prints just the substring that matched, and
1076 the --file-offsets and --line-offsets options output offsets for the
1077 matching substring (they both force --only-matching). None of these options
1078 prints any context. Afterwards, adjust the start and length, and then jump
1079 back to look for further matches in the same line. If we are in invert
1080 mode, however, nothing is printed - this could be still useful because the
1081 return code is set. */
1082
1083 else if (only_matching)
1084 {
1085 if (!invert)
1086 {
1087 if (printname != NULL) fprintf(stdout, "%s:", printname);
1088 if (number) fprintf(stdout, "%d:", linenumber);
1089 if (line_offsets)
1090 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1091 offsets[1] - offsets[0]);
1092 else if (file_offsets)
1093 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1094 offsets[1] - offsets[0]);
1095 else
1096 {
1097 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1098 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1099 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1100 }
1101 fprintf(stdout, "\n");
1102 matchptr += offsets[1];
1103 length -= offsets[1];
1104 match = FALSE;
1105 goto ONLY_MATCHING_RESTART;
1106 }
1107 }
1108
1109 /* This is the default case when none of the above options is set. We print
1110 the matching line(s), possibly preceded and/or followed by other lines of
1111 context. */
1112
1113 else
1114 {
1115 /* See if there is a requirement to print some "after" lines from a
1116 previous match. We never print any overlaps. */
1117
1118 if (after_context > 0 && lastmatchnumber > 0)
1119 {
1120 int ellength;
1121 int linecount = 0;
1122 char *p = lastmatchrestart;
1123
1124 while (p < ptr && linecount < after_context)
1125 {
1126 p = end_of_line(p, ptr, &ellength);
1127 linecount++;
1128 }
1129
1130 /* It is important to advance lastmatchrestart during this printing so
1131 that it interacts correctly with any "before" printing below. Print
1132 each line's data using fwrite() in case there are binary zeroes. */
1133
1134 while (lastmatchrestart < p)
1135 {
1136 char *pp = lastmatchrestart;
1137 if (printname != NULL) fprintf(stdout, "%s-", printname);
1138 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1139 pp = end_of_line(pp, endptr, &ellength);
1140 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1141 lastmatchrestart = pp;
1142 }
1143 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1144 }
1145
1146 /* If there were non-contiguous lines printed above, insert hyphens. */
1147
1148 if (hyphenpending)
1149 {
1150 fprintf(stdout, "--\n");
1151 hyphenpending = FALSE;
1152 hyphenprinted = TRUE;
1153 }
1154
1155 /* See if there is a requirement to print some "before" lines for this
1156 match. Again, don't print overlaps. */
1157
1158 if (before_context > 0)
1159 {
1160 int linecount = 0;
1161 char *p = ptr;
1162
1163 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1164 linecount < before_context)
1165 {
1166 linecount++;
1167 p = previous_line(p, buffer);
1168 }
1169
1170 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1171 fprintf(stdout, "--\n");
1172
1173 while (p < ptr)
1174 {
1175 int ellength;
1176 char *pp = p;
1177 if (printname != NULL) fprintf(stdout, "%s-", printname);
1178 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1179 pp = end_of_line(pp, endptr, &ellength);
1180 fwrite(p, 1, pp - p, stdout);
1181 p = pp;
1182 }
1183 }
1184
1185 /* Now print the matching line(s); ensure we set hyphenpending at the end
1186 of the file if any context lines are being output. */
1187
1188 if (after_context > 0 || before_context > 0)
1189 endhyphenpending = TRUE;
1190
1191 if (printname != NULL) fprintf(stdout, "%s:", printname);
1192 if (number) fprintf(stdout, "%d:", linenumber);
1193
1194 /* In multiline mode, we want to print to the end of the line in which
1195 the end of the matched string is found, so we adjust linelength and the
1196 line number appropriately, but only when there actually was a match
1197 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1198 the match will always be before the first newline sequence. */
1199
1200 if (multiline)
1201 {
1202 int ellength;
1203 char *endmatch = ptr;
1204 if (!invert)
1205 {
1206 endmatch += offsets[1];
1207 t = ptr;
1208 while (t < endmatch)
1209 {
1210 t = end_of_line(t, endptr, &ellength);
1211 if (t <= endmatch) linenumber++; else break;
1212 }
1213 }
1214 endmatch = end_of_line(endmatch, endptr, &ellength);
1215 linelength = endmatch - ptr - ellength;
1216 }
1217
1218 /*** NOTE: Use only fwrite() to output the data line, so that binary
1219 zeroes are treated as just another data character. */
1220
1221 /* This extra option, for Jeffrey Friedl's debugging requirements,
1222 replaces the matched string, or a specific captured string if it exists,
1223 with X. When this happens, colouring is ignored. */
1224
1225 #ifdef JFRIEDL_DEBUG
1226 if (S_arg >= 0 && S_arg < mrc)
1227 {
1228 int first = S_arg * 2;
1229 int last = first + 1;
1230 fwrite(ptr, 1, offsets[first], stdout);
1231 fprintf(stdout, "X");
1232 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1233 }
1234 else
1235 #endif
1236
1237 /* We have to split the line(s) up if colouring, and search for further
1238 matches. */
1239
1240 if (do_colour)
1241 {
1242 int last_offset = 0;
1243 fwrite(ptr, 1, offsets[0], stdout);
1244 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1245 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1246 fprintf(stdout, "%c[00m", 0x1b);
1247 for (;;)
1248 {
1249 last_offset += offsets[1];
1250 matchptr += offsets[1];
1251 length -= offsets[1];
1252 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1253 fwrite(matchptr, 1, offsets[0], stdout);
1254 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1255 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1256 fprintf(stdout, "%c[00m", 0x1b);
1257 }
1258 fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1259 stdout);
1260 }
1261
1262 /* Not colouring; no need to search for further matches */
1263
1264 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1265 }
1266
1267 /* End of doing what has to be done for a match */
1268
1269 rc = 0; /* Had some success */
1270
1271 /* Remember where the last match happened for after_context. We remember
1272 where we are about to restart, and that line's number. */
1273
1274 lastmatchrestart = ptr + linelength + endlinelength;
1275 lastmatchnumber = linenumber + 1;
1276 }
1277
1278 /* For a match in multiline inverted mode (which of course did not cause
1279 anything to be printed), we have to move on to the end of the match before
1280 proceeding. */
1281
1282 if (multiline && invert && match)
1283 {
1284 int ellength;
1285 char *endmatch = ptr + offsets[1];
1286 t = ptr;
1287 while (t < endmatch)
1288 {
1289 t = end_of_line(t, endptr, &ellength);
1290 if (t <= endmatch) linenumber++; else break;
1291 }
1292 endmatch = end_of_line(endmatch, endptr, &ellength);
1293 linelength = endmatch - ptr - ellength;
1294 }
1295
1296 /* Advance to after the newline and increment the line number. The file
1297 offset to the current line is maintained in filepos. */
1298
1299 ptr += linelength + endlinelength;
1300 filepos += linelength + endlinelength;
1301 linenumber++;
1302
1303 /* If we haven't yet reached the end of the file (the buffer is full), and
1304 the current point is in the top 1/3 of the buffer, slide the buffer down by
1305 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1306 about to be lost, print them. */
1307
1308 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1309 {
1310 if (after_context > 0 &&
1311 lastmatchnumber > 0 &&
1312 lastmatchrestart < buffer + MBUFTHIRD)
1313 {
1314 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1315 lastmatchnumber = 0;
1316 }
1317
1318 /* Now do the shuffle */
1319
1320 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1321 ptr -= MBUFTHIRD;
1322
1323 #ifdef SUPPORT_LIBZ
1324 if (frtype == FR_LIBZ)
1325 bufflength = 2*MBUFTHIRD +
1326 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1327 else
1328 #endif
1329
1330 #ifdef SUPPORT_LIBBZ2
1331 if (frtype == FR_LIBBZ2)
1332 bufflength = 2*MBUFTHIRD +
1333 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1334 else
1335 #endif
1336
1337 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1338
1339 endptr = buffer + bufflength;
1340
1341 /* Adjust any last match point */
1342
1343 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1344 }
1345 } /* Loop through the whole file */
1346
1347 /* End of file; print final "after" lines if wanted; do_after_lines sets
1348 hyphenpending if it prints something. */
1349
1350 if (!only_matching && !count_only)
1351 {
1352 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1353 hyphenpending |= endhyphenpending;
1354 }
1355
1356 /* Print the file name if we are looking for those without matches and there
1357 were none. If we found a match, we won't have got this far. */
1358
1359 if (filenames == FN_NOMATCH_ONLY)
1360 {
1361 fprintf(stdout, "%s\n", printname);
1362 return 0;
1363 }
1364
1365 /* Print the match count if wanted */
1366
1367 if (count_only)
1368 {
1369 if (count > 0 || !omit_zero_count)
1370 {
1371 if (printname != NULL && filenames != FN_NONE)
1372 fprintf(stdout, "%s:", printname);
1373 fprintf(stdout, "%d\n", count);
1374 }
1375 }
1376
1377 return rc;
1378 }
1379
1380
1381
1382 /*************************************************
1383 * Grep a file or recurse into a directory *
1384 *************************************************/
1385
1386 /* Given a path name, if it's a directory, scan all the files if we are
1387 recursing; if it's a file, grep it.
1388
1389 Arguments:
1390 pathname the path to investigate
1391 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1392 only_one_at_top TRUE if the path is the only one at toplevel
1393
1394 Returns: 0 if there was at least one match
1395 1 if there were no matches
1396 2 there was some kind of error
1397
1398 However, file opening failures are suppressed if "silent" is set.
1399 */
1400
1401 static int
1402 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1403 {
1404 int rc = 1;
1405 int sep;
1406 int frtype;
1407 int pathlen;
1408 void *handle;
1409 FILE *in = NULL; /* Ensure initialized */
1410
1411 #ifdef SUPPORT_LIBZ
1412 gzFile ingz = NULL;
1413 #endif
1414
1415 #ifdef SUPPORT_LIBBZ2
1416 BZFILE *inbz2 = NULL;
1417 #endif
1418
1419 /* If the file name is "-" we scan stdin */
1420
1421 if (strcmp(pathname, "-") == 0)
1422 {
1423 return pcregrep(stdin, FR_PLAIN,
1424 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1425 stdin_name : NULL);
1426 }
1427
1428 /* If the file is a directory, skip if skipping or if we are recursing, scan
1429 each file and directory within it, subject to any include or exclude patterns
1430 that were set. The scanning code is localized so it can be made
1431 system-specific. */
1432
1433 if ((sep = isdirectory(pathname)) != 0)
1434 {
1435 if (dee_action == dee_SKIP) return 1;
1436 if (dee_action == dee_RECURSE)
1437 {
1438 char buffer[1024];
1439 char *nextfile;
1440 directory_type *dir = opendirectory(pathname);
1441
1442 if (dir == NULL)
1443 {
1444 if (!silent)
1445 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1446 strerror(errno));
1447 return 2;
1448 }
1449
1450 while ((nextfile = readdirectory(dir)) != NULL)
1451 {
1452 int frc, nflen;
1453 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1454 nflen = strlen(nextfile);
1455
1456 if (isdirectory(buffer))
1457 {
1458 if (exclude_dir_compiled != NULL &&
1459 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1460 continue;
1461
1462 if (include_dir_compiled != NULL &&
1463 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1464 continue;
1465 }
1466 else
1467 {
1468 if (exclude_compiled != NULL &&
1469 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1470 continue;
1471
1472 if (include_compiled != NULL &&
1473 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1474 continue;
1475 }
1476
1477 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1478 if (frc > 1) rc = frc;
1479 else if (frc == 0 && rc == 1) rc = 0;
1480 }
1481
1482 closedirectory(dir);
1483 return rc;
1484 }
1485 }
1486
1487 /* If the file is not a directory and not a regular file, skip it if that's
1488 been requested. */
1489
1490 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1491
1492 /* Control reaches here if we have a regular file, or if we have a directory
1493 and recursion or skipping was not requested, or if we have anything else and
1494 skipping was not requested. The scan proceeds. If this is the first and only
1495 argument at top level, we don't show the file name, unless we are only showing
1496 the file name, or the filename was forced (-H). */
1497
1498 pathlen = strlen(pathname);
1499
1500 /* Open using zlib if it is supported and the file name ends with .gz. */
1501
1502 #ifdef SUPPORT_LIBZ
1503 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1504 {
1505 ingz = gzopen(pathname, "rb");
1506 if (ingz == NULL)
1507 {
1508 if (!silent)
1509 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1510 strerror(errno));
1511 return 2;
1512 }
1513 handle = (void *)ingz;
1514 frtype = FR_LIBZ;
1515 }
1516 else
1517 #endif
1518
1519 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1520
1521 #ifdef SUPPORT_LIBBZ2
1522 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1523 {
1524 inbz2 = BZ2_bzopen(pathname, "rb");
1525 handle = (void *)inbz2;
1526 frtype = FR_LIBBZ2;
1527 }
1528 else
1529 #endif
1530
1531 /* Otherwise use plain fopen(). The label is so that we can come back here if
1532 an attempt to read a .bz2 file indicates that it really is a plain file. */
1533
1534 #ifdef SUPPORT_LIBBZ2
1535 PLAIN_FILE:
1536 #endif
1537 {
1538 in = fopen(pathname, "rb");
1539 handle = (void *)in;
1540 frtype = FR_PLAIN;
1541 }
1542
1543 /* All the opening methods return errno when they fail. */
1544
1545 if (handle == NULL)
1546 {
1547 if (!silent)
1548 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1549 strerror(errno));
1550 return 2;
1551 }
1552
1553 /* Now grep the file */
1554
1555 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1556 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1557
1558 /* Close in an appropriate manner. */
1559
1560 #ifdef SUPPORT_LIBZ
1561 if (frtype == FR_LIBZ)
1562 gzclose(ingz);
1563 else
1564 #endif
1565
1566 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1567 read failed. If the error indicates that the file isn't in fact bzipped, try
1568 again as a normal file. */
1569
1570 #ifdef SUPPORT_LIBBZ2
1571 if (frtype == FR_LIBBZ2)
1572 {
1573 if (rc == 2)
1574 {
1575 int errnum;
1576 const char *err = BZ2_bzerror(inbz2, &errnum);
1577 if (errnum == BZ_DATA_ERROR_MAGIC)
1578 {
1579 BZ2_bzclose(inbz2);
1580 goto PLAIN_FILE;
1581 }
1582 else if (!silent)
1583 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1584 pathname, err);
1585 }
1586 BZ2_bzclose(inbz2);
1587 }
1588 else
1589 #endif
1590
1591 /* Normal file close */
1592
1593 fclose(in);
1594
1595 /* Pass back the yield from pcregrep(). */
1596
1597 return rc;
1598 }
1599
1600
1601
1602
1603 /*************************************************
1604 * Usage function *
1605 *************************************************/
1606
1607 static int
1608 usage(int rc)
1609 {
1610 option_item *op;
1611 fprintf(stderr, "Usage: pcregrep [-");
1612 for (op = optionlist; op->one_char != 0; op++)
1613 {
1614 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1615 }
1616 fprintf(stderr, "] [long options] [pattern] [files]\n");
1617 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1618 "options.\n");
1619 return rc;
1620 }
1621
1622
1623
1624
1625 /*************************************************
1626 * Help function *
1627 *************************************************/
1628
1629 static void
1630 help(void)
1631 {
1632 option_item *op;
1633
1634 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1635 printf("Search for PATTERN in each FILE or standard input.\n");
1636 printf("PATTERN must be present if neither -e nor -f is used.\n");
1637 printf("\"-\" can be used as a file name to mean STDIN.\n");
1638
1639 #ifdef SUPPORT_LIBZ
1640 printf("Files whose names end in .gz are read using zlib.\n");
1641 #endif
1642
1643 #ifdef SUPPORT_LIBBZ2
1644 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1645 #endif
1646
1647 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1648 printf("Other files and the standard input are read as plain files.\n\n");
1649 #else
1650 printf("All files are read as plain files, without any interpretation.\n\n");
1651 #endif
1652
1653 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1654 printf("Options:\n");
1655
1656 for (op = optionlist; op->one_char != 0; op++)
1657 {
1658 int n;
1659 char s[4];
1660 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1661 n = 30 - printf(" %s --%s", s, op->long_name);
1662 if (n < 1) n = 1;
1663 printf("%.*s%s\n", n, " ", op->help_text);
1664 }
1665
1666 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1667 printf("trailing white space is removed and blank lines are ignored.\n");
1668 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1669
1670 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1671 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1672 }
1673
1674
1675
1676
1677 /*************************************************
1678 * Handle a single-letter, no data option *
1679 *************************************************/
1680
1681 static int
1682 handle_option(int letter, int options)
1683 {
1684 switch(letter)
1685 {
1686 case N_FOFFSETS: file_offsets = TRUE; break;
1687 case N_HELP: help(); exit(0);
1688 case N_LOFFSETS: line_offsets = number = TRUE; break;
1689 case 'c': count_only = TRUE; break;
1690 case 'F': process_options |= PO_FIXED_STRINGS; break;
1691 case 'H': filenames = FN_FORCE; break;
1692 case 'h': filenames = FN_NONE; break;
1693 case 'i': options |= PCRE_CASELESS; break;
1694 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1695 case 'L': filenames = FN_NOMATCH_ONLY; break;
1696 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1697 case 'n': number = TRUE; break;
1698 case 'o': only_matching = TRUE; break;
1699 case 'q': quiet = TRUE; break;
1700 case 'r': dee_action = dee_RECURSE; break;
1701 case 's': silent = TRUE; break;
1702 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1703 case 'v': invert = TRUE; break;
1704 case 'w': process_options |= PO_WORD_MATCH; break;
1705 case 'x': process_options |= PO_LINE_MATCH; break;
1706
1707 case 'V':
1708 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1709 exit(0);
1710 break;
1711
1712 default:
1713 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1714 exit(usage(2));
1715 }
1716
1717 return options;
1718 }
1719
1720
1721
1722
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", "22nd", etc.

The suffix depends on the last digit, except that numbers ending in 11, 12 or
13 always take "th". Negative numbers also get "th" (the remainder of a
negative value is never 1, 2 or 3).

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Three characters per byte is more than enough for the decimal digits of
  an int; the extra four are for a sign, the two-letter suffix and the
  terminating zero. */

  static char buffer[sizeof(int) * 3 + 4];
  const char *ending = "th";
  int last_two = n % 100;
  char *p = buffer;

  p += sprintf(p, "%d", n);

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  strcpy(p, ending);
  return buffer;
}
1745
1746
1747
1748 /*************************************************
1749 * Compile a single pattern *
1750 *************************************************/
1751
1752 /* When the -F option has been used, this is called for each substring.
1753 Otherwise it's called for each supplied pattern.
1754
1755 Arguments:
1756 pattern the pattern string
1757 options the PCRE options
1758 filename the file name, or NULL for a command-line pattern
1759 count 0 if this is the only command line pattern, or
1760 number of the command line pattern, or
1761 linenumber for a pattern from a file
1762
1763 Returns: TRUE on success, FALSE after an error
1764 */
1765
1766 static BOOL
1767 compile_single_pattern(char *pattern, int options, char *filename, int count)
1768 {
1769 char buffer[MBUFTHIRD + 16];
1770 const char *error;
1771 int errptr;
1772
1773 if (pattern_count >= MAX_PATTERN_COUNT)
1774 {
1775 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1776 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1777 return FALSE;
1778 }
1779
1780 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1781 suffix[process_options]);
1782 pattern_list[pattern_count] =
1783 pcre_compile(buffer, options, &error, &errptr, pcretables);
1784 if (pattern_list[pattern_count] != NULL)
1785 {
1786 pattern_count++;
1787 return TRUE;
1788 }
1789
1790 /* Handle compile errors */
1791
1792 errptr -= (int)strlen(prefix[process_options]);
1793 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1794
1795 if (filename == NULL)
1796 {
1797 if (count == 0)
1798 fprintf(stderr, "pcregrep: Error in command-line regex "
1799 "at offset %d: %s\n", errptr, error);
1800 else
1801 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1802 "at offset %d: %s\n", ordin(count), errptr, error);
1803 }
1804 else
1805 {
1806 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1807 "at offset %d: %s\n", count, filename, errptr, error);
1808 }
1809
1810 return FALSE;
1811 }
1812
1813
1814
1815 /*************************************************
1816 * Compile one supplied pattern *
1817 *************************************************/
1818
1819 /* When the -F option has been used, each string may be a list of strings,
1820 separated by line breaks. They will be matched literally.
1821
1822 Arguments:
1823 pattern the pattern string
1824 options the PCRE options
1825 filename the file name, or NULL for a command-line pattern
1826 count 0 if this is the only command line pattern, or
1827 number of the command line pattern, or
1828 linenumber for a pattern from a file
1829
1830 Returns: TRUE on success, FALSE after an error
1831 */
1832
1833 static BOOL
1834 compile_pattern(char *pattern, int options, char *filename, int count)
1835 {
1836 if ((process_options & PO_FIXED_STRINGS) != 0)
1837 {
1838 char *eop = pattern + strlen(pattern);
1839 char buffer[MBUFTHIRD];
1840 for(;;)
1841 {
1842 int ellength;
1843 char *p = end_of_line(pattern, eop, &ellength);
1844 if (ellength == 0)
1845 return compile_single_pattern(pattern, options, filename, count);
1846 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1847 pattern = p;
1848 if (!compile_single_pattern(buffer, options, filename, count))
1849 return FALSE;
1850 }
1851 }
1852 else return compile_single_pattern(pattern, options, filename, count);
1853 }
1854
1855
1856
1857 /*************************************************
1858 * Main program *
1859 *************************************************/
1860
1861 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1862
1863 int
1864 main(int argc, char **argv)
1865 {
1866 int i, j;
1867 int rc = 1;
1868 int pcre_options = 0;
1869 int cmd_pattern_count = 0;
1870 int hint_count = 0;
1871 int errptr;
1872 BOOL only_one_at_top;
1873 char *patterns[MAX_PATTERN_COUNT];
1874 const char *locale_from = "--locale";
1875 const char *error;
1876
1877 /* Set the default line ending value from the default in the PCRE library;
1878 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1879 Note that the return values from pcre_config(), though derived from the ASCII
1880 codes, are the same in EBCDIC environments, so we must use the actual values
1881 rather than escapes such as '\r'. */
1882
1883 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1884 switch(i)
1885 {
1886 default: newline = (char *)"lf"; break;
1887 case 13: newline = (char *)"cr"; break;
1888 case (13 << 8) | 10: newline = (char *)"crlf"; break;
1889 case -1: newline = (char *)"any"; break;
1890 case -2: newline = (char *)"anycrlf"; break;
1891 }
1892
1893 /* Process the options */
1894
1895 for (i = 1; i < argc; i++)
1896 {
1897 option_item *op = NULL;
1898 char *option_data = (char *)""; /* default to keep compiler happy */
1899 BOOL longop;
1900 BOOL longopwasequals = FALSE;
1901
1902 if (argv[i][0] != '-') break;
1903
1904 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1905 but only if we have previously had -e or -f to define the patterns. */
1906
1907 if (argv[i][1] == 0)
1908 {
1909 if (pattern_filename != NULL || pattern_count > 0) break;
1910 else exit(usage(2));
1911 }
1912
1913 /* Handle a long name option, or -- to terminate the options */
1914
1915 if (argv[i][1] == '-')
1916 {
1917 char *arg = argv[i] + 2;
1918 char *argequals = strchr(arg, '=');
1919
1920 if (*arg == 0) /* -- terminates options */
1921 {
1922 i++;
1923 break; /* out of the options-handling loop */
1924 }
1925
1926 longop = TRUE;
1927
1928 /* Some long options have data that follows after =, for example file=name.
1929 Some options have variations in the long name spelling: specifically, we
1930 allow "regexp" because GNU grep allows it, though I personally go along
1931 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1932 These options are entered in the table as "regex(p)". Options can be in
1933 both these categories. */
1934
1935 for (op = optionlist; op->one_char != 0; op++)
1936 {
1937 char *opbra = strchr(op->long_name, '(');
1938 char *equals = strchr(op->long_name, '=');
1939
1940 /* Handle options with only one spelling of the name */
1941
1942 if (opbra == NULL) /* Does not contain '(' */
1943 {
1944 if (equals == NULL) /* Not thing=data case */
1945 {
1946 if (strcmp(arg, op->long_name) == 0) break;
1947 }
1948 else /* Special case xxx=data */
1949 {
1950 int oplen = equals - op->long_name;
1951 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1952 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1953 {
1954 option_data = arg + arglen;
1955 if (*option_data == '=')
1956 {
1957 option_data++;
1958 longopwasequals = TRUE;
1959 }
1960 break;
1961 }
1962 }
1963 }
1964
1965 /* Handle options with an alternate spelling of the name */
1966
1967 else
1968 {
1969 char buff1[24];
1970 char buff2[24];
1971
1972 int baselen = opbra - op->long_name;
1973 int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1974 int arglen = (argequals == NULL || equals == NULL)?
1975 (int)strlen(arg) : argequals - arg;
1976
1977 sprintf(buff1, "%.*s", baselen, op->long_name);
1978 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1979
1980 if (strncmp(arg, buff1, arglen) == 0 ||
1981 strncmp(arg, buff2, arglen) == 0)
1982 {
1983 if (equals != NULL && argequals != NULL)
1984 {
1985 option_data = argequals;
1986 if (*option_data == '=')
1987 {
1988 option_data++;
1989 longopwasequals = TRUE;
1990 }
1991 }
1992 break;
1993 }
1994 }
1995 }
1996
1997 if (op->one_char == 0)
1998 {
1999 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2000 exit(usage(2));
2001 }
2002 }
2003
2004 /* Jeffrey Friedl's debugging harness uses these additional options which
2005 are not in the right form for putting in the option table because they use
2006 only one hyphen, yet are more than one character long. By putting them
2007 separately here, they will not get displayed as part of the help() output,
2008 but I don't think Jeffrey will care about that. */
2009
2010 #ifdef JFRIEDL_DEBUG
2011 else if (strcmp(argv[i], "-pre") == 0) {
2012 jfriedl_prefix = argv[++i];
2013 continue;
2014 } else if (strcmp(argv[i], "-post") == 0) {
2015 jfriedl_postfix = argv[++i];
2016 continue;
2017 } else if (strcmp(argv[i], "-XT") == 0) {
2018 sscanf(argv[++i], "%d", &jfriedl_XT);
2019 continue;
2020 } else if (strcmp(argv[i], "-XR") == 0) {
2021 sscanf(argv[++i], "%d", &jfriedl_XR);
2022 continue;
2023 }
2024 #endif
2025
2026
2027 /* One-char options; many that have no data may be in a single argument; we
2028 continue till we hit the last one or one that needs data. */
2029
2030 else
2031 {
2032 char *s = argv[i] + 1;
2033 longop = FALSE;
2034 while (*s != 0)
2035 {
2036 for (op = optionlist; op->one_char != 0; op++)
2037 { if (*s == op->one_char) break; }
2038 if (op->one_char == 0)
2039 {
2040 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2041 *s, argv[i]);
2042 exit(usage(2));
2043 }
2044 if (op->type != OP_NODATA || s[1] == 0)
2045 {
2046 option_data = s+1;
2047 break;
2048 }
2049 pcre_options = handle_option(*s++, pcre_options);
2050 }
2051 }
2052
2053 /* At this point we should have op pointing to a matched option. If the type
2054 is OP_NODATA, it means that there is no data, and the option might set
2055 something in the PCRE options. */
2056
2057 if (op->type == OP_NODATA)
2058 {
2059 pcre_options = handle_option(op->one_char, pcre_options);
2060 continue;
2061 }
2062
2063 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2064 either has a value or defaults to something. It cannot have data in a
2065 separate item. At the moment, the only such options are "colo(u)r" and
2066 Jeffrey Friedl's special -S debugging option. */
2067
2068 if (*option_data == 0 &&
2069 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2070 {
2071 switch (op->one_char)
2072 {
2073 case N_COLOUR:
2074 colour_option = (char *)"auto";
2075 break;
2076 #ifdef JFRIEDL_DEBUG
2077 case 'S':
2078 S_arg = 0;
2079 break;
2080 #endif
2081 }
2082 continue;
2083 }
2084
2085 /* Otherwise, find the data string for the option. */
2086
2087 if (*option_data == 0)
2088 {
2089 if (i >= argc - 1 || longopwasequals)
2090 {
2091 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2092 exit(usage(2));
2093 }
2094 option_data = argv[++i];
2095 }
2096
2097 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2098 multiple times to create a list of patterns. */
2099
2100 if (op->type == OP_PATLIST)
2101 {
2102 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2103 {
2104 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2105 MAX_PATTERN_COUNT);
2106 return 2;
2107 }
2108 patterns[cmd_pattern_count++] = option_data;
2109 }
2110
2111 /* Otherwise, deal with single string or numeric data values. */
2112
2113 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2114 {
2115 *((char **)op->dataptr) = option_data;
2116 }
2117 else
2118 {
2119 char *endptr;
2120 int n = strtoul(option_data, &endptr, 10);
2121 if (*endptr != 0)
2122 {
2123 if (longop)
2124 {
2125 char *equals = strchr(op->long_name, '=');
2126 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2127 equals - op->long_name;
2128 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2129 option_data, nlen, op->long_name);
2130 }
2131 else
2132 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2133 option_data, op->one_char);
2134 exit(usage(2));
2135 }
2136 *((int *)op->dataptr) = n;
2137 }
2138 }
2139
2140 /* Options have been decoded. If -C was used, its value is used as a default
2141 for -A and -B. */
2142
2143 if (both_context > 0)
2144 {
2145 if (after_context == 0) after_context = both_context;
2146 if (before_context == 0) before_context = both_context;
2147 }
2148
2149 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2150 However, the latter two set the only_matching flag. */
2151
2152 if ((only_matching && (file_offsets || line_offsets)) ||
2153 (file_offsets && line_offsets))
2154 {
2155 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2156 "and/or --line-offsets\n");
2157 exit(usage(2));
2158 }
2159
2160 if (file_offsets || line_offsets) only_matching = TRUE;
2161
2162 /* If a locale has not been provided as an option, see if the LC_ALL or
2163 LC_CTYPE environment variable is set (checked in that order), and if so, use it. */
2164
2165 if (locale == NULL)
2166 {
2167 locale = getenv("LC_ALL");
2168 locale_from = "LCC_ALL";
2169 }
2170
2171 if (locale == NULL)
2172 {
2173 locale = getenv("LC_CTYPE");
2174 locale_from = "LC_CTYPE";
2175 }
2176
2177 /* If a locale has been provided, set it, and generate the tables that PCRE
2178 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2179
2180 if (locale != NULL)
2181 {
2182 if (setlocale(LC_CTYPE, locale) == NULL)
2183 {
2184 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2185 locale, locale_from);
2186 return 2;
2187 }
2188 pcretables = pcre_maketables();
2189 }
2190
2191 /* Sort out colouring */
2192
2193 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2194 {
2195 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2196 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2197 else
2198 {
2199 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2200 colour_option);
2201 return 2;
2202 }
2203 if (do_colour)
2204 {
2205 char *cs = getenv("PCREGREP_COLOUR");
2206 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2207 if (cs != NULL) colour_string = cs;
2208 }
2209 }
2210
2211 /* Interpret the newline type; the default settings are Unix-like. */
2212
2213 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2214 {
2215 pcre_options |= PCRE_NEWLINE_CR;
2216 endlinetype = EL_CR;
2217 }
2218 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2219 {
2220 pcre_options |= PCRE_NEWLINE_LF;
2221 endlinetype = EL_LF;
2222 }
2223 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2224 {
2225 pcre_options |= PCRE_NEWLINE_CRLF;
2226 endlinetype = EL_CRLF;
2227 }
2228 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2229 {
2230 pcre_options |= PCRE_NEWLINE_ANY;
2231 endlinetype = EL_ANY;
2232 }
2233 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2234 {
2235 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2236 endlinetype = EL_ANYCRLF;
2237 }
2238 else
2239 {
2240 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2241 return 2;
2242 }
2243
2244 /* Interpret the text values for -d and -D */
2245
2246 if (dee_option != NULL)
2247 {
2248 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2249 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2250 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2251 else
2252 {
2253 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2254 return 2;
2255 }
2256 }
2257
2258 if (DEE_option != NULL)
2259 {
2260 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2261 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2262 else
2263 {
2264 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2265 return 2;
2266 }
2267 }
2268
2269 /* Check the values for Jeffrey Friedl's debugging options. */
2270
2271 #ifdef JFRIEDL_DEBUG
2272 if (S_arg > 9)
2273 {
2274 fprintf(stderr, "pcregrep: bad value for -S option\n");
2275 return 2;
2276 }
2277 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2278 {
2279 if (jfriedl_XT == 0) jfriedl_XT = 1;
2280 if (jfriedl_XR == 0) jfriedl_XR = 1;
2281 }
2282 #endif
2283
2284 /* Get memory to store the pattern and hints lists. */
2285
2286 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2287 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2288
2289 if (pattern_list == NULL || hints_list == NULL)
2290 {
2291 fprintf(stderr, "pcregrep: malloc failed\n");
2292 goto EXIT2;
2293 }
2294
2295 /* If no patterns were provided by -e, and there is no file provided by -f,
2296 the first remaining argument is the one and only pattern, and it must exist. */
2297
2298 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2299 {
2300 if (i >= argc) return usage(2);
2301 patterns[cmd_pattern_count++] = argv[i++];
2302 }
2303
2304 /* Compile the patterns that were provided on the command line, either by
2305 multiple uses of -e or as a single unkeyed pattern. */
2306
2307 for (j = 0; j < cmd_pattern_count; j++)
2308 {
2309 if (!compile_pattern(patterns[j], pcre_options, NULL,
2310 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2311 goto EXIT2;
2312 }
2313
2314 /* Compile the regular expressions that are provided in a file. */
2315
2316 if (pattern_filename != NULL)
2317 {
2318 int linenumber = 0;
2319 FILE *f;
2320 char *filename;
2321 char buffer[MBUFTHIRD];
2322
2323 if (strcmp(pattern_filename, "-") == 0)
2324 {
2325 f = stdin;
2326 filename = stdin_name;
2327 }
2328 else
2329 {
2330 f = fopen(pattern_filename, "r");
2331 if (f == NULL)
2332 {
2333 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2334 strerror(errno));
2335 goto EXIT2;
2336 }
2337 filename = pattern_filename;
2338 }
2339
2340 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2341 {
2342 char *s = buffer + (int)strlen(buffer);
2343 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2344 *s = 0;
2345 linenumber++;
2346 if (buffer[0] == 0) continue; /* Skip blank lines */
2347 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2348 goto EXIT2;
2349 }
2350
2351 if (f != stdin) fclose(f);
2352 }
2353
2354 /* Study the regular expressions, as we will be running them many times */
2355
2356 for (j = 0; j < pattern_count; j++)
2357 {
2358 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2359 if (error != NULL)
2360 {
2361 char s[16];
2362 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2363 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2364 goto EXIT2;
2365 }
2366 hint_count++;
2367 }
2368
2369 /* If there are include or exclude patterns, compile them. */
2370
2371 if (exclude_pattern != NULL)
2372 {
2373 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2374 pcretables);
2375 if (exclude_compiled == NULL)
2376 {
2377 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2378 errptr, error);
2379 goto EXIT2;
2380 }
2381 }
2382
2383 if (include_pattern != NULL)
2384 {
2385 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2386 pcretables);
2387 if (include_compiled == NULL)
2388 {
2389 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2390 errptr, error);
2391 goto EXIT2;
2392 }
2393 }
2394
2395 if (exclude_dir_pattern != NULL)
2396 {
2397 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2398 pcretables);
2399 if (exclude_dir_compiled == NULL)
2400 {
2401 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2402 errptr, error);
2403 goto EXIT2;
2404 }
2405 }
2406
2407 if (include_dir_pattern != NULL)
2408 {
2409 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2410 pcretables);
2411 if (include_dir_compiled == NULL)
2412 {
2413 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2414 errptr, error);
2415 goto EXIT2;
2416 }
2417 }
2418
2419 /* If there are no further arguments, do the business on stdin and exit. */
2420
2421 if (i >= argc)
2422 {
2423 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2424 goto EXIT;
2425 }
2426
2427 /* Otherwise, work through the remaining arguments as files or directories.
2428 Pass in the fact that there is only one argument at top level - this suppresses
2429 the file name if the argument is not a directory and filenames are not
2430 otherwise forced. */
2431
2432 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2433
2434 for (; i < argc; i++)
2435 {
2436 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2437 only_one_at_top);
2438 if (frc > 1) rc = frc;
2439 else if (frc == 0 && rc == 1) rc = 0;
2440 }
2441
2442 EXIT:
2443 if (pattern_list != NULL)
2444 {
2445 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2446 free(pattern_list);
2447 }
2448 if (hints_list != NULL)
2449 {
2450 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2451 free(hints_list);
2452 }
2453 return rc;
2454
2455 EXIT2:
2456 rc = 2;
2457 goto EXIT;
2458 }
2459
2460 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12