/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 379 - (show annotations) (download)
Mon Mar 2 20:30:05 2009 UTC (5 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 68940 byte(s)
Lock out empty string matches in pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* BOOL is a plain int that holds TRUE or FALSE. */

typedef int BOOL;

#define FALSE 0
#define TRUE  1

/* Fixed limits. OFFSET_SIZE is the length of the offsets vector that is handed
to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE       99

/* MBUFTHIRD is one third of the size of the main file buffer: the larger of
BUFSIZ and 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
107
108
109 /*************************************************
110 * Global variables *
111 *************************************************/
112
113 /* Jeffrey Friedl has some debugging requirements that are not part of the
114 regular code. */
115
116 #ifdef JFRIEDL_DEBUG
117 static int S_arg = -1;
118 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120 static const char *jfriedl_prefix = "";
121 static const char *jfriedl_postfix = "";
122 #endif
123
124 static int endlinetype;
125
126 static char *colour_string = (char *)"1;31";
127 static char *colour_option = NULL;
128 static char *dee_option = NULL;
129 static char *DEE_option = NULL;
130 static char *newline = NULL;
131 static char *pattern_filename = NULL;
132 static char *stdin_name = (char *)"(standard input)";
133 static char *locale = NULL;
134
135 static const unsigned char *pcretables = NULL;
136
137 static int pattern_count = 0;
138 static pcre **pattern_list = NULL;
139 static pcre_extra **hints_list = NULL;
140
141 static char *include_pattern = NULL;
142 static char *exclude_pattern = NULL;
143 static char *include_dir_pattern = NULL;
144 static char *exclude_dir_pattern = NULL;
145
146 static pcre *include_compiled = NULL;
147 static pcre *exclude_compiled = NULL;
148 static pcre *include_dir_compiled = NULL;
149 static pcre *exclude_dir_compiled = NULL;
150
151 static int after_context = 0;
152 static int before_context = 0;
153 static int both_context = 0;
154 static int dee_action = dee_READ;
155 static int DEE_action = DEE_READ;
156 static int error_count = 0;
157 static int filenames = FN_DEFAULT;
158 static int process_options = 0;
159
160 static BOOL count_only = FALSE;
161 static BOOL do_colour = FALSE;
162 static BOOL file_offsets = FALSE;
163 static BOOL hyphenpending = FALSE;
164 static BOOL invert = FALSE;
165 static BOOL line_offsets = FALSE;
166 static BOOL multiline = FALSE;
167 static BOOL number = FALSE;
168 static BOOL only_matching = FALSE;
169 static BOOL quiet = FALSE;
170 static BOOL silent = FALSE;
171 static BOOL utf8 = FALSE;
172
173 /* Structure for options and list of them */
174
/* The kinds of option, as stored in the "type" field of an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

struct option_item {
  int type;                /* One of the OP_ values */
  int one_char;            /* Option letter, or a negative identifier */
  void *dataptr;           /* Variable associated with the option, or NULL */
  const char *long_name;   /* Long option name */
  const char *help_text;   /* One-line description */
};

typedef struct option_item option_item;
185
186 /* Options without a single-letter equivalent get a negative value. This can be
187 used to identify them. */
188
189 #define N_COLOUR (-1)
190 #define N_EXCLUDE (-2)
191 #define N_EXCLUDE_DIR (-3)
192 #define N_HELP (-4)
193 #define N_INCLUDE (-5)
194 #define N_INCLUDE_DIR (-6)
195 #define N_LABEL (-7)
196 #define N_LOCALE (-8)
197 #define N_NULL (-9)
198 #define N_LOFFSETS (-10)
199 #define N_FOFFSETS (-11)
200
201 static option_item optionlist[] = {
202 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
203 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
204 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
205 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
206 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
207 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
208 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
209 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
210 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
211 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
212 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
213 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
214 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
215 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
216 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
217 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
218 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
219 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
220 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
221 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
222 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
223 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
224 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
225 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
227 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
228 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
229 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
230 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
231 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
232 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234 #ifdef JFRIEDL_DEBUG
235 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
236 #endif
237 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
238 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
239 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
240 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
241 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
242 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
243 { OP_NODATA, 0, NULL, NULL, NULL }
244 };
245
246 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248 that the combination of -w and -x has the same effect as -x on its own, so we
249 can treat them as the same. */
250
/* Text placed before each pattern, indexed by the value of process_options
(bit 1 = -w, bit 2 = -x, bit 4 = -F). */

static const char *prefix[] = {
  "",           /* 0: none        */
  "\\b",        /* 1: -w          */
  "^(?:",       /* 2: -x          */
  "^(?:",       /* 3: -w -x       */
  "\\Q",        /* 4: -F          */
  "\\b\\Q",     /* 5: -F -w       */
  "^(?:\\Q",    /* 6: -F -x       */
  "^(?:\\Q"     /* 7: -F -w -x    */
};

/* Text placed after each pattern; same indexing as prefix[]. */

static const char *suffix[] = {
  "",           /* 0: none        */
  "\\b",        /* 1: -w          */
  ")$",         /* 2: -x          */
  ")$",         /* 3: -w -x       */
  "\\E",        /* 4: -F          */
  "\\E\\b",     /* 5: -F -w       */
  "\\E)$",      /* 6: -F -x       */
  "\\E)$"       /* 7: -F -w -x    */
};

/* UTF-8 tables - used only when the newline setting is "any". utf8_table3 is
indexed by the number of additional bytes and gives the mask for the data bits
of the first byte. utf8_table4 is indexed by the low 6 bits of a first byte
that is 0xc0 or greater and gives the number of additional bytes. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,5,5,5,5
};
266
267
268
269 /*************************************************
270 * OS-specific functions *
271 *************************************************/
272
273 /* These functions are defined so that they can be made system specific,
274 although at present the only ones are for Unix, Win32, and for "no support". */
275
276
277 /************* Directory scanning in Unix ***********/
278
279 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280 #include <sys/types.h>
281 #include <sys/stat.h>
282 #include <dirent.h>
283
/* On Unix a directory handle is simply the DIR stream from <dirent.h>. */

typedef DIR directory_type;

/* Test whether a path is a directory.

Argument:   filename   the path to test
Returns:    '/' if the path is a directory
            0 otherwise, including when stat() fails (in the expectation that
              opening the path as a file will then fail too)
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}

/* Open a directory for scanning.

Argument:   filename   the path of the directory
Returns:    the result of opendir(): a handle, or NULL on failure
*/

static directory_type *
opendirectory(char *filename)
{
  directory_type *dir = opendir(filename);
  return dir;
}

/* Get the next entry from an open directory, skipping "." and "..".

Argument:   dir   a handle returned by opendirectory()
Returns:    the entry's name, or NULL when readdir() has nothing more
*/

static char *
readdirectory(directory_type *dir)
{
  struct dirent *entry;

  while ((entry = readdir(dir)) != NULL)
    {
    const char *name = entry->d_name;
    BOOL_IS_DOT_CHECK:
    if (name[0] == '.' &&
        (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
      continue;   /* "." or ".." */
    return entry->d_name;
    }

  return NULL;
}

/* Close a directory that was opened by opendirectory().

Argument:   dir   the handle to close
Returns:    nothing
*/

static void
closedirectory(directory_type *dir)
{
  closedir(dir);
}
319
320
321 /************* Test for regular file in Unix **********/
322
/* Test whether a path is a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is a regular file, or if stat() fails (in the
              expectation that opening it as a file will then fail)
            0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode)? 1 : 0;
}
331
332
333 /************* Test stdout for being a terminal in Unix **********/
334
335 static BOOL
336 is_stdout_tty(void)
337 {
338 return isatty(fileno(stdout));
339 }
340
341
342 /************* Directory scanning in Win32 ***********/
343
344 /* I (Philip Hazel) have no means of testing this code. It was contributed by
345 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346 when it did not exist. David Byron added a patch that moved the #include of
347 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348 */
349
350 #elif HAVE_WINDOWS_H
351
352 #ifndef STRICT
353 # define STRICT
354 #endif
355 #ifndef WIN32_LEAN_AND_MEAN
356 # define WIN32_LEAN_AND_MEAN
357 #endif
358
359 #include <windows.h>
360
361 #ifndef INVALID_FILE_ATTRIBUTES
362 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363 #endif
364
365 typedef struct directory_type
366 {
367 HANDLE handle;
368 BOOL first;
369 WIN32_FIND_DATA data;
370 } directory_type;
371
372 int
373 isdirectory(char *filename)
374 {
375 DWORD attr = GetFileAttributes(filename);
376 if (attr == INVALID_FILE_ATTRIBUTES)
377 return 0;
378 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379 }
380
381 directory_type *
382 opendirectory(char *filename)
383 {
384 size_t len;
385 char *pattern;
386 directory_type *dir;
387 DWORD err;
388 len = strlen(filename);
389 pattern = (char *) malloc(len + 3);
390 dir = (directory_type *) malloc(sizeof(*dir));
391 if ((pattern == NULL) || (dir == NULL))
392 {
393 fprintf(stderr, "pcregrep: malloc failed\n");
394 exit(2);
395 }
396 memcpy(pattern, filename, len);
397 memcpy(&(pattern[len]), "\\*", 3);
398 dir->handle = FindFirstFile(pattern, &(dir->data));
399 if (dir->handle != INVALID_HANDLE_VALUE)
400 {
401 free(pattern);
402 dir->first = TRUE;
403 return dir;
404 }
405 err = GetLastError();
406 free(pattern);
407 free(dir);
408 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409 return NULL;
410 }
411
412 char *
413 readdirectory(directory_type *dir)
414 {
415 for (;;)
416 {
417 if (!dir->first)
418 {
419 if (!FindNextFile(dir->handle, &(dir->data)))
420 return NULL;
421 }
422 else
423 {
424 dir->first = FALSE;
425 }
426 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427 return dir->data.cFileName;
428 }
429 #ifndef _MSC_VER
430 return NULL; /* Keep compiler happy; never executed */
431 #endif
432 }
433
434 void
435 closedirectory(directory_type *dir)
436 {
437 FindClose(dir->handle);
438 free(dir);
439 }
440
441
442 /************* Test for regular file in Win32 **********/
443
444 /* I don't know how to do this, or if it can be done; assume all paths are
445 regular if they are not directories. */
446
/* Win32: every path that is not a directory is taken to be a regular file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns zero for the path, 0 otherwise
*/

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
451
452
453 /************* Test stdout for being a terminal in Win32 **********/
454
455 /* I don't know how to do this; assume never */
456
457 static BOOL
458 is_stdout_tty(void)
459 {
460 return FALSE;
461 }
462
463
464 /************* Directory scanning when we can't do it ***********/
465
466 /* The type is void, and apart from isdirectory(), the functions do nothing. */
467
468 #else
469
/* Without directory support the handle type is void, nothing is ever a
directory, a directory can never be opened, and reading or closing one does
nothing. */

typedef void directory_type;

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
476
477
478 /************* Test for regular when we can't do it **********/
479
480 /* Assume all files are regular. */
481
/* Without a way to test, every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
483
484
485 /************* Test stdout for being a terminal when we can't do it **********/
486
487 static BOOL
488 is_stdout_tty(void)
489 {
490 return FALSE;
491 }
492
493
494 #endif
495
496
497
498 #ifndef HAVE_STRERROR
499 /*************************************************
500 * Provide strerror() for non-ANSI libraries *
501 *************************************************/
502
503 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
504 in their libraries, but can provide the same facility by this simple
505 alternative function. */
506
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), built on the sys_errlist table.

Argument:   n   an error number
Returns:    the table's message for n, or a fixed text when n is outside the
            range 0 to sys_nerr - 1
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
516 #endif /* HAVE_STRERROR */
517
518
519
520 /*************************************************
521 * Find end of line *
522 *************************************************/
523
524 /* The length of the endline sequence that is found is set via lenptr. This may
525 be zero at the very end of the file if there is no line-ending sequence there.
526
527 Arguments:
528 p current position in line
529 endptr end of available data
530 lenptr where to put the length of the eol sequence
531
532 Returns: pointer to the last byte of the line
533 */
534
535 static char *
536 end_of_line(char *p, char *endptr, int *lenptr)
537 {
538 switch(endlinetype)
539 {
540 default: /* Just in case */
541 case EL_LF:
542 while (p < endptr && *p != '\n') p++;
543 if (p < endptr)
544 {
545 *lenptr = 1;
546 return p + 1;
547 }
548 *lenptr = 0;
549 return endptr;
550
551 case EL_CR:
552 while (p < endptr && *p != '\r') p++;
553 if (p < endptr)
554 {
555 *lenptr = 1;
556 return p + 1;
557 }
558 *lenptr = 0;
559 return endptr;
560
561 case EL_CRLF:
562 for (;;)
563 {
564 while (p < endptr && *p != '\r') p++;
565 if (++p >= endptr)
566 {
567 *lenptr = 0;
568 return endptr;
569 }
570 if (*p == '\n')
571 {
572 *lenptr = 2;
573 return p + 1;
574 }
575 }
576 break;
577
578 case EL_ANYCRLF:
579 while (p < endptr)
580 {
581 int extra = 0;
582 register int c = *((unsigned char *)p);
583
584 if (utf8 && c >= 0xc0)
585 {
586 int gcii, gcss;
587 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
588 gcss = 6*extra;
589 c = (c & utf8_table3[extra]) << gcss;
590 for (gcii = 1; gcii <= extra; gcii++)
591 {
592 gcss -= 6;
593 c |= (p[gcii] & 0x3f) << gcss;
594 }
595 }
596
597 p += 1 + extra;
598
599 switch (c)
600 {
601 case 0x0a: /* LF */
602 *lenptr = 1;
603 return p;
604
605 case 0x0d: /* CR */
606 if (p < endptr && *p == 0x0a)
607 {
608 *lenptr = 2;
609 p++;
610 }
611 else *lenptr = 1;
612 return p;
613
614 default:
615 break;
616 }
617 } /* End of loop for ANYCRLF case */
618
619 *lenptr = 0; /* Must have hit the end */
620 return endptr;
621
622 case EL_ANY:
623 while (p < endptr)
624 {
625 int extra = 0;
626 register int c = *((unsigned char *)p);
627
628 if (utf8 && c >= 0xc0)
629 {
630 int gcii, gcss;
631 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
632 gcss = 6*extra;
633 c = (c & utf8_table3[extra]) << gcss;
634 for (gcii = 1; gcii <= extra; gcii++)
635 {
636 gcss -= 6;
637 c |= (p[gcii] & 0x3f) << gcss;
638 }
639 }
640
641 p += 1 + extra;
642
643 switch (c)
644 {
645 case 0x0a: /* LF */
646 case 0x0b: /* VT */
647 case 0x0c: /* FF */
648 *lenptr = 1;
649 return p;
650
651 case 0x0d: /* CR */
652 if (p < endptr && *p == 0x0a)
653 {
654 *lenptr = 2;
655 p++;
656 }
657 else *lenptr = 1;
658 return p;
659
660 case 0x85: /* NEL */
661 *lenptr = utf8? 2 : 1;
662 return p;
663
664 case 0x2028: /* LS */
665 case 0x2029: /* PS */
666 *lenptr = 3;
667 return p;
668
669 default:
670 break;
671 }
672 } /* End of loop for ANY case */
673
674 *lenptr = 0; /* Must have hit the end */
675 return endptr;
676 } /* End of overall switch */
677 }
678
679
680
681 /*************************************************
682 * Find start of previous line *
683 *************************************************/
684
685 /* This is called when looking back for before lines to print.
686
687 Arguments:
688 p start of the subsequent line
689 startptr start of available data
690
691 Returns: pointer to the start of the previous line
692 */
693
694 static char *
695 previous_line(char *p, char *startptr)
696 {
697 switch(endlinetype)
698 {
699 default: /* Just in case */
700 case EL_LF:
701 p--;
702 while (p > startptr && p[-1] != '\n') p--;
703 return p;
704
705 case EL_CR:
706 p--;
707 while (p > startptr && p[-1] != '\n') p--;
708 return p;
709
710 case EL_CRLF:
711 for (;;)
712 {
713 p -= 2;
714 while (p > startptr && p[-1] != '\n') p--;
715 if (p <= startptr + 1 || p[-2] == '\r') return p;
716 }
717 return p; /* But control should never get here */
718
719 case EL_ANY:
720 case EL_ANYCRLF:
721 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722 if (utf8) while ((*p & 0xc0) == 0x80) p--;
723
724 while (p > startptr)
725 {
726 register int c;
727 char *pp = p - 1;
728
729 if (utf8)
730 {
731 int extra = 0;
732 while ((*pp & 0xc0) == 0x80) pp--;
733 c = *((unsigned char *)pp);
734 if (c >= 0xc0)
735 {
736 int gcii, gcss;
737 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738 gcss = 6*extra;
739 c = (c & utf8_table3[extra]) << gcss;
740 for (gcii = 1; gcii <= extra; gcii++)
741 {
742 gcss -= 6;
743 c |= (pp[gcii] & 0x3f) << gcss;
744 }
745 }
746 }
747 else c = *((unsigned char *)pp);
748
749 if (endlinetype == EL_ANYCRLF) switch (c)
750 {
751 case 0x0a: /* LF */
752 case 0x0d: /* CR */
753 return p;
754
755 default:
756 break;
757 }
758
759 else switch (c)
760 {
761 case 0x0a: /* LF */
762 case 0x0b: /* VT */
763 case 0x0c: /* FF */
764 case 0x0d: /* CR */
765 case 0x85: /* NEL */
766 case 0x2028: /* LS */
767 case 0x2029: /* PS */
768 return p;
769
770 default:
771 break;
772 }
773
774 p = pp; /* Back one character */
775 } /* End of loop for ANY case */
776
777 return startptr; /* Hit start of data */
778 } /* End of overall switch */
779 }
780
781
782
783
784
785 /*************************************************
786 * Print the previous "after" lines *
787 *************************************************/
788
789 /* This is called if we are about to lose said lines because of buffer filling,
790 and at the end of the file. The data in the line is written using fwrite() so
791 that a binary zero does not terminate it.
792
793 Arguments:
794 lastmatchnumber the number of the last matching line, plus one
795 lastmatchrestart where we restarted after the last match
796 endptr end of available data
797 printname filename for printing
798
799 Returns: nothing
800 */
801
802 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803 char *endptr, char *printname)
804 {
805 if (after_context > 0 && lastmatchnumber > 0)
806 {
807 int count = 0;
808 while (lastmatchrestart < endptr && count++ < after_context)
809 {
810 int ellength;
811 char *pp = lastmatchrestart;
812 if (printname != NULL) fprintf(stdout, "%s-", printname);
813 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814 pp = end_of_line(pp, endptr, &ellength);
815 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816 lastmatchrestart = pp;
817 }
818 hyphenpending = TRUE;
819 }
820 }
821
822
823
824 /*************************************************
825 * Apply patterns to subject till one matches *
826 *************************************************/
827
828 /* This function is called to run through all patterns, looking for a match. It
829 is used multiple times for the same subject when colouring is enabled, in order
830 to find all possible matches.
831
832 Arguments:
833 matchptr the start of the subject
834 length the length of the subject to match
835 offsets the offets vector to fill in
836 mrc address of where to put the result of pcre_exec()
837
838 Returns: TRUE if there was a match
839 FALSE if there was no match
840 invert if there was a non-fatal error
841 */
842
843 static BOOL
844 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845 {
846 int i;
847 for (i = 0; i < pattern_count; i++)
848 {
849 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851 if (*mrc >= 0) return TRUE;
852 if (*mrc == PCRE_ERROR_NOMATCH) continue;
853 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855 fprintf(stderr, "this text:\n");
856 fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
857 fprintf(stderr, "\n");
858 if (error_count == 0 &&
859 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860 {
861 fprintf(stderr, "pcregrep: error %d means that a resource limit "
862 "was exceeded\n", *mrc);
863 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864 }
865 if (error_count++ > 20)
866 {
867 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868 exit(2);
869 }
870 return invert; /* No more matching; don't show the line again */
871 }
872
873 return FALSE; /* No match, no errors */
874 }
875
876
877
878 /*************************************************
879 * Grep an individual file *
880 *************************************************/
881
882 /* This is called from grep_or_recurse() below. It uses a buffer that is three
883 times the value of MBUFTHIRD. The matching point is never allowed to stray into
884 the top third of the buffer, thus keeping more of the file available for
885 context printing or for multiline scanning. For large files, the pointer will
886 be in the middle third most of the time, so the bottom third is available for
887 "before" context printing.
888
889 Arguments:
890 handle the fopened FILE stream for a normal file
891 the gzFile pointer when reading is via libz
892 the BZFILE pointer when reading is via libbz2
893 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894 printname the file name if it is to be printed for each match
895 or NULL if the file name is not to be printed
896 it cannot be NULL if filenames[_nomatch]_only is set
897
898 Returns: 0 if there was at least one match
899 1 otherwise (no matches)
900 2 if there is a read error on a .bz2 file
901 */
902
903 static int
904 pcregrep(void *handle, int frtype, char *printname)
905 {
906 int rc = 1;
907 int linenumber = 1;
908 int lastmatchnumber = 0;
909 int count = 0;
910 int filepos = 0;
911 int offsets[OFFSET_SIZE];
912 char *lastmatchrestart = NULL;
913 char buffer[3*MBUFTHIRD];
914 char *ptr = buffer;
915 char *endptr;
916 size_t bufflength;
917 BOOL endhyphenpending = FALSE;
918 FILE *in = NULL; /* Ensure initialized */
919
920 #ifdef SUPPORT_LIBZ
921 gzFile ingz = NULL;
922 #endif
923
924 #ifdef SUPPORT_LIBBZ2
925 BZFILE *inbz2 = NULL;
926 #endif
927
928
929 /* Do the first read into the start of the buffer and set up the pointer to end
930 of what we have. In the case of libz, a non-zipped .gz file will be read as a
931 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932 fail. */
933
934 #ifdef SUPPORT_LIBZ
935 if (frtype == FR_LIBZ)
936 {
937 ingz = (gzFile)handle;
938 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939 }
940 else
941 #endif
942
943 #ifdef SUPPORT_LIBBZ2
944 if (frtype == FR_LIBBZ2)
945 {
946 inbz2 = (BZFILE *)handle;
947 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
949 } /* without the cast it is unsigned. */
950 else
951 #endif
952
953 {
954 in = (FILE *)handle;
955 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956 }
957
958 endptr = buffer + bufflength;
959
960 /* Loop while the current pointer is not at the end of the file. For large
961 files, endptr will be at the end of the buffer when we are in the middle of the
962 file, but ptr will never get there, because as soon as it gets over 2/3 of the
963 way, the buffer is shifted left and re-filled. */
964
965 while (ptr < endptr)
966 {
967 int endlinelength;
968 int mrc = 0;
969 BOOL match;
970 char *matchptr = ptr;
971 char *t = ptr;
972 size_t length, linelength;
973
974 /* At this point, ptr is at the start of a line. We need to find the length
975 of the subject string to pass to pcre_exec(). In multiline mode, it is the
976 length remainder of the data in the buffer. Otherwise, it is the length of
977 the next line, excluding the terminating newline. After matching, we always
978 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979 option is used for compiling, so that any match is constrained to be in the
980 first line. */
981
982 t = end_of_line(t, endptr, &endlinelength);
983 linelength = t - ptr - endlinelength;
984 length = multiline? (size_t)(endptr - ptr) : linelength;
985
986 /* Extra processing for Jeffrey Friedl's debugging. */
987
988 #ifdef JFRIEDL_DEBUG
989 if (jfriedl_XT || jfriedl_XR)
990 {
991 #include <sys/time.h>
992 #include <time.h>
993 struct timeval start_time, end_time;
994 struct timezone dummy;
995 int i;
996
997 if (jfriedl_XT)
998 {
999 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000 const char *orig = ptr;
1001 ptr = malloc(newlen + 1);
1002 if (!ptr) {
1003 printf("out of memory");
1004 exit(2);
1005 }
1006 endptr = ptr;
1007 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008 for (i = 0; i < jfriedl_XT; i++) {
1009 strncpy(endptr, orig, length);
1010 endptr += length;
1011 }
1012 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013 length = newlen;
1014 }
1015
1016 if (gettimeofday(&start_time, &dummy) != 0)
1017 perror("bad gettimeofday");
1018
1019
1020 for (i = 0; i < jfriedl_XR; i++)
1021 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023
1024 if (gettimeofday(&end_time, &dummy) != 0)
1025 perror("bad gettimeofday");
1026
1027 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1028 -
1029 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1030
1031 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1032 return 0;
1033 }
1034 #endif
1035
1036 /* We come back here after a match when the -o option (only_matching) is set,
1037 in order to find any further matches in the same line. */
1038
1039 ONLY_MATCHING_RESTART:
1040
1041 /* Run through all the patterns until one matches or there is an error other
1042 than NOMATCH. This code is in a subroutine so that it can be re-used for
1043 finding subsequent matches when colouring matched lines. */
1044
1045 match = match_patterns(matchptr, length, offsets, &mrc);
1046
1047 /* If it's a match or a not-match (as required), do what's wanted. */
1048
1049 if (match != invert)
1050 {
1051 BOOL hyphenprinted = FALSE;
1052
1053 /* We've failed if we want a file that doesn't have any matches. */
1054
1055 if (filenames == FN_NOMATCH_ONLY) return 1;
1056
1057 /* Just count if just counting is wanted. */
1058
1059 if (count_only) count++;
1060
1061 /* If all we want is a file name, there is no need to scan any more lines
1062 in the file. */
1063
1064 else if (filenames == FN_ONLY)
1065 {
1066 fprintf(stdout, "%s\n", printname);
1067 return 0;
1068 }
1069
1070 /* Likewise, if all we want is a yes/no answer. */
1071
1072 else if (quiet) return 0;
1073
1074 /* The --only-matching option prints just the substring that matched, and
1075 the --file-offsets and --line-offsets options output offsets for the
1076 matching substring (they both force --only-matching). None of these options
1077 prints any context. Afterwards, adjust the start and length, and then jump
1078 back to look for further matches in the same line. If we are in invert
1079 mode, however, nothing is printed - this could be still useful because the
1080 return code is set. */
1081
1082 else if (only_matching)
1083 {
1084 if (!invert)
1085 {
1086 if (printname != NULL) fprintf(stdout, "%s:", printname);
1087 if (number) fprintf(stdout, "%d:", linenumber);
1088 if (line_offsets)
1089 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090 offsets[1] - offsets[0]);
1091 else if (file_offsets)
1092 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093 offsets[1] - offsets[0]);
1094 else
1095 {
1096 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099 }
1100 fprintf(stdout, "\n");
1101 matchptr += offsets[1];
1102 length -= offsets[1];
1103 match = FALSE;
1104 goto ONLY_MATCHING_RESTART;
1105 }
1106 }
1107
1108 /* This is the default case when none of the above options is set. We print
1109 the matching lines(s), possibly preceded and/or followed by other lines of
1110 context. */
1111
1112 else
1113 {
1114 /* See if there is a requirement to print some "after" lines from a
1115 previous match. We never print any overlaps. */
1116
1117 if (after_context > 0 && lastmatchnumber > 0)
1118 {
1119 int ellength;
1120 int linecount = 0;
1121 char *p = lastmatchrestart;
1122
1123 while (p < ptr && linecount < after_context)
1124 {
1125 p = end_of_line(p, ptr, &ellength);
1126 linecount++;
1127 }
1128
1129 /* It is important to advance lastmatchrestart during this printing so
1130 that it interacts correctly with any "before" printing below. Print
1131 each line's data using fwrite() in case there are binary zeroes. */
1132
1133 while (lastmatchrestart < p)
1134 {
1135 char *pp = lastmatchrestart;
1136 if (printname != NULL) fprintf(stdout, "%s-", printname);
1137 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1138 pp = end_of_line(pp, endptr, &ellength);
1139 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1140 lastmatchrestart = pp;
1141 }
1142 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1143 }
1144
1145 /* If there were non-contiguous lines printed above, insert hyphens. */
1146
1147 if (hyphenpending)
1148 {
1149 fprintf(stdout, "--\n");
1150 hyphenpending = FALSE;
1151 hyphenprinted = TRUE;
1152 }
1153
1154 /* See if there is a requirement to print some "before" lines for this
1155 match. Again, don't print overlaps. */
1156
1157 if (before_context > 0)
1158 {
1159 int linecount = 0;
1160 char *p = ptr;
1161
1162 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1163 linecount < before_context)
1164 {
1165 linecount++;
1166 p = previous_line(p, buffer);
1167 }
1168
1169 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1170 fprintf(stdout, "--\n");
1171
1172 while (p < ptr)
1173 {
1174 int ellength;
1175 char *pp = p;
1176 if (printname != NULL) fprintf(stdout, "%s-", printname);
1177 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1178 pp = end_of_line(pp, endptr, &ellength);
1179 fwrite(p, 1, pp - p, stdout);
1180 p = pp;
1181 }
1182 }
1183
1184 /* Now print the matching line(s); ensure we set hyphenpending at the end
1185 of the file if any context lines are being output. */
1186
1187 if (after_context > 0 || before_context > 0)
1188 endhyphenpending = TRUE;
1189
1190 if (printname != NULL) fprintf(stdout, "%s:", printname);
1191 if (number) fprintf(stdout, "%d:", linenumber);
1192
1193 /* In multiline mode, we want to print to the end of the line in which
1194 the end of the matched string is found, so we adjust linelength and the
1195 line number appropriately, but only when there actually was a match
1196 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1197 the match will always be before the first newline sequence. */
1198
1199 if (multiline)
1200 {
1201 int ellength;
1202 char *endmatch = ptr;
1203 if (!invert)
1204 {
1205 endmatch += offsets[1];
1206 t = ptr;
1207 while (t < endmatch)
1208 {
1209 t = end_of_line(t, endptr, &ellength);
1210 if (t <= endmatch) linenumber++; else break;
1211 }
1212 }
1213 endmatch = end_of_line(endmatch, endptr, &ellength);
1214 linelength = endmatch - ptr - ellength;
1215 }
1216
1217 /*** NOTE: Use only fwrite() to output the data line, so that binary
1218 zeroes are treated as just another data character. */
1219
1220 /* This extra option, for Jeffrey Friedl's debugging requirements,
1221 replaces the matched string, or a specific captured string if it exists,
1222 with X. When this happens, colouring is ignored. */
1223
1224 #ifdef JFRIEDL_DEBUG
1225 if (S_arg >= 0 && S_arg < mrc)
1226 {
1227 int first = S_arg * 2;
1228 int last = first + 1;
1229 fwrite(ptr, 1, offsets[first], stdout);
1230 fprintf(stdout, "X");
1231 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1232 }
1233 else
1234 #endif
1235
1236 /* We have to split the line(s) up if colouring, and search for further
1237 matches. */
1238
1239 if (do_colour)
1240 {
1241 int last_offset = 0;
1242 fwrite(ptr, 1, offsets[0], stdout);
1243 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245 fprintf(stdout, "%c[00m", 0x1b);
1246 for (;;)
1247 {
1248 last_offset += offsets[1];
1249 matchptr += offsets[1];
1250 length -= offsets[1];
1251 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252 fwrite(matchptr, 1, offsets[0], stdout);
1253 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255 fprintf(stdout, "%c[00m", 0x1b);
1256 }
1257 fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258 stdout);
1259 }
1260
1261 /* Not colouring; no need to search for further matches */
1262
1263 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264 }
1265
1266 /* End of doing what has to be done for a match */
1267
1268 rc = 0; /* Had some success */
1269
1270 /* Remember where the last match happened for after_context. We remember
1271 where we are about to restart, and that line's number. */
1272
1273 lastmatchrestart = ptr + linelength + endlinelength;
1274 lastmatchnumber = linenumber + 1;
1275 }
1276
1277 /* For a match in multiline inverted mode (which of course did not cause
1278 anything to be printed), we have to move on to the end of the match before
1279 proceeding. */
1280
1281 if (multiline && invert && match)
1282 {
1283 int ellength;
1284 char *endmatch = ptr + offsets[1];
1285 t = ptr;
1286 while (t < endmatch)
1287 {
1288 t = end_of_line(t, endptr, &ellength);
1289 if (t <= endmatch) linenumber++; else break;
1290 }
1291 endmatch = end_of_line(endmatch, endptr, &ellength);
1292 linelength = endmatch - ptr - ellength;
1293 }
1294
1295 /* Advance to after the newline and increment the line number. The file
1296 offset to the current line is maintained in filepos. */
1297
1298 ptr += linelength + endlinelength;
1299 filepos += linelength + endlinelength;
1300 linenumber++;
1301
1302 /* If we haven't yet reached the end of the file (the buffer is full), and
1303 the current point is in the top 1/3 of the buffer, slide the buffer down by
1304 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1305 about to be lost, print them. */
1306
1307 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1308 {
1309 if (after_context > 0 &&
1310 lastmatchnumber > 0 &&
1311 lastmatchrestart < buffer + MBUFTHIRD)
1312 {
1313 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1314 lastmatchnumber = 0;
1315 }
1316
1317 /* Now do the shuffle */
1318
1319 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1320 ptr -= MBUFTHIRD;
1321
1322 #ifdef SUPPORT_LIBZ
1323 if (frtype == FR_LIBZ)
1324 bufflength = 2*MBUFTHIRD +
1325 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1326 else
1327 #endif
1328
1329 #ifdef SUPPORT_LIBBZ2
1330 if (frtype == FR_LIBBZ2)
1331 bufflength = 2*MBUFTHIRD +
1332 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1333 else
1334 #endif
1335
1336 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1337
1338 endptr = buffer + bufflength;
1339
1340 /* Adjust any last match point */
1341
1342 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1343 }
1344 } /* Loop through the whole file */
1345
1346 /* End of file; print final "after" lines if wanted; do_after_lines sets
1347 hyphenpending if it prints something. */
1348
1349 if (!only_matching && !count_only)
1350 {
1351 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1352 hyphenpending |= endhyphenpending;
1353 }
1354
1355 /* Print the file name if we are looking for those without matches and there
1356 were none. If we found a match, we won't have got this far. */
1357
1358 if (filenames == FN_NOMATCH_ONLY)
1359 {
1360 fprintf(stdout, "%s\n", printname);
1361 return 0;
1362 }
1363
1364 /* Print the match count if wanted */
1365
1366 if (count_only)
1367 {
1368 if (printname != NULL) fprintf(stdout, "%s:", printname);
1369 fprintf(stdout, "%d\n", count);
1370 }
1371
1372 return rc;
1373 }
1374
1375
1376
1377 /*************************************************
1378 * Grep a file or recurse into a directory *
1379 *************************************************/
1380
1381 /* Given a path name, if it's a directory, scan all the files if we are
1382 recursing; if it's a file, grep it.
1383
1384 Arguments:
1385 pathname the path to investigate
1386 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1387 only_one_at_top TRUE if the path is the only one at toplevel
1388
1389 Returns: 0 if there was at least one match
1390 1 if there were no matches
1391 2 there was some kind of error
1392
1393 However, file opening failures are suppressed if "silent" is set.
1394 */
1395
1396 static int
1397 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1398 {
1399 int rc = 1;
1400 int sep;
1401 int frtype;
1402 int pathlen;
1403 void *handle;
1404 FILE *in = NULL; /* Ensure initialized */
1405
1406 #ifdef SUPPORT_LIBZ
1407 gzFile ingz = NULL;
1408 #endif
1409
1410 #ifdef SUPPORT_LIBBZ2
1411 BZFILE *inbz2 = NULL;
1412 #endif
1413
1414 /* If the file name is "-" we scan stdin */
1415
1416 if (strcmp(pathname, "-") == 0)
1417 {
1418 return pcregrep(stdin, FR_PLAIN,
1419 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1420 stdin_name : NULL);
1421 }
1422
1423 /* If the file is a directory, skip if skipping or if we are recursing, scan
1424 each file and directory within it, subject to any include or exclude patterns
1425 that were set. The scanning code is localized so it can be made
1426 system-specific. */
1427
1428 if ((sep = isdirectory(pathname)) != 0)
1429 {
1430 if (dee_action == dee_SKIP) return 1;
1431 if (dee_action == dee_RECURSE)
1432 {
1433 char buffer[1024];
1434 char *nextfile;
1435 directory_type *dir = opendirectory(pathname);
1436
1437 if (dir == NULL)
1438 {
1439 if (!silent)
1440 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1441 strerror(errno));
1442 return 2;
1443 }
1444
1445 while ((nextfile = readdirectory(dir)) != NULL)
1446 {
1447 int frc, nflen;
1448 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449 nflen = strlen(nextfile);
1450
1451 if (isdirectory(buffer))
1452 {
1453 if (exclude_dir_compiled != NULL &&
1454 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455 continue;
1456
1457 if (include_dir_compiled != NULL &&
1458 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459 continue;
1460 }
1461 else
1462 {
1463 if (exclude_compiled != NULL &&
1464 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465 continue;
1466
1467 if (include_compiled != NULL &&
1468 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469 continue;
1470 }
1471
1472 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473 if (frc > 1) rc = frc;
1474 else if (frc == 0 && rc == 1) rc = 0;
1475 }
1476
1477 closedirectory(dir);
1478 return rc;
1479 }
1480 }
1481
1482 /* If the file is not a directory and not a regular file, skip it if that's
1483 been requested. */
1484
1485 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1486
1487 /* Control reaches here if we have a regular file, or if we have a directory
1488 and recursion or skipping was not requested, or if we have anything else and
1489 skipping was not requested. The scan proceeds. If this is the first and only
1490 argument at top level, we don't show the file name, unless we are only showing
1491 the file name, or the filename was forced (-H). */
1492
1493 pathlen = strlen(pathname);
1494
1495 /* Open using zlib if it is supported and the file name ends with .gz. */
1496
1497 #ifdef SUPPORT_LIBZ
1498 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1499 {
1500 ingz = gzopen(pathname, "rb");
1501 if (ingz == NULL)
1502 {
1503 if (!silent)
1504 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1505 strerror(errno));
1506 return 2;
1507 }
1508 handle = (void *)ingz;
1509 frtype = FR_LIBZ;
1510 }
1511 else
1512 #endif
1513
1514 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1515
1516 #ifdef SUPPORT_LIBBZ2
1517 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1518 {
1519 inbz2 = BZ2_bzopen(pathname, "rb");
1520 handle = (void *)inbz2;
1521 frtype = FR_LIBBZ2;
1522 }
1523 else
1524 #endif
1525
1526 /* Otherwise use plain fopen(). The label is so that we can come back here if
1527 an attempt to read a .bz2 file indicates that it really is a plain file. */
1528
1529 #ifdef SUPPORT_LIBBZ2
1530 PLAIN_FILE:
1531 #endif
1532 {
1533 in = fopen(pathname, "r");
1534 handle = (void *)in;
1535 frtype = FR_PLAIN;
1536 }
1537
1538 /* All the opening methods return errno when they fail. */
1539
1540 if (handle == NULL)
1541 {
1542 if (!silent)
1543 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1544 strerror(errno));
1545 return 2;
1546 }
1547
1548 /* Now grep the file */
1549
1550 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1551 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1552
1553 /* Close in an appropriate manner. */
1554
1555 #ifdef SUPPORT_LIBZ
1556 if (frtype == FR_LIBZ)
1557 gzclose(ingz);
1558 else
1559 #endif
1560
1561 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1562 read failed. If the error indicates that the file isn't in fact bzipped, try
1563 again as a normal file. */
1564
1565 #ifdef SUPPORT_LIBBZ2
1566 if (frtype == FR_LIBBZ2)
1567 {
1568 if (rc == 2)
1569 {
1570 int errnum;
1571 const char *err = BZ2_bzerror(inbz2, &errnum);
1572 if (errnum == BZ_DATA_ERROR_MAGIC)
1573 {
1574 BZ2_bzclose(inbz2);
1575 goto PLAIN_FILE;
1576 }
1577 else if (!silent)
1578 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1579 pathname, err);
1580 }
1581 BZ2_bzclose(inbz2);
1582 }
1583 else
1584 #endif
1585
1586 /* Normal file close */
1587
1588 fclose(in);
1589
1590 /* Pass back the yield from pcregrep(). */
1591
1592 return rc;
1593 }
1594
1595
1596
1597
1598 /*************************************************
1599 * Usage function *
1600 *************************************************/
1601
1602 static int
1603 usage(int rc)
1604 {
1605 option_item *op;
1606 fprintf(stderr, "Usage: pcregrep [-");
1607 for (op = optionlist; op->one_char != 0; op++)
1608 {
1609 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1610 }
1611 fprintf(stderr, "] [long options] [pattern] [files]\n");
1612 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1613 "options.\n");
1614 return rc;
1615 }
1616
1617
1618
1619
1620 /*************************************************
1621 * Help function *
1622 *************************************************/
1623
1624 static void
1625 help(void)
1626 {
1627 option_item *op;
1628
1629 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1630 printf("Search for PATTERN in each FILE or standard input.\n");
1631 printf("PATTERN must be present if neither -e nor -f is used.\n");
1632 printf("\"-\" can be used as a file name to mean STDIN.\n");
1633
1634 #ifdef SUPPORT_LIBZ
1635 printf("Files whose names end in .gz are read using zlib.\n");
1636 #endif
1637
1638 #ifdef SUPPORT_LIBBZ2
1639 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1640 #endif
1641
1642 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1643 printf("Other files and the standard input are read as plain files.\n\n");
1644 #else
1645 printf("All files are read as plain files, without any interpretation.\n\n");
1646 #endif
1647
1648 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1649 printf("Options:\n");
1650
1651 for (op = optionlist; op->one_char != 0; op++)
1652 {
1653 int n;
1654 char s[4];
1655 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1656 n = 30 - printf(" %s --%s", s, op->long_name);
1657 if (n < 1) n = 1;
1658 printf("%.*s%s\n", n, " ", op->help_text);
1659 }
1660
1661 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1662 printf("trailing white space is removed and blank lines are ignored.\n");
1663 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1664
1665 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1666 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1667 }
1668
1669
1670
1671
1672 /*************************************************
1673 * Handle a single-letter, no data option *
1674 *************************************************/
1675
1676 static int
1677 handle_option(int letter, int options)
1678 {
1679 switch(letter)
1680 {
1681 case N_FOFFSETS: file_offsets = TRUE; break;
1682 case N_HELP: help(); exit(0);
1683 case N_LOFFSETS: line_offsets = number = TRUE; break;
1684 case 'c': count_only = TRUE; break;
1685 case 'F': process_options |= PO_FIXED_STRINGS; break;
1686 case 'H': filenames = FN_FORCE; break;
1687 case 'h': filenames = FN_NONE; break;
1688 case 'i': options |= PCRE_CASELESS; break;
1689 case 'l': filenames = FN_ONLY; break;
1690 case 'L': filenames = FN_NOMATCH_ONLY; break;
1691 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1692 case 'n': number = TRUE; break;
1693 case 'o': only_matching = TRUE; break;
1694 case 'q': quiet = TRUE; break;
1695 case 'r': dee_action = dee_RECURSE; break;
1696 case 's': silent = TRUE; break;
1697 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1698 case 'v': invert = TRUE; break;
1699 case 'w': process_options |= PO_WORD_MATCH; break;
1700 case 'x': process_options |= PO_LINE_MATCH; break;
1701
1702 case 'V':
1703 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1704 exit(0);
1705 break;
1706
1707 default:
1708 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1709 exit(usage(2));
1710 }
1711
1712 return options;
1713 }
1714
1715
1716
1717
1718 /*************************************************
1719 * Construct printed ordinal *
1720 *************************************************/
1721
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do negative numbers.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
  static char buffer[32];     /* Ample for the digits of an int plus suffix */
  const char *suffix = "th";
  int last_two = n % 100;

  /* The teens are irregular, so the final digit decides the suffix only
  outside the range 11-13. */

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1:  suffix = "st"; break;
      case 2:  suffix = "nd"; break;
      case 3:  suffix = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, suffix);
  return buffer;
}
1740
1741
1742
1743 /*************************************************
1744 * Compile a single pattern *
1745 *************************************************/
1746
1747 /* When the -F option has been used, this is called for each substring.
1748 Otherwise it's called for each supplied pattern.
1749
1750 Arguments:
1751 pattern the pattern string
1752 options the PCRE options
1753 filename the file name, or NULL for a command-line pattern
1754 count 0 if this is the only command line pattern, or
1755 number of the command line pattern, or
1756 linenumber for a pattern from a file
1757
1758 Returns: TRUE on success, FALSE after an error
1759 */
1760
1761 static BOOL
1762 compile_single_pattern(char *pattern, int options, char *filename, int count)
1763 {
1764 char buffer[MBUFTHIRD + 16];
1765 const char *error;
1766 int errptr;
1767
1768 if (pattern_count >= MAX_PATTERN_COUNT)
1769 {
1770 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1771 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1772 return FALSE;
1773 }
1774
1775 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1776 suffix[process_options]);
1777 pattern_list[pattern_count] =
1778 pcre_compile(buffer, options, &error, &errptr, pcretables);
1779 if (pattern_list[pattern_count] != NULL)
1780 {
1781 pattern_count++;
1782 return TRUE;
1783 }
1784
1785 /* Handle compile errors */
1786
1787 errptr -= (int)strlen(prefix[process_options]);
1788 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1789
1790 if (filename == NULL)
1791 {
1792 if (count == 0)
1793 fprintf(stderr, "pcregrep: Error in command-line regex "
1794 "at offset %d: %s\n", errptr, error);
1795 else
1796 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1797 "at offset %d: %s\n", ordin(count), errptr, error);
1798 }
1799 else
1800 {
1801 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1802 "at offset %d: %s\n", count, filename, errptr, error);
1803 }
1804
1805 return FALSE;
1806 }
1807
1808
1809
1810 /*************************************************
1811 * Compile one supplied pattern *
1812 *************************************************/
1813
1814 /* When the -F option has been used, each string may be a list of strings,
1815 separated by line breaks. They will be matched literally.
1816
1817 Arguments:
1818 pattern the pattern string
1819 options the PCRE options
1820 filename the file name, or NULL for a command-line pattern
1821 count 0 if this is the only command line pattern, or
1822 number of the command line pattern, or
1823 linenumber for a pattern from a file
1824
1825 Returns: TRUE on success, FALSE after an error
1826 */
1827
1828 static BOOL
1829 compile_pattern(char *pattern, int options, char *filename, int count)
1830 {
1831 if ((process_options & PO_FIXED_STRINGS) != 0)
1832 {
1833 char *eop = pattern + strlen(pattern);
1834 char buffer[MBUFTHIRD];
1835 for(;;)
1836 {
1837 int ellength;
1838 char *p = end_of_line(pattern, eop, &ellength);
1839 if (ellength == 0)
1840 return compile_single_pattern(pattern, options, filename, count);
1841 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1842 pattern = p;
1843 if (!compile_single_pattern(buffer, options, filename, count))
1844 return FALSE;
1845 }
1846 }
1847 else return compile_single_pattern(pattern, options, filename, count);
1848 }
1849
1850
1851
1852 /*************************************************
1853 * Main program *
1854 *************************************************/
1855
1856 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1857
1858 int
1859 main(int argc, char **argv)
1860 {
1861 int i, j;
1862 int rc = 1;
1863 int pcre_options = 0;
1864 int cmd_pattern_count = 0;
1865 int hint_count = 0;
1866 int errptr;
1867 BOOL only_one_at_top;
1868 char *patterns[MAX_PATTERN_COUNT];
1869 const char *locale_from = "--locale";
1870 const char *error;
1871
1872 /* Set the default line ending value from the default in the PCRE library;
1873 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874 */
1875
1876 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1877 switch(i)
1878 {
1879 default: newline = (char *)"lf"; break;
1880 case '\r': newline = (char *)"cr"; break;
1881 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1882 case -1: newline = (char *)"any"; break;
1883 case -2: newline = (char *)"anycrlf"; break;
1884 }
1885
1886 /* Process the options */
1887
1888 for (i = 1; i < argc; i++)
1889 {
1890 option_item *op = NULL;
1891 char *option_data = (char *)""; /* default to keep compiler happy */
1892 BOOL longop;
1893 BOOL longopwasequals = FALSE;
1894
1895 if (argv[i][0] != '-') break;
1896
1897 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1898 but only if we have previously had -e or -f to define the patterns. */
1899
1900 if (argv[i][1] == 0)
1901 {
1902 if (pattern_filename != NULL || pattern_count > 0) break;
1903 else exit(usage(2));
1904 }
1905
1906 /* Handle a long name option, or -- to terminate the options */
1907
1908 if (argv[i][1] == '-')
1909 {
1910 char *arg = argv[i] + 2;
1911 char *argequals = strchr(arg, '=');
1912
1913 if (*arg == 0) /* -- terminates options */
1914 {
1915 i++;
1916 break; /* out of the options-handling loop */
1917 }
1918
1919 longop = TRUE;
1920
1921 /* Some long options have data that follows after =, for example file=name.
1922 Some options have variations in the long name spelling: specifically, we
1923 allow "regexp" because GNU grep allows it, though I personally go along
1924 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1925 These options are entered in the table as "regex(p)". No option is in both
1926 these categories, fortunately. */
1927
1928 for (op = optionlist; op->one_char != 0; op++)
1929 {
1930 char *opbra = strchr(op->long_name, '(');
1931 char *equals = strchr(op->long_name, '=');
1932 if (opbra == NULL) /* Not a (p) case */
1933 {
1934 if (equals == NULL) /* Not thing=data case */
1935 {
1936 if (strcmp(arg, op->long_name) == 0) break;
1937 }
1938 else /* Special case xxx=data */
1939 {
1940 int oplen = equals - op->long_name;
1941 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1942 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1943 {
1944 option_data = arg + arglen;
1945 if (*option_data == '=')
1946 {
1947 option_data++;
1948 longopwasequals = TRUE;
1949 }
1950 break;
1951 }
1952 }
1953 }
1954 else /* Special case xxxx(p) */
1955 {
1956 char buff1[24];
1957 char buff2[24];
1958 int baselen = opbra - op->long_name;
1959 sprintf(buff1, "%.*s", baselen, op->long_name);
1960 sprintf(buff2, "%s%.*s", buff1,
1961 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1962 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1963 break;
1964 }
1965 }
1966
1967 if (op->one_char == 0)
1968 {
1969 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1970 exit(usage(2));
1971 }
1972 }
1973
1974
1975 /* Jeffrey Friedl's debugging harness uses these additional options which
1976 are not in the right form for putting in the option table because they use
1977 only one hyphen, yet are more than one character long. By putting them
1978 separately here, they will not get displayed as part of the help() output,
1979 but I don't think Jeffrey will care about that. */
1980
1981 #ifdef JFRIEDL_DEBUG
1982 else if (strcmp(argv[i], "-pre") == 0) {
1983 jfriedl_prefix = argv[++i];
1984 continue;
1985 } else if (strcmp(argv[i], "-post") == 0) {
1986 jfriedl_postfix = argv[++i];
1987 continue;
1988 } else if (strcmp(argv[i], "-XT") == 0) {
1989 sscanf(argv[++i], "%d", &jfriedl_XT);
1990 continue;
1991 } else if (strcmp(argv[i], "-XR") == 0) {
1992 sscanf(argv[++i], "%d", &jfriedl_XR);
1993 continue;
1994 }
1995 #endif
1996
1997
1998 /* One-char options; many that have no data may be in a single argument; we
1999 continue till we hit the last one or one that needs data. */
2000
2001 else
2002 {
2003 char *s = argv[i] + 1;
2004 longop = FALSE;
2005 while (*s != 0)
2006 {
2007 for (op = optionlist; op->one_char != 0; op++)
2008 { if (*s == op->one_char) break; }
2009 if (op->one_char == 0)
2010 {
2011 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2012 *s, argv[i]);
2013 exit(usage(2));
2014 }
2015 if (op->type != OP_NODATA || s[1] == 0)
2016 {
2017 option_data = s+1;
2018 break;
2019 }
2020 pcre_options = handle_option(*s++, pcre_options);
2021 }
2022 }
2023
2024 /* At this point we should have op pointing to a matched option. If the type
2025 is NO_DATA, it means that there is no data, and the option might set
2026 something in the PCRE options. */
2027
2028 if (op->type == OP_NODATA)
2029 {
2030 pcre_options = handle_option(op->one_char, pcre_options);
2031 continue;
2032 }
2033
2034 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2035 either has a value or defaults to something. It cannot have data in a
2036 separate item. At the moment, the only such options are "colo(u)r" and
2037 Jeffrey Friedl's special -S debugging option. */
2038
2039 if (*option_data == 0 &&
2040 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2041 {
2042 switch (op->one_char)
2043 {
2044 case N_COLOUR:
2045 colour_option = (char *)"auto";
2046 break;
2047 #ifdef JFRIEDL_DEBUG
2048 case 'S':
2049 S_arg = 0;
2050 break;
2051 #endif
2052 }
2053 continue;
2054 }
2055
2056 /* Otherwise, find the data string for the option. */
2057
2058 if (*option_data == 0)
2059 {
2060 if (i >= argc - 1 || longopwasequals)
2061 {
2062 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2063 exit(usage(2));
2064 }
2065 option_data = argv[++i];
2066 }
2067
2068 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2069 multiple times to create a list of patterns. */
2070
2071 if (op->type == OP_PATLIST)
2072 {
2073 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2074 {
2075 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2076 MAX_PATTERN_COUNT);
2077 return 2;
2078 }
2079 patterns[cmd_pattern_count++] = option_data;
2080 }
2081
2082 /* Otherwise, deal with single string or numeric data values. */
2083
2084 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2085 {
2086 *((char **)op->dataptr) = option_data;
2087 }
2088 else
2089 {
2090 char *endptr;
2091 int n = strtoul(option_data, &endptr, 10);
2092 if (*endptr != 0)
2093 {
2094 if (longop)
2095 {
2096 char *equals = strchr(op->long_name, '=');
2097 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2098 equals - op->long_name;
2099 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2100 option_data, nlen, op->long_name);
2101 }
2102 else
2103 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2104 option_data, op->one_char);
2105 exit(usage(2));
2106 }
2107 *((int *)op->dataptr) = n;
2108 }
2109 }
2110
2111 /* Options have been decoded. If -C was used, its value is used as a default
2112 for -A and -B. */
2113
2114 if (both_context > 0)
2115 {
2116 if (after_context == 0) after_context = both_context;
2117 if (before_context == 0) before_context = both_context;
2118 }
2119
2120 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2121 However, the latter two set the only_matching flag. */
2122
2123 if ((only_matching && (file_offsets || line_offsets)) ||
2124 (file_offsets && line_offsets))
2125 {
2126 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2127 "and/or --line-offsets\n");
2128 exit(usage(2));
2129 }
2130
2131 if (file_offsets || line_offsets) only_matching = TRUE;
2132
2133 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2134 LC_ALL environment variable is set, and if so, use it. */
2135
2136 if (locale == NULL)
2137 {
2138 locale = getenv("LC_ALL");
2139 locale_from = "LCC_ALL";
2140 }
2141
2142 if (locale == NULL)
2143 {
2144 locale = getenv("LC_CTYPE");
2145 locale_from = "LC_CTYPE";
2146 }
2147
2148 /* If a locale has been provided, set it, and generate the tables the PCRE
2149 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2150
2151 if (locale != NULL)
2152 {
2153 if (setlocale(LC_CTYPE, locale) == NULL)
2154 {
2155 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2156 locale, locale_from);
2157 return 2;
2158 }
2159 pcretables = pcre_maketables();
2160 }
2161
2162 /* Sort out colouring */
2163
2164 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2165 {
2166 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2167 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2168 else
2169 {
2170 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2171 colour_option);
2172 return 2;
2173 }
2174 if (do_colour)
2175 {
2176 char *cs = getenv("PCREGREP_COLOUR");
2177 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2178 if (cs != NULL) colour_string = cs;
2179 }
2180 }
2181
2182 /* Interpret the newline type; the default settings are Unix-like. */
2183
2184 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2185 {
2186 pcre_options |= PCRE_NEWLINE_CR;
2187 endlinetype = EL_CR;
2188 }
2189 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2190 {
2191 pcre_options |= PCRE_NEWLINE_LF;
2192 endlinetype = EL_LF;
2193 }
2194 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2195 {
2196 pcre_options |= PCRE_NEWLINE_CRLF;
2197 endlinetype = EL_CRLF;
2198 }
2199 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2200 {
2201 pcre_options |= PCRE_NEWLINE_ANY;
2202 endlinetype = EL_ANY;
2203 }
2204 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2205 {
2206 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2207 endlinetype = EL_ANYCRLF;
2208 }
2209 else
2210 {
2211 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2212 return 2;
2213 }
2214
2215 /* Interpret the text values for -d and -D */
2216
2217 if (dee_option != NULL)
2218 {
2219 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2220 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2221 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2222 else
2223 {
2224 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2225 return 2;
2226 }
2227 }
2228
2229 if (DEE_option != NULL)
2230 {
2231 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2232 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2233 else
2234 {
2235 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2236 return 2;
2237 }
2238 }
2239
2240 /* Check the values for Jeffrey Friedl's debugging options. */
2241
2242 #ifdef JFRIEDL_DEBUG
2243 if (S_arg > 9)
2244 {
2245 fprintf(stderr, "pcregrep: bad value for -S option\n");
2246 return 2;
2247 }
2248 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2249 {
2250 if (jfriedl_XT == 0) jfriedl_XT = 1;
2251 if (jfriedl_XR == 0) jfriedl_XR = 1;
2252 }
2253 #endif
2254
2255 /* Get memory to store the pattern and hints lists. */
2256
2257 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2258 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2259
2260 if (pattern_list == NULL || hints_list == NULL)
2261 {
2262 fprintf(stderr, "pcregrep: malloc failed\n");
2263 goto EXIT2;
2264 }
2265
2266 /* If no patterns were provided by -e, and there is no file provided by -f,
2267 the first argument is the one and only pattern, and it must exist. */
2268
2269 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2270 {
2271 if (i >= argc) return usage(2);
2272 patterns[cmd_pattern_count++] = argv[i++];
2273 }
2274
2275 /* Compile the patterns that were provided on the command line, either by
2276 multiple uses of -e or as a single unkeyed pattern. */
2277
2278 for (j = 0; j < cmd_pattern_count; j++)
2279 {
2280 if (!compile_pattern(patterns[j], pcre_options, NULL,
2281 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2282 goto EXIT2;
2283 }
2284
2285 /* Compile the regular expressions that are provided in a file. */
2286
2287 if (pattern_filename != NULL)
2288 {
2289 int linenumber = 0;
2290 FILE *f;
2291 char *filename;
2292 char buffer[MBUFTHIRD];
2293
2294 if (strcmp(pattern_filename, "-") == 0)
2295 {
2296 f = stdin;
2297 filename = stdin_name;
2298 }
2299 else
2300 {
2301 f = fopen(pattern_filename, "r");
2302 if (f == NULL)
2303 {
2304 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2305 strerror(errno));
2306 goto EXIT2;
2307 }
2308 filename = pattern_filename;
2309 }
2310
2311 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2312 {
2313 char *s = buffer + (int)strlen(buffer);
2314 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2315 *s = 0;
2316 linenumber++;
2317 if (buffer[0] == 0) continue; /* Skip blank lines */
2318 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2319 goto EXIT2;
2320 }
2321
2322 if (f != stdin) fclose(f);
2323 }
2324
2325 /* Study the regular expressions, as we will be running them many times */
2326
2327 for (j = 0; j < pattern_count; j++)
2328 {
2329 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2330 if (error != NULL)
2331 {
2332 char s[16];
2333 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2334 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2335 goto EXIT2;
2336 }
2337 hint_count++;
2338 }
2339
2340 /* If there are include or exclude patterns, compile them. */
2341
2342 if (exclude_pattern != NULL)
2343 {
2344 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2345 pcretables);
2346 if (exclude_compiled == NULL)
2347 {
2348 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2349 errptr, error);
2350 goto EXIT2;
2351 }
2352 }
2353
2354 if (include_pattern != NULL)
2355 {
2356 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2357 pcretables);
2358 if (include_compiled == NULL)
2359 {
2360 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2361 errptr, error);
2362 goto EXIT2;
2363 }
2364 }
2365
2366 if (exclude_dir_pattern != NULL)
2367 {
2368 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2369 pcretables);
2370 if (exclude_dir_compiled == NULL)
2371 {
2372 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2373 errptr, error);
2374 goto EXIT2;
2375 }
2376 }
2377
2378 if (include_dir_pattern != NULL)
2379 {
2380 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2381 pcretables);
2382 if (include_dir_compiled == NULL)
2383 {
2384 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2385 errptr, error);
2386 goto EXIT2;
2387 }
2388 }
2389
2390 /* If there are no further arguments, do the business on stdin and exit. */
2391
2392 if (i >= argc)
2393 {
2394 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2395 goto EXIT;
2396 }
2397
2398 /* Otherwise, work through the remaining arguments as files or directories.
2399 Pass in the fact that there is only one argument at top level - this suppresses
2400 the file name if the argument is not a directory and filenames are not
2401 otherwise forced. */
2402
2403 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2404
2405 for (; i < argc; i++)
2406 {
2407 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2408 only_one_at_top);
2409 if (frc > 1) rc = frc;
2410 else if (frc == 0 && rc == 1) rc = 0;
2411 }
2412
2413 EXIT:
2414 if (pattern_list != NULL)
2415 {
2416 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2417 free(pattern_list);
2418 }
2419 if (hints_list != NULL)
2420 {
2421 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2422 free(hints_list);
2423 }
2424 return rc;
2425
2426 EXIT2:
2427 rc = 2;
2428 goto EXIT;
2429 }
2430
2431 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12