/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 519 - (show annotations) (download)
Fri May 21 16:43:17 2010 UTC (4 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 72163 byte(s)
Added --line-buffered to pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean constants and type: truth values in this file are plain ints. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Fixed limits. OFFSET_SIZE is the length of the offsets vector that is
handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* One third of the main reading buffer, which is declared with a size of
3*MBUFTHIRD bytes. It is BUFSIZ when that is larger than 8192, and 8192
otherwise. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
81
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* File reading styles: plain stdio, via libz, or via libbz2. */

enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };

/* Actions for the -d and -D options. The lower-case names are the values of
dee_action (-d, directories); the upper-case names are the values of DEE_action
(-D, devices). */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). These are the 1, 2 and 4
bits of process_options, set by -w, -x and -F respectively. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types; the global endlinetype holds one of these values. */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

NOTE(review): the macro expands to a bare "if" statement with an empty body, so
it must only be used as a complete statement of its own. Using it as the
unbraced body of an "if" that is followed by "else" will not compile as
intended. */

#define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
/* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */

#ifdef JFRIEDL_DEBUG
static int S_arg = -1;
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
static const char *jfriedl_prefix = "";
static const char *jfriedl_postfix = "";
#endif

/* The line ending convention in use: one of the EL_xxx values. */

static int endlinetype;

/* String-valued settings. Those whose address appears in optionlist[] below
are the targets of the corresponding command-line options. */

static char *colour_string = (char *)"1;31";
static char *colour_option = NULL;
static char *dee_option = NULL;
static char *DEE_option = NULL;
static char *newline = NULL;
static char *pattern_filename = NULL;
static char *stdin_name = (char *)"(standard input)";
static char *locale = NULL;

static const unsigned char *pcretables = NULL;

/* The compiled patterns and their study data. match_patterns() passes entry i
of each list to pcre_exec() for i from 0 to pattern_count - 1. */

static int pattern_count = 0;
static pcre **pattern_list = NULL;
static pcre_extra **hints_list = NULL;

/* Source text of the --include, --exclude and directory include/exclude
patterns, followed by (presumably) their compiled forms. */

static char *include_pattern = NULL;
static char *exclude_pattern = NULL;
static char *include_dir_pattern = NULL;
static char *exclude_dir_pattern = NULL;

static pcre *include_compiled = NULL;
static pcre *exclude_compiled = NULL;
static pcre *include_dir_compiled = NULL;
static pcre *exclude_dir_compiled = NULL;

/* Numeric settings and state. The three context counts are the targets of the
-A, -B and -C options; error_count is incremented by match_patterns() for each
pcre_exec() error it reports. */

static int after_context = 0;
static int before_context = 0;
static int both_context = 0;
static int dee_action = dee_READ;
static int DEE_action = DEE_READ;
static int error_count = 0;
static int filenames = FN_DEFAULT;
static int process_options = 0;

/* Boolean settings and state. hyphenpending is set by do_after_lines() once it
has output "after" context lines. */

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
static BOOL line_buffered = FALSE;
static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE;
static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE;
static BOOL only_matching = FALSE;
static BOOL quiet = FALSE;
static BOOL silent = FALSE;
static BOOL utf8 = FALSE;
182
/* Structure for options and list of them */

/* The kinds of option, stored in the "type" field of an option_item. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* address of the variable tied to the option, or NULL */
  const char *long_name;  /* long name; may carry "=something" naming its value */
  const char *help_text;  /* short description of the option */
} option_item;
195
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_NULL (-9)
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)
#define N_LBUFFER (-12)

/* The table of options. Each entry gives the option type, its single-letter
form (or N_xxx code), the variable it is tied to (if any), its long name, and
its descriptive text. "color" and "colour" are two spellings that share the
N_COLOUR code and the same variable. The list ends with an entry whose
one_char is zero and whose long_name is NULL. */

static option_item optionlist[] = {
  { OP_NODATA, N_NULL, NULL, "", " terminate options" },
  { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
  { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
  { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
  { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
  { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
  { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
  { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
  { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
  { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
  { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
  { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
  { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
  { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
  { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
  { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
  { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
  { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
  { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
  { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
  { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
  { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
  { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
  { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
  { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
  { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
  { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
  { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
  { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
  /* NOTE(review): the next two long names use an underscore, whereas every
  other multi-word name in this table uses a hyphen. Confirm whether
  "exclude-dir" and "include-dir" were intended before changing them, because
  the strings are part of the command-line interface. */
  { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
  { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
#ifdef JFRIEDL_DEBUG
  { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
#endif
  { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
  { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
  { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
  { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
  { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
  { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
  { OP_NODATA, 0, NULL, NULL, NULL }
};
257
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same.

Each table has eight entries, one for each combination of those three bits;
the per-entry comments below assume the tables are indexed by the value of
process_options (TODO confirm against the code that uses them). */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -w -F */
  "^(?:\\Q",    /* 6: -x -F */
  "^(?:\\Q" };  /* 7: -w -x -F */

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -w -F */
  "\\E)$",      /* 6: -x -F */
  "\\E)$" };    /* 7: -w -x -F */
268
/* UTF-8 tables - used only when the newline setting is "any". */

/* Indexed by the number of additional bytes in a character: the mask that
end_of_line() and previous_line() apply to the first byte to extract its data
bits. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Indexed by the low six bits of a first byte that is 0xc0 or greater: the
number of additional bytes that follow it. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
278
279
280
281 /*************************************************
282 * OS-specific functions *
283 *************************************************/
284
285 /* These functions are defined so that they can be made system specific,
286 although at present the only ones are for Unix, Win32, and for "no support". */
287
288
289 /************* Directory scanning in Unix ***********/
290
291 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292 #include <sys/types.h>
293 #include <sys/stat.h>
294 #include <dirent.h>
295
296 typedef DIR directory_type;
297
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0

The POSIX S_ISDIR() macro is used rather than masking with S_IFMT, because
S_IFMT and S_IFDIR are XSI extensions that <sys/stat.h> does not expose under
strict feature-test settings. */

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */
  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
306
307 static directory_type *
308 opendirectory(char *filename)
309 {
310 return opendir(filename);
311 }
312
313 static char *
314 readdirectory(directory_type *dir)
315 {
316 for (;;)
317 {
318 struct dirent *dent = readdir(dir);
319 if (dent == NULL) return NULL;
320 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
321 return dent->d_name;
322 }
323 /* Control never reaches here */
324 }
325
326 static void
327 closedirectory(directory_type *dir)
328 {
329 closedir(dir);
330 }
331
332
333 /************* Test for regular file in Unix **********/
334
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    1 if stat() fails or reports a regular file, otherwise 0

The POSIX S_ISREG() macro is used rather than masking with S_IFMT, because
S_IFMT and S_IFREG are XSI extensions that <sys/stat.h> does not expose under
strict feature-test settings. */

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;        /* In the expectation that opening as a file will fail */
  return S_ISREG(statbuf.st_mode)? 1 : 0;
}
343
344
345 /************* Test for a terminal in Unix **********/
346
347 static BOOL
348 is_stdout_tty(void)
349 {
350 return isatty(fileno(stdout));
351 }
352
353 static BOOL
354 is_file_tty(FILE *f)
355 {
356 return isatty(fileno(f));
357 }
358
359
360 /************* Directory scanning in Win32 ***********/
361
362 /* I (Philip Hazel) have no means of testing this code. It was contributed by
363 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364 when it did not exist. David Byron added a patch that moved the #include of
365 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366 */
367
368 #elif HAVE_WINDOWS_H
369
370 #ifndef STRICT
371 # define STRICT
372 #endif
373 #ifndef WIN32_LEAN_AND_MEAN
374 # define WIN32_LEAN_AND_MEAN
375 #endif
376
377 #include <windows.h>
378
379 #ifndef INVALID_FILE_ATTRIBUTES
380 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
381 #endif
382
/* State of a directory scan in the Win32 version. */

typedef struct directory_type
{
  HANDLE handle;         /* the handle returned by FindFirstFile() */
  BOOL first;            /* TRUE until readdirectory() has used the first entry */
  WIN32_FIND_DATA data;  /* the entry most recently found */
} directory_type;
389
390 int
391 isdirectory(char *filename)
392 {
393 DWORD attr = GetFileAttributes(filename);
394 if (attr == INVALID_FILE_ATTRIBUTES)
395 return 0;
396 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
397 }
398
399 directory_type *
400 opendirectory(char *filename)
401 {
402 size_t len;
403 char *pattern;
404 directory_type *dir;
405 DWORD err;
406 len = strlen(filename);
407 pattern = (char *) malloc(len + 3);
408 dir = (directory_type *) malloc(sizeof(*dir));
409 if ((pattern == NULL) || (dir == NULL))
410 {
411 fprintf(stderr, "pcregrep: malloc failed\n");
412 exit(2);
413 }
414 memcpy(pattern, filename, len);
415 memcpy(&(pattern[len]), "\\*", 3);
416 dir->handle = FindFirstFile(pattern, &(dir->data));
417 if (dir->handle != INVALID_HANDLE_VALUE)
418 {
419 free(pattern);
420 dir->first = TRUE;
421 return dir;
422 }
423 err = GetLastError();
424 free(pattern);
425 free(dir);
426 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
427 return NULL;
428 }
429
430 char *
431 readdirectory(directory_type *dir)
432 {
433 for (;;)
434 {
435 if (!dir->first)
436 {
437 if (!FindNextFile(dir->handle, &(dir->data)))
438 return NULL;
439 }
440 else
441 {
442 dir->first = FALSE;
443 }
444 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
445 return dir->data.cFileName;
446 }
447 #ifndef _MSC_VER
448 return NULL; /* Keep compiler happy; never executed */
449 #endif
450 }
451
452 void
453 closedirectory(directory_type *dir)
454 {
455 FindClose(dir->handle);
456 free(dir);
457 }
458
459
460 /************* Test for regular file in Win32 **********/
461
462 /* I don't know how to do this, or if it can be done; assume all paths are
463 regular if they are not directories. */
464
/* Win32 version: any path that isdirectory() does not recognize as a
directory is taken to be a regular file. */

int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
469
470
471 /************* Test for a terminal in Win32 **********/
472
473 /* I don't know how to do this; assume never */
474
475 static BOOL
476 is_stdout_tty(void)
477 {
478 return FALSE;
479 }
480
481 static BOOL
482 is_file_tty(FILE *f)
483 {
484 return FALSE;
485 }
486
487
488 /************* Directory scanning when we can't do it ***********/
489
490 /* The type is void, and apart from isdirectory(), the functions do nothing. */
491
492 #else
493
494 typedef void directory_type;
495
/* No directory support: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}
497 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
498 char *readdirectory(directory_type *dir) { return (char*)0;}
499 void closedirectory(directory_type *dir) {}
500
501
502 /************* Test for regular when we can't do it **********/
503
504 /* Assume all files are regular. */
505
/* No way of testing: every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
507
508
509 /************* Test for a terminal when we can't do it **********/
510
511 static BOOL
512 is_stdout_tty(void)
513 {
514 return FALSE;
515 }
516
517 static BOOL
518 is_file_tty(FILE *f)
519 {
520 return FALSE;
521 }
522
523 #endif
524
525
526
527 #ifndef HAVE_STRERROR
528 /*************************************************
529 * Provide strerror() for non-ANSI libraries *
530 *************************************************/
531
532 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
533 in their libraries, but can provide the same facility by this simple
534 alternative function. */
535
/* The library's table of error messages and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Map an error number to its message. Numbers that lie outside the table get
a fixed "unknown" message. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
545 #endif /* HAVE_STRERROR */
546
547
548
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Characters are copied up to and including the first '\n', or until "length"
characters have been stored, or until end of file, whichever comes first. The
limit is tested before each character is read, so nothing is read from the
file and nothing is written to the buffer when "length" is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int yield = 0;

  while (yield < length)
    {
    int c = fgetc(f);
    if (c == EOF) break;
    buffer[yield++] = (char)c;
    if (c == '\n') break;
    }

  return yield;
}
580
581
582
583 /*************************************************
584 * Find end of line *
585 *************************************************/
586
587 /* The length of the endline sequence that is found is set via lenptr. This may
588 be zero at the very end of the file if there is no line-ending sequence there.
589
590 Arguments:
591 p current position in line
592 endptr end of available data
593 lenptr where to put the length of the eol sequence
594
595 Returns: pointer to the last byte of the line
596 */
597
598 static char *
599 end_of_line(char *p, char *endptr, int *lenptr)
600 {
601 switch(endlinetype)
602 {
603 default: /* Just in case */
604 case EL_LF:
605 while (p < endptr && *p != '\n') p++;
606 if (p < endptr)
607 {
608 *lenptr = 1;
609 return p + 1;
610 }
611 *lenptr = 0;
612 return endptr;
613
614 case EL_CR:
615 while (p < endptr && *p != '\r') p++;
616 if (p < endptr)
617 {
618 *lenptr = 1;
619 return p + 1;
620 }
621 *lenptr = 0;
622 return endptr;
623
624 case EL_CRLF:
625 for (;;)
626 {
627 while (p < endptr && *p != '\r') p++;
628 if (++p >= endptr)
629 {
630 *lenptr = 0;
631 return endptr;
632 }
633 if (*p == '\n')
634 {
635 *lenptr = 2;
636 return p + 1;
637 }
638 }
639 break;
640
641 case EL_ANYCRLF:
642 while (p < endptr)
643 {
644 int extra = 0;
645 register int c = *((unsigned char *)p);
646
647 if (utf8 && c >= 0xc0)
648 {
649 int gcii, gcss;
650 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
651 gcss = 6*extra;
652 c = (c & utf8_table3[extra]) << gcss;
653 for (gcii = 1; gcii <= extra; gcii++)
654 {
655 gcss -= 6;
656 c |= (p[gcii] & 0x3f) << gcss;
657 }
658 }
659
660 p += 1 + extra;
661
662 switch (c)
663 {
664 case 0x0a: /* LF */
665 *lenptr = 1;
666 return p;
667
668 case 0x0d: /* CR */
669 if (p < endptr && *p == 0x0a)
670 {
671 *lenptr = 2;
672 p++;
673 }
674 else *lenptr = 1;
675 return p;
676
677 default:
678 break;
679 }
680 } /* End of loop for ANYCRLF case */
681
682 *lenptr = 0; /* Must have hit the end */
683 return endptr;
684
685 case EL_ANY:
686 while (p < endptr)
687 {
688 int extra = 0;
689 register int c = *((unsigned char *)p);
690
691 if (utf8 && c >= 0xc0)
692 {
693 int gcii, gcss;
694 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
695 gcss = 6*extra;
696 c = (c & utf8_table3[extra]) << gcss;
697 for (gcii = 1; gcii <= extra; gcii++)
698 {
699 gcss -= 6;
700 c |= (p[gcii] & 0x3f) << gcss;
701 }
702 }
703
704 p += 1 + extra;
705
706 switch (c)
707 {
708 case 0x0a: /* LF */
709 case 0x0b: /* VT */
710 case 0x0c: /* FF */
711 *lenptr = 1;
712 return p;
713
714 case 0x0d: /* CR */
715 if (p < endptr && *p == 0x0a)
716 {
717 *lenptr = 2;
718 p++;
719 }
720 else *lenptr = 1;
721 return p;
722
723 case 0x85: /* NEL */
724 *lenptr = utf8? 2 : 1;
725 return p;
726
727 case 0x2028: /* LS */
728 case 0x2029: /* PS */
729 *lenptr = 3;
730 return p;
731
732 default:
733 break;
734 }
735 } /* End of loop for ANY case */
736
737 *lenptr = 0; /* Must have hit the end */
738 return endptr;
739 } /* End of overall switch */
740 }
741
742
743
744 /*************************************************
745 * Find start of previous line *
746 *************************************************/
747
748 /* This is called when looking back for before lines to print.
749
750 Arguments:
751 p start of the subsequent line
752 startptr start of available data
753
754 Returns: pointer to the start of the previous line
755 */
756
757 static char *
758 previous_line(char *p, char *startptr)
759 {
760 switch(endlinetype)
761 {
762 default: /* Just in case */
763 case EL_LF:
764 p--;
765 while (p > startptr && p[-1] != '\n') p--;
766 return p;
767
768 case EL_CR:
769 p--;
770 while (p > startptr && p[-1] != '\n') p--;
771 return p;
772
773 case EL_CRLF:
774 for (;;)
775 {
776 p -= 2;
777 while (p > startptr && p[-1] != '\n') p--;
778 if (p <= startptr + 1 || p[-2] == '\r') return p;
779 }
780 return p; /* But control should never get here */
781
782 case EL_ANY:
783 case EL_ANYCRLF:
784 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
785 if (utf8) while ((*p & 0xc0) == 0x80) p--;
786
787 while (p > startptr)
788 {
789 register int c;
790 char *pp = p - 1;
791
792 if (utf8)
793 {
794 int extra = 0;
795 while ((*pp & 0xc0) == 0x80) pp--;
796 c = *((unsigned char *)pp);
797 if (c >= 0xc0)
798 {
799 int gcii, gcss;
800 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
801 gcss = 6*extra;
802 c = (c & utf8_table3[extra]) << gcss;
803 for (gcii = 1; gcii <= extra; gcii++)
804 {
805 gcss -= 6;
806 c |= (pp[gcii] & 0x3f) << gcss;
807 }
808 }
809 }
810 else c = *((unsigned char *)pp);
811
812 if (endlinetype == EL_ANYCRLF) switch (c)
813 {
814 case 0x0a: /* LF */
815 case 0x0d: /* CR */
816 return p;
817
818 default:
819 break;
820 }
821
822 else switch (c)
823 {
824 case 0x0a: /* LF */
825 case 0x0b: /* VT */
826 case 0x0c: /* FF */
827 case 0x0d: /* CR */
828 case 0x85: /* NEL */
829 case 0x2028: /* LS */
830 case 0x2029: /* PS */
831 return p;
832
833 default:
834 break;
835 }
836
837 p = pp; /* Back one character */
838 } /* End of loop for ANY case */
839
840 return startptr; /* Hit start of data */
841 } /* End of overall switch */
842 }
843
844
845
846
847
848 /*************************************************
849 * Print the previous "after" lines *
850 *************************************************/
851
852 /* This is called if we are about to lose said lines because of buffer filling,
853 and at the end of the file. The data in the line is written using fwrite() so
854 that a binary zero does not terminate it.
855
856 Arguments:
857 lastmatchnumber the number of the last matching line, plus one
858 lastmatchrestart where we restarted after the last match
859 endptr end of available data
860 printname filename for printing
861
862 Returns: nothing
863 */
864
865 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
866 char *endptr, char *printname)
867 {
868 if (after_context > 0 && lastmatchnumber > 0)
869 {
870 int count = 0;
871 while (lastmatchrestart < endptr && count++ < after_context)
872 {
873 int ellength;
874 char *pp = lastmatchrestart;
875 if (printname != NULL) fprintf(stdout, "%s-", printname);
876 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877 pp = end_of_line(pp, endptr, &ellength);
878 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879 lastmatchrestart = pp;
880 }
881 hyphenpending = TRUE;
882 }
883 }
884
885
886
887 /*************************************************
888 * Apply patterns to subject till one matches *
889 *************************************************/
890
891 /* This function is called to run through all patterns, looking for a match. It
892 is used multiple times for the same subject when colouring is enabled, in order
893 to find all possible matches.
894
895 Arguments:
896 matchptr the start of the subject
897 length the length of the subject to match
898 offsets the offets vector to fill in
899 mrc address of where to put the result of pcre_exec()
900
901 Returns: TRUE if there was a match
902 FALSE if there was no match
903 invert if there was a non-fatal error
904 */
905
906 static BOOL
907 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
908 {
909 int i;
910 for (i = 0; i < pattern_count; i++)
911 {
912 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
913 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914 if (*mrc >= 0) return TRUE;
915 if (*mrc == PCRE_ERROR_NOMATCH) continue;
916 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918 fprintf(stderr, "this text:\n");
919 FWRITE(matchptr, 1, length, stderr); /* In case binary zero included */
920 fprintf(stderr, "\n");
921 if (error_count == 0 &&
922 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
923 {
924 fprintf(stderr, "pcregrep: error %d means that a resource limit "
925 "was exceeded\n", *mrc);
926 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
927 }
928 if (error_count++ > 20)
929 {
930 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
931 exit(2);
932 }
933 return invert; /* No more matching; don't show the line again */
934 }
935
936 return FALSE; /* No match, no errors */
937 }
938
939
940
941 /*************************************************
942 * Grep an individual file *
943 *************************************************/
944
945 /* This is called from grep_or_recurse() below. It uses a buffer that is three
946 times the value of MBUFTHIRD. The matching point is never allowed to stray into
947 the top third of the buffer, thus keeping more of the file available for
948 context printing or for multiline scanning. For large files, the pointer will
949 be in the middle third most of the time, so the bottom third is available for
950 "before" context printing.
951
952 Arguments:
953 handle the fopened FILE stream for a normal file
954 the gzFile pointer when reading is via libz
955 the BZFILE pointer when reading is via libbz2
956 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
957 printname the file name if it is to be printed for each match
958 or NULL if the file name is not to be printed
959 it cannot be NULL if filenames[_nomatch]_only is set
960
961 Returns: 0 if there was at least one match
962 1 otherwise (no matches)
963 2 if there is a read error on a .bz2 file
964 */
965
966 static int
967 pcregrep(void *handle, int frtype, char *printname)
968 {
969 int rc = 1;
970 int linenumber = 1;
971 int lastmatchnumber = 0;
972 int count = 0;
973 int filepos = 0;
974 int offsets[OFFSET_SIZE];
975 char *lastmatchrestart = NULL;
976 char buffer[3*MBUFTHIRD];
977 char *ptr = buffer;
978 char *endptr;
979 size_t bufflength;
980 BOOL endhyphenpending = FALSE;
981 BOOL input_line_buffered = line_buffered;
982 FILE *in = NULL; /* Ensure initialized */
983
984 #ifdef SUPPORT_LIBZ
985 gzFile ingz = NULL;
986 #endif
987
988 #ifdef SUPPORT_LIBBZ2
989 BZFILE *inbz2 = NULL;
990 #endif
991
992
993 /* Do the first read into the start of the buffer and set up the pointer to end
994 of what we have. In the case of libz, a non-zipped .gz file will be read as a
995 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
996 fail. */
997
998 #ifdef SUPPORT_LIBZ
999 if (frtype == FR_LIBZ)
1000 {
1001 ingz = (gzFile)handle;
1002 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1003 }
1004 else
1005 #endif
1006
1007 #ifdef SUPPORT_LIBBZ2
1008 if (frtype == FR_LIBBZ2)
1009 {
1010 inbz2 = (BZFILE *)handle;
1011 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1012 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1013 } /* without the cast it is unsigned. */
1014 else
1015 #endif
1016
1017 {
1018 in = (FILE *)handle;
1019 if (is_file_tty(in)) input_line_buffered = TRUE;
1020 bufflength = input_line_buffered?
1021 read_one_line(buffer, 3*MBUFTHIRD, in) :
1022 fread(buffer, 1, 3*MBUFTHIRD, in);
1023 }
1024
1025 endptr = buffer + bufflength;
1026
1027 /* Loop while the current pointer is not at the end of the file. For large
1028 files, endptr will be at the end of the buffer when we are in the middle of the
1029 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1030 way, the buffer is shifted left and re-filled. */
1031
1032 while (ptr < endptr)
1033 {
1034 int endlinelength;
1035 int mrc = 0;
1036 BOOL match;
1037 char *matchptr = ptr;
1038 char *t = ptr;
1039 size_t length, linelength;
1040
1041 /* At this point, ptr is at the start of a line. We need to find the length
1042 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043 length remainder of the data in the buffer. Otherwise, it is the length of
1044 the next line, excluding the terminating newline. After matching, we always
1045 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046 option is used for compiling, so that any match is constrained to be in the
1047 first line. */
1048
1049 t = end_of_line(t, endptr, &endlinelength);
1050 linelength = t - ptr - endlinelength;
1051 length = multiline? (size_t)(endptr - ptr) : linelength;
1052
1053 /* Extra processing for Jeffrey Friedl's debugging. */
1054
1055 #ifdef JFRIEDL_DEBUG
1056 if (jfriedl_XT || jfriedl_XR)
1057 {
1058 #include <sys/time.h>
1059 #include <time.h>
1060 struct timeval start_time, end_time;
1061 struct timezone dummy;
1062 int i;
1063
1064 if (jfriedl_XT)
1065 {
1066 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1067 const char *orig = ptr;
1068 ptr = malloc(newlen + 1);
1069 if (!ptr) {
1070 printf("out of memory");
1071 exit(2);
1072 }
1073 endptr = ptr;
1074 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1075 for (i = 0; i < jfriedl_XT; i++) {
1076 strncpy(endptr, orig, length);
1077 endptr += length;
1078 }
1079 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1080 length = newlen;
1081 }
1082
1083 if (gettimeofday(&start_time, &dummy) != 0)
1084 perror("bad gettimeofday");
1085
1086
1087 for (i = 0; i < jfriedl_XR; i++)
1088 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1090
1091 if (gettimeofday(&end_time, &dummy) != 0)
1092 perror("bad gettimeofday");
1093
1094 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1095 -
1096 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1097
1098 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1099 return 0;
1100 }
1101 #endif
1102
1103 /* We come back here after a match when the -o option (only_matching) is set,
1104 in order to find any further matches in the same line. */
1105
1106 ONLY_MATCHING_RESTART:
1107
1108 /* Run through all the patterns until one matches or there is an error other
1109 than NOMATCH. This code is in a subroutine so that it can be re-used for
1110 finding subsequent matches when colouring matched lines. */
1111
1112 match = match_patterns(matchptr, length, offsets, &mrc);
1113
1114 /* If it's a match or a not-match (as required), do what's wanted. */
1115
1116 if (match != invert)
1117 {
1118 BOOL hyphenprinted = FALSE;
1119
1120 /* We've failed if we want a file that doesn't have any matches. */
1121
1122 if (filenames == FN_NOMATCH_ONLY) return 1;
1123
1124 /* Just count if just counting is wanted. */
1125
1126 if (count_only) count++;
1127
1128 /* If all we want is a file name, there is no need to scan any more lines
1129 in the file. */
1130
1131 else if (filenames == FN_MATCH_ONLY)
1132 {
1133 fprintf(stdout, "%s\n", printname);
1134 return 0;
1135 }
1136
1137 /* Likewise, if all we want is a yes/no answer. */
1138
1139 else if (quiet) return 0;
1140
1141 /* The --only-matching option prints just the substring that matched, and
1142 the --file-offsets and --line-offsets options output offsets for the
1143 matching substring (they both force --only-matching). None of these options
1144 prints any context. Afterwards, adjust the start and length, and then jump
1145 back to look for further matches in the same line. If we are in invert
1146 mode, however, nothing is printed - this could be still useful because the
1147 return code is set. */
1148
1149 else if (only_matching)
1150 {
1151 if (!invert)
1152 {
1153 if (printname != NULL) fprintf(stdout, "%s:", printname);
1154 if (number) fprintf(stdout, "%d:", linenumber);
1155 if (line_offsets)
1156 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157 offsets[1] - offsets[0]);
1158 else if (file_offsets)
1159 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160 offsets[1] - offsets[0]);
1161 else
1162 {
1163 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1166 }
1167 fprintf(stdout, "\n");
1168 matchptr += offsets[1];
1169 length -= offsets[1];
1170 match = FALSE;
1171 goto ONLY_MATCHING_RESTART;
1172 }
1173 }
1174
1175 /* This is the default case when none of the above options is set. We print
1176 the matching line(s), possibly preceded and/or followed by other lines of
1177 context. */
1178
1179 else
1180 {
1181 /* See if there is a requirement to print some "after" lines from a
1182 previous match. We never print any overlaps. */
1183
1184 if (after_context > 0 && lastmatchnumber > 0)
1185 {
1186 int ellength;
1187 int linecount = 0;
1188 char *p = lastmatchrestart;
1189
1190 while (p < ptr && linecount < after_context)
1191 {
1192 p = end_of_line(p, ptr, &ellength);
1193 linecount++;
1194 }
1195
1196 /* It is important to advance lastmatchrestart during this printing so
1197 that it interacts correctly with any "before" printing below. Print
1198 each line's data using fwrite() in case there are binary zeroes. */
1199
1200 while (lastmatchrestart < p)
1201 {
1202 char *pp = lastmatchrestart;
1203 if (printname != NULL) fprintf(stdout, "%s-", printname);
1204 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205 pp = end_of_line(pp, endptr, &ellength);
1206 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207 lastmatchrestart = pp;
1208 }
1209 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1210 }
1211
1212 /* If there were non-contiguous lines printed above, insert hyphens. */
1213
1214 if (hyphenpending)
1215 {
1216 fprintf(stdout, "--\n");
1217 hyphenpending = FALSE;
1218 hyphenprinted = TRUE;
1219 }
1220
1221 /* See if there is a requirement to print some "before" lines for this
1222 match. Again, don't print overlaps. */
1223
1224 if (before_context > 0)
1225 {
1226 int linecount = 0;
1227 char *p = ptr;
1228
1229 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1230 linecount < before_context)
1231 {
1232 linecount++;
1233 p = previous_line(p, buffer);
1234 }
1235
1236 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1237 fprintf(stdout, "--\n");
1238
1239 while (p < ptr)
1240 {
1241 int ellength;
1242 char *pp = p;
1243 if (printname != NULL) fprintf(stdout, "%s-", printname);
1244 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245 pp = end_of_line(pp, endptr, &ellength);
1246 FWRITE(p, 1, pp - p, stdout);
1247 p = pp;
1248 }
1249 }
1250
1251 /* Now print the matching line(s); ensure we set hyphenpending at the end
1252 of the file if any context lines are being output. */
1253
1254 if (after_context > 0 || before_context > 0)
1255 endhyphenpending = TRUE;
1256
1257 if (printname != NULL) fprintf(stdout, "%s:", printname);
1258 if (number) fprintf(stdout, "%d:", linenumber);
1259
1260 /* In multiline mode, we want to print to the end of the line in which
1261 the end of the matched string is found, so we adjust linelength and the
1262 line number appropriately, but only when there actually was a match
1263 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1264 the match will always be before the first newline sequence. */
1265
1266 if (multiline)
1267 {
1268 int ellength;
1269 char *endmatch = ptr;
1270 if (!invert)
1271 {
1272 endmatch += offsets[1];
1273 t = ptr;
1274 while (t < endmatch)
1275 {
1276 t = end_of_line(t, endptr, &ellength);
1277 if (t <= endmatch) linenumber++; else break;
1278 }
1279 }
1280 endmatch = end_of_line(endmatch, endptr, &ellength);
1281 linelength = endmatch - ptr - ellength;
1282 }
1283
1284 /*** NOTE: Use only fwrite() to output the data line, so that binary
1285 zeroes are treated as just another data character. */
1286
1287 /* This extra option, for Jeffrey Friedl's debugging requirements,
1288 replaces the matched string, or a specific captured string if it exists,
1289 with X. When this happens, colouring is ignored. */
1290
1291 #ifdef JFRIEDL_DEBUG
1292 if (S_arg >= 0 && S_arg < mrc)
1293 {
1294 int first = S_arg * 2;
1295 int last = first + 1;
1296 FWRITE(ptr, 1, offsets[first], stdout);
1297 fprintf(stdout, "X");
1298 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1299 }
1300 else
1301 #endif
1302
1303 /* We have to split the line(s) up if colouring, and search for further
1304 matches. */
1305
1306 if (do_colour)
1307 {
1308 int last_offset = 0;
1309 FWRITE(ptr, 1, offsets[0], stdout);
1310 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312 fprintf(stdout, "%c[00m", 0x1b);
1313 for (;;)
1314 {
1315 last_offset += offsets[1];
1316 matchptr += offsets[1];
1317 length -= offsets[1];
1318 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319 FWRITE(matchptr, 1, offsets[0], stdout);
1320 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322 fprintf(stdout, "%c[00m", 0x1b);
1323 }
1324 FWRITE(ptr + last_offset, 1,
1325 (linelength + endlinelength) - last_offset, stdout);
1326 }
1327
1328 /* Not colouring; no need to search for further matches */
1329
1330 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1331 }
1332
1333 /* End of doing what has to be done for a match. If --line-buffered was
1334 given, flush the output. */
1335
1336 if (line_buffered) fflush(stdout);
1337 rc = 0; /* Had some success */
1338
1339 /* Remember where the last match happened for after_context. We remember
1340 where we are about to restart, and that line's number. */
1341
1342 lastmatchrestart = ptr + linelength + endlinelength;
1343 lastmatchnumber = linenumber + 1;
1344 }
1345
1346 /* For a match in multiline inverted mode (which of course did not cause
1347 anything to be printed), we have to move on to the end of the match before
1348 proceeding. */
1349
1350 if (multiline && invert && match)
1351 {
1352 int ellength;
1353 char *endmatch = ptr + offsets[1];
1354 t = ptr;
1355 while (t < endmatch)
1356 {
1357 t = end_of_line(t, endptr, &ellength);
1358 if (t <= endmatch) linenumber++; else break;
1359 }
1360 endmatch = end_of_line(endmatch, endptr, &ellength);
1361 linelength = endmatch - ptr - ellength;
1362 }
1363
1364 /* Advance to after the newline and increment the line number. The file
1365 offset to the current line is maintained in filepos. */
1366
1367 ptr += linelength + endlinelength;
1368 filepos += linelength + endlinelength;
1369 linenumber++;
1370
1371 /* If input is line buffered, and the buffer is not yet full, read another
1372 line and add it into the buffer. */
1373
1374 if (input_line_buffered && bufflength < sizeof(buffer))
1375 {
1376 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1377 bufflength += add;
1378 endptr += add;
1379 }
1380
1381 /* If we haven't yet reached the end of the file (the buffer is full), and
1382 the current point is in the top 1/3 of the buffer, slide the buffer down by
1383 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1384 about to be lost, print them. */
1385
1386 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1387 {
1388 if (after_context > 0 &&
1389 lastmatchnumber > 0 &&
1390 lastmatchrestart < buffer + MBUFTHIRD)
1391 {
1392 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1393 lastmatchnumber = 0;
1394 }
1395
1396 /* Now do the shuffle */
1397
1398 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1399 ptr -= MBUFTHIRD;
1400
1401 #ifdef SUPPORT_LIBZ
1402 if (frtype == FR_LIBZ)
1403 bufflength = 2*MBUFTHIRD +
1404 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1405 else
1406 #endif
1407
1408 #ifdef SUPPORT_LIBBZ2
1409 if (frtype == FR_LIBBZ2)
1410 bufflength = 2*MBUFTHIRD +
1411 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1412 else
1413 #endif
1414
1415 bufflength = 2*MBUFTHIRD +
1416 (input_line_buffered?
1417 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419 endptr = buffer + bufflength;
1420
1421 /* Adjust any last match point */
1422
1423 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1424 }
1425 } /* Loop through the whole file */
1426
1427 /* End of file; print final "after" lines if wanted; do_after_lines sets
1428 hyphenpending if it prints something. */
1429
1430 if (!only_matching && !count_only)
1431 {
1432 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1433 hyphenpending |= endhyphenpending;
1434 }
1435
1436 /* Print the file name if we are looking for those without matches and there
1437 were none. If we found a match, we won't have got this far. */
1438
1439 if (filenames == FN_NOMATCH_ONLY)
1440 {
1441 fprintf(stdout, "%s\n", printname);
1442 return 0;
1443 }
1444
1445 /* Print the match count if wanted */
1446
1447 if (count_only)
1448 {
1449 if (count > 0 || !omit_zero_count)
1450 {
1451 if (printname != NULL && filenames != FN_NONE)
1452 fprintf(stdout, "%s:", printname);
1453 fprintf(stdout, "%d\n", count);
1454 }
1455 }
1456
1457 return rc;
1458 }
1459
1460
1461
1462 /*************************************************
1463 * Grep a file or recurse into a directory *
1464 *************************************************/
1465
1466 /* Given a path name, if it's a directory, scan all the files if we are
1467 recursing; if it's a file, grep it.
1468
1469 Arguments:
1470 pathname the path to investigate
1471 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1472 only_one_at_top TRUE if the path is the only one at toplevel
1473
1474 Returns: 0 if there was at least one match
1475 1 if there were no matches
1476 2 there was some kind of error
1477
1478 However, file opening failures are suppressed if "silent" is set.
1479 */
1480
1481 static int
1482 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1483 {
1484 int rc = 1;
1485 int sep;
1486 int frtype;
1487 int pathlen;
1488 void *handle;
1489 FILE *in = NULL; /* Ensure initialized */
1490
1491 #ifdef SUPPORT_LIBZ
1492 gzFile ingz = NULL;
1493 #endif
1494
1495 #ifdef SUPPORT_LIBBZ2
1496 BZFILE *inbz2 = NULL;
1497 #endif
1498
1499 /* If the file name is "-" we scan stdin */
1500
1501 if (strcmp(pathname, "-") == 0)
1502 {
1503 return pcregrep(stdin, FR_PLAIN,
1504 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1505 stdin_name : NULL);
1506 }
1507
1508 /* If the file is a directory, skip if skipping or if we are recursing, scan
1509 each file and directory within it, subject to any include or exclude patterns
1510 that were set. The scanning code is localized so it can be made
1511 system-specific. */
1512
1513 if ((sep = isdirectory(pathname)) != 0)
1514 {
1515 if (dee_action == dee_SKIP) return 1;
1516 if (dee_action == dee_RECURSE)
1517 {
1518 char buffer[1024];
1519 char *nextfile;
1520 directory_type *dir = opendirectory(pathname);
1521
1522 if (dir == NULL)
1523 {
1524 if (!silent)
1525 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1526 strerror(errno));
1527 return 2;
1528 }
1529
1530 while ((nextfile = readdirectory(dir)) != NULL)
1531 {
1532 int frc, nflen;
1533 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534 nflen = strlen(nextfile);
1535
1536 if (isdirectory(buffer))
1537 {
1538 if (exclude_dir_compiled != NULL &&
1539 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1540 continue;
1541
1542 if (include_dir_compiled != NULL &&
1543 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1544 continue;
1545 }
1546 else
1547 {
1548 if (exclude_compiled != NULL &&
1549 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1550 continue;
1551
1552 if (include_compiled != NULL &&
1553 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1554 continue;
1555 }
1556
1557 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558 if (frc > 1) rc = frc;
1559 else if (frc == 0 && rc == 1) rc = 0;
1560 }
1561
1562 closedirectory(dir);
1563 return rc;
1564 }
1565 }
1566
1567 /* If the file is not a directory and not a regular file, skip it if that's
1568 been requested. */
1569
1570 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1571
1572 /* Control reaches here if we have a regular file, or if we have a directory
1573 and recursion or skipping was not requested, or if we have anything else and
1574 skipping was not requested. The scan proceeds. If this is the first and only
1575 argument at top level, we don't show the file name, unless we are only showing
1576 the file name, or the filename was forced (-H). */
1577
1578 pathlen = strlen(pathname);
1579
1580 /* Open using zlib if it is supported and the file name ends with .gz. */
1581
1582 #ifdef SUPPORT_LIBZ
1583 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1584 {
1585 ingz = gzopen(pathname, "rb");
1586 if (ingz == NULL)
1587 {
1588 if (!silent)
1589 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1590 strerror(errno));
1591 return 2;
1592 }
1593 handle = (void *)ingz;
1594 frtype = FR_LIBZ;
1595 }
1596 else
1597 #endif
1598
1599 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1600
1601 #ifdef SUPPORT_LIBBZ2
1602 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1603 {
1604 inbz2 = BZ2_bzopen(pathname, "rb");
1605 handle = (void *)inbz2;
1606 frtype = FR_LIBBZ2;
1607 }
1608 else
1609 #endif
1610
1611 /* Otherwise use plain fopen(). The label is so that we can come back here if
1612 an attempt to read a .bz2 file indicates that it really is a plain file. */
1613
1614 #ifdef SUPPORT_LIBBZ2
1615 PLAIN_FILE:
1616 #endif
1617 {
1618 in = fopen(pathname, "rb");
1619 handle = (void *)in;
1620 frtype = FR_PLAIN;
1621 }
1622
1623 /* All the opening methods return errno when they fail. */
1624
1625 if (handle == NULL)
1626 {
1627 if (!silent)
1628 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1629 strerror(errno));
1630 return 2;
1631 }
1632
1633 /* Now grep the file */
1634
1635 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1636 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1637
1638 /* Close in an appropriate manner. */
1639
1640 #ifdef SUPPORT_LIBZ
1641 if (frtype == FR_LIBZ)
1642 gzclose(ingz);
1643 else
1644 #endif
1645
1646 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1647 read failed. If the error indicates that the file isn't in fact bzipped, try
1648 again as a normal file. */
1649
1650 #ifdef SUPPORT_LIBBZ2
1651 if (frtype == FR_LIBBZ2)
1652 {
1653 if (rc == 2)
1654 {
1655 int errnum;
1656 const char *err = BZ2_bzerror(inbz2, &errnum);
1657 if (errnum == BZ_DATA_ERROR_MAGIC)
1658 {
1659 BZ2_bzclose(inbz2);
1660 goto PLAIN_FILE;
1661 }
1662 else if (!silent)
1663 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1664 pathname, err);
1665 }
1666 BZ2_bzclose(inbz2);
1667 }
1668 else
1669 #endif
1670
1671 /* Normal file close */
1672
1673 fclose(in);
1674
1675 /* Pass back the yield from pcregrep(). */
1676
1677 return rc;
1678 }
1679
1680
1681
1682
1683 /*************************************************
1684 * Usage function *
1685 *************************************************/
1686
1687 static int
1688 usage(int rc)
1689 {
1690 option_item *op;
1691 fprintf(stderr, "Usage: pcregrep [-");
1692 for (op = optionlist; op->one_char != 0; op++)
1693 {
1694 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1695 }
1696 fprintf(stderr, "] [long options] [pattern] [files]\n");
1697 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1698 "options.\n");
1699 return rc;
1700 }
1701
1702
1703
1704
1705 /*************************************************
1706 * Help function *
1707 *************************************************/
1708
1709 static void
1710 help(void)
1711 {
1712 option_item *op;
1713
1714 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1715 printf("Search for PATTERN in each FILE or standard input.\n");
1716 printf("PATTERN must be present if neither -e nor -f is used.\n");
1717 printf("\"-\" can be used as a file name to mean STDIN.\n");
1718
1719 #ifdef SUPPORT_LIBZ
1720 printf("Files whose names end in .gz are read using zlib.\n");
1721 #endif
1722
1723 #ifdef SUPPORT_LIBBZ2
1724 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1725 #endif
1726
1727 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1728 printf("Other files and the standard input are read as plain files.\n\n");
1729 #else
1730 printf("All files are read as plain files, without any interpretation.\n\n");
1731 #endif
1732
1733 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1734 printf("Options:\n");
1735
1736 for (op = optionlist; op->one_char != 0; op++)
1737 {
1738 int n;
1739 char s[4];
1740 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1741 n = 30 - printf(" %s --%s", s, op->long_name);
1742 if (n < 1) n = 1;
1743 printf("%.*s%s\n", n, " ", op->help_text);
1744 }
1745
1746 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1747 printf("trailing white space is removed and blank lines are ignored.\n");
1748 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1749
1750 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1751 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1752 }
1753
1754
1755
1756
1757 /*************************************************
1758 * Handle a single-letter, no data option *
1759 *************************************************/
1760
1761 static int
1762 handle_option(int letter, int options)
1763 {
1764 switch(letter)
1765 {
1766 case N_FOFFSETS: file_offsets = TRUE; break;
1767 case N_HELP: help(); exit(0);
1768 case N_LOFFSETS: line_offsets = number = TRUE; break;
1769 case N_LBUFFER: line_buffered = TRUE; break;
1770 case 'c': count_only = TRUE; break;
1771 case 'F': process_options |= PO_FIXED_STRINGS; break;
1772 case 'H': filenames = FN_FORCE; break;
1773 case 'h': filenames = FN_NONE; break;
1774 case 'i': options |= PCRE_CASELESS; break;
1775 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776 case 'L': filenames = FN_NOMATCH_ONLY; break;
1777 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778 case 'n': number = TRUE; break;
1779 case 'o': only_matching = TRUE; break;
1780 case 'q': quiet = TRUE; break;
1781 case 'r': dee_action = dee_RECURSE; break;
1782 case 's': silent = TRUE; break;
1783 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1784 case 'v': invert = TRUE; break;
1785 case 'w': process_options |= PO_WORD_MATCH; break;
1786 case 'x': process_options |= PO_LINE_MATCH; break;
1787
1788 case 'V':
1789 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1790 exit(0);
1791 break;
1792
1793 default:
1794 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1795 exit(usage(2));
1796 }
1797
1798 return options;
1799 }
1800
1801
1802
1803
1804 /*************************************************
1805 * Construct printed ordinal *
1806 *************************************************/
1807
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would select.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* Room for any int, sign, suffix, and zero */
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* Only when the number is not one of the "teens" does the final digit pick
the suffix. For negative numbers both remainders are negative, so "th" is
used. */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1826
1827
1828
1829 /*************************************************
1830 * Compile a single pattern *
1831 *************************************************/
1832
1833 /* When the -F option has been used, this is called for each substring.
1834 Otherwise it's called for each supplied pattern.
1835
1836 Arguments:
1837 pattern the pattern string
1838 options the PCRE options
1839 filename the file name, or NULL for a command-line pattern
1840 count 0 if this is the only command line pattern, or
1841 number of the command line pattern, or
1842 linenumber for a pattern from a file
1843
1844 Returns: TRUE on success, FALSE after an error
1845 */
1846
1847 static BOOL
1848 compile_single_pattern(char *pattern, int options, char *filename, int count)
1849 {
1850 char buffer[MBUFTHIRD + 16];
1851 const char *error;
1852 int errptr;
1853
1854 if (pattern_count >= MAX_PATTERN_COUNT)
1855 {
1856 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1857 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1858 return FALSE;
1859 }
1860
1861 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1862 suffix[process_options]);
1863 pattern_list[pattern_count] =
1864 pcre_compile(buffer, options, &error, &errptr, pcretables);
1865 if (pattern_list[pattern_count] != NULL)
1866 {
1867 pattern_count++;
1868 return TRUE;
1869 }
1870
1871 /* Handle compile errors */
1872
1873 errptr -= (int)strlen(prefix[process_options]);
1874 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1875
1876 if (filename == NULL)
1877 {
1878 if (count == 0)
1879 fprintf(stderr, "pcregrep: Error in command-line regex "
1880 "at offset %d: %s\n", errptr, error);
1881 else
1882 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1883 "at offset %d: %s\n", ordin(count), errptr, error);
1884 }
1885 else
1886 {
1887 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1888 "at offset %d: %s\n", count, filename, errptr, error);
1889 }
1890
1891 return FALSE;
1892 }
1893
1894
1895
1896 /*************************************************
1897 * Compile one supplied pattern *
1898 *************************************************/
1899
1900 /* When the -F option has been used, each string may be a list of strings,
1901 separated by line breaks. They will be matched literally.
1902
1903 Arguments:
1904 pattern the pattern string
1905 options the PCRE options
1906 filename the file name, or NULL for a command-line pattern
1907 count 0 if this is the only command line pattern, or
1908 number of the command line pattern, or
1909 linenumber for a pattern from a file
1910
1911 Returns: TRUE on success, FALSE after an error
1912 */
1913
1914 static BOOL
1915 compile_pattern(char *pattern, int options, char *filename, int count)
1916 {
1917 if ((process_options & PO_FIXED_STRINGS) != 0)
1918 {
1919 char *eop = pattern + strlen(pattern);
1920 char buffer[MBUFTHIRD];
1921 for(;;)
1922 {
1923 int ellength;
1924 char *p = end_of_line(pattern, eop, &ellength);
1925 if (ellength == 0)
1926 return compile_single_pattern(pattern, options, filename, count);
1927 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1928 pattern = p;
1929 if (!compile_single_pattern(buffer, options, filename, count))
1930 return FALSE;
1931 }
1932 }
1933 else return compile_single_pattern(pattern, options, filename, count);
1934 }
1935
1936
1937
1938 /*************************************************
1939 * Main program *
1940 *************************************************/
1941
1942 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1943
1944 int
1945 main(int argc, char **argv)
1946 {
1947 int i, j;
1948 int rc = 1;
1949 int pcre_options = 0;
1950 int cmd_pattern_count = 0;
1951 int hint_count = 0;
1952 int errptr;
1953 BOOL only_one_at_top;
1954 char *patterns[MAX_PATTERN_COUNT];
1955 const char *locale_from = "--locale";
1956 const char *error;
1957
1958 /* Set the default line ending value from the default in the PCRE library;
1959 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1960 Note that the return values from pcre_config(), though derived from the ASCII
1961 codes, are the same in EBCDIC environments, so we must use the actual values
1962 rather than escapes such as '\r'. */
1963
1964 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1965 switch(i)
1966 {
1967 default: newline = (char *)"lf"; break;
1968 case 13: newline = (char *)"cr"; break;
1969 case (13 << 8) | 10: newline = (char *)"crlf"; break;
1970 case -1: newline = (char *)"any"; break;
1971 case -2: newline = (char *)"anycrlf"; break;
1972 }
1973
1974 /* Process the options */
1975
1976 for (i = 1; i < argc; i++)
1977 {
1978 option_item *op = NULL;
1979 char *option_data = (char *)""; /* default to keep compiler happy */
1980 BOOL longop;
1981 BOOL longopwasequals = FALSE;
1982
1983 if (argv[i][0] != '-') break;
1984
1985 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1986 but only if we have previously had -e or -f to define the patterns. */
1987
1988 if (argv[i][1] == 0)
1989 {
1990 if (pattern_filename != NULL || pattern_count > 0) break;
1991 else exit(usage(2));
1992 }
1993
1994 /* Handle a long name option, or -- to terminate the options */
1995
1996 if (argv[i][1] == '-')
1997 {
1998 char *arg = argv[i] + 2;
1999 char *argequals = strchr(arg, '=');
2000
2001 if (*arg == 0) /* -- terminates options */
2002 {
2003 i++;
2004 break; /* out of the options-handling loop */
2005 }
2006
2007 longop = TRUE;
2008
2009 /* Some long options have data that follows after =, for example file=name.
2010 Some options have variations in the long name spelling: specifically, we
2011 allow "regexp" because GNU grep allows it, though I personally go along
2012 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013 These options are entered in the table as "regex(p)". Options can be in
2014 both these categories. */
2015
2016 for (op = optionlist; op->one_char != 0; op++)
2017 {
2018 char *opbra = strchr(op->long_name, '(');
2019 char *equals = strchr(op->long_name, '=');
2020
2021 /* Handle options with only one spelling of the name */
2022
2023 if (opbra == NULL) /* Does not contain '(' */
2024 {
2025 if (equals == NULL) /* Not thing=data case */
2026 {
2027 if (strcmp(arg, op->long_name) == 0) break;
2028 }
2029 else /* Special case xxx=data */
2030 {
2031 int oplen = equals - op->long_name;
2032 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
2033 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2034 {
2035 option_data = arg + arglen;
2036 if (*option_data == '=')
2037 {
2038 option_data++;
2039 longopwasequals = TRUE;
2040 }
2041 break;
2042 }
2043 }
2044 }
2045
2046 /* Handle options with an alternate spelling of the name */
2047
2048 else
2049 {
2050 char buff1[24];
2051 char buff2[24];
2052
2053 int baselen = opbra - op->long_name;
2054 int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
2055 int arglen = (argequals == NULL || equals == NULL)?
2056 (int)strlen(arg) : argequals - arg;
2057
2058 sprintf(buff1, "%.*s", baselen, op->long_name);
2059 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2060
2061 if (strncmp(arg, buff1, arglen) == 0 ||
2062 strncmp(arg, buff2, arglen) == 0)
2063 {
2064 if (equals != NULL && argequals != NULL)
2065 {
2066 option_data = argequals;
2067 if (*option_data == '=')
2068 {
2069 option_data++;
2070 longopwasequals = TRUE;
2071 }
2072 }
2073 break;
2074 }
2075 }
2076 }
2077
2078 if (op->one_char == 0)
2079 {
2080 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2081 exit(usage(2));
2082 }
2083 }
2084
2085 /* Jeffrey Friedl's debugging harness uses these additional options which
2086 are not in the right form for putting in the option table because they use
2087 only one hyphen, yet are more than one character long. By putting them
2088 separately here, they will not get displayed as part of the help() output,
2089 but I don't think Jeffrey will care about that. */
2090
2091 #ifdef JFRIEDL_DEBUG
2092 else if (strcmp(argv[i], "-pre") == 0) {
2093 jfriedl_prefix = argv[++i];
2094 continue;
2095 } else if (strcmp(argv[i], "-post") == 0) {
2096 jfriedl_postfix = argv[++i];
2097 continue;
2098 } else if (strcmp(argv[i], "-XT") == 0) {
2099 sscanf(argv[++i], "%d", &jfriedl_XT);
2100 continue;
2101 } else if (strcmp(argv[i], "-XR") == 0) {
2102 sscanf(argv[++i], "%d", &jfriedl_XR);
2103 continue;
2104 }
2105 #endif
2106
2107
2108 /* One-char options; many that have no data may be in a single argument; we
2109 continue till we hit the last one or one that needs data. */
2110
2111 else
2112 {
2113 char *s = argv[i] + 1;
2114 longop = FALSE;
2115 while (*s != 0)
2116 {
2117 for (op = optionlist; op->one_char != 0; op++)
2118 { if (*s == op->one_char) break; }
2119 if (op->one_char == 0)
2120 {
2121 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2122 *s, argv[i]);
2123 exit(usage(2));
2124 }
2125 if (op->type != OP_NODATA || s[1] == 0)
2126 {
2127 option_data = s+1;
2128 break;
2129 }
2130 pcre_options = handle_option(*s++, pcre_options);
2131 }
2132 }
2133
2134 /* At this point we should have op pointing to a matched option. If the type
2135 is OP_NODATA, it means that there is no data, and the option might set
2136 something in the PCRE options. */
2137
2138 if (op->type == OP_NODATA)
2139 {
2140 pcre_options = handle_option(op->one_char, pcre_options);
2141 continue;
2142 }
2143
2144 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2145 either has a value or defaults to something. It cannot have data in a
2146 separate item. At the moment, the only such options are "colo(u)r" and
2147 Jeffrey Friedl's special -S debugging option. */
2148
2149 if (*option_data == 0 &&
2150 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2151 {
2152 switch (op->one_char)
2153 {
2154 case N_COLOUR:
2155 colour_option = (char *)"auto";
2156 break;
2157 #ifdef JFRIEDL_DEBUG
2158 case 'S':
2159 S_arg = 0;
2160 break;
2161 #endif
2162 }
2163 continue;
2164 }
2165
2166 /* Otherwise, find the data string for the option. */
2167
2168 if (*option_data == 0)
2169 {
2170 if (i >= argc - 1 || longopwasequals)
2171 {
2172 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2173 exit(usage(2));
2174 }
2175 option_data = argv[++i];
2176 }
2177
2178 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2179 multiple times to create a list of patterns. */
2180
2181 if (op->type == OP_PATLIST)
2182 {
2183 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2184 {
2185 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2186 MAX_PATTERN_COUNT);
2187 return 2;
2188 }
2189 patterns[cmd_pattern_count++] = option_data;
2190 }
2191
2192 /* Otherwise, deal with single string or numeric data values. */
2193
2194 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2195 {
2196 *((char **)op->dataptr) = option_data;
2197 }
2198 else
2199 {
2200 char *endptr;
2201 int n = strtoul(option_data, &endptr, 10);
2202 if (*endptr != 0)
2203 {
2204 if (longop)
2205 {
2206 char *equals = strchr(op->long_name, '=');
2207 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2208 equals - op->long_name;
2209 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2210 option_data, nlen, op->long_name);
2211 }
2212 else
2213 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2214 option_data, op->one_char);
2215 exit(usage(2));
2216 }
2217 *((int *)op->dataptr) = n;
2218 }
2219 }
2220
2221 /* Options have been decoded. If -C was used, its value is used as a default
2222 for -A and -B. */
2223
2224 if (both_context > 0)
2225 {
2226 if (after_context == 0) after_context = both_context;
2227 if (before_context == 0) before_context = both_context;
2228 }
2229
2230 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2231 However, the latter two set the only_matching flag. */
2232
2233 if ((only_matching && (file_offsets || line_offsets)) ||
2234 (file_offsets && line_offsets))
2235 {
2236 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2237 "and/or --line-offsets\n");
2238 exit(usage(2));
2239 }
2240
2241 if (file_offsets || line_offsets) only_matching = TRUE;
2242
2243 /* If a locale has not been provided as an option, see if the LC_ALL or
2244 LC_CTYPE environment variable is set (LC_ALL is tried first), and if so, use it. */
2245
2246 if (locale == NULL)
2247 {
2248 locale = getenv("LC_ALL");
2249 locale_from = "LCC_ALL";
2250 }
2251
2252 if (locale == NULL)
2253 {
2254 locale = getenv("LC_CTYPE");
2255 locale_from = "LC_CTYPE";
2256 }
2257
2258 /* If a locale has been provided, set it, and generate the tables that PCRE
2259 needs. Otherwise, pcretables remains NULL, which causes the use of default tables. */
2260
2261 if (locale != NULL)
2262 {
2263 if (setlocale(LC_CTYPE, locale) == NULL)
2264 {
2265 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2266 locale, locale_from);
2267 return 2;
2268 }
2269 pcretables = pcre_maketables();
2270 }
2271
2272 /* Sort out colouring */
2273
2274 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2275 {
2276 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2277 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2278 else
2279 {
2280 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2281 colour_option);
2282 return 2;
2283 }
2284 if (do_colour)
2285 {
2286 char *cs = getenv("PCREGREP_COLOUR");
2287 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2288 if (cs != NULL) colour_string = cs;
2289 }
2290 }
2291
2292 /* Interpret the newline type; the default settings are Unix-like. */
2293
2294 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2295 {
2296 pcre_options |= PCRE_NEWLINE_CR;
2297 endlinetype = EL_CR;
2298 }
2299 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2300 {
2301 pcre_options |= PCRE_NEWLINE_LF;
2302 endlinetype = EL_LF;
2303 }
2304 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2305 {
2306 pcre_options |= PCRE_NEWLINE_CRLF;
2307 endlinetype = EL_CRLF;
2308 }
2309 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2310 {
2311 pcre_options |= PCRE_NEWLINE_ANY;
2312 endlinetype = EL_ANY;
2313 }
2314 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2315 {
2316 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2317 endlinetype = EL_ANYCRLF;
2318 }
2319 else
2320 {
2321 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2322 return 2;
2323 }
2324
2325 /* Interpret the text values for -d and -D */
2326
2327 if (dee_option != NULL)
2328 {
2329 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2330 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2331 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2332 else
2333 {
2334 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2335 return 2;
2336 }
2337 }
2338
2339 if (DEE_option != NULL)
2340 {
2341 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2342 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2343 else
2344 {
2345 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2346 return 2;
2347 }
2348 }
2349
2350 /* Check the values for Jeffrey Friedl's debugging options. */
2351
2352 #ifdef JFRIEDL_DEBUG
2353 if (S_arg > 9)
2354 {
2355 fprintf(stderr, "pcregrep: bad value for -S option\n");
2356 return 2;
2357 }
2358 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2359 {
2360 if (jfriedl_XT == 0) jfriedl_XT = 1;
2361 if (jfriedl_XR == 0) jfriedl_XR = 1;
2362 }
2363 #endif
2364
2365 /* Get memory to store the pattern and hints lists. */
2366
2367 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2368 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2369
2370 if (pattern_list == NULL || hints_list == NULL)
2371 {
2372 fprintf(stderr, "pcregrep: malloc failed\n");
2373 goto EXIT2;
2374 }
2375
2376 /* If no patterns were provided by -e, and there is no file provided by -f,
2377 the first argument is the one and only pattern, and it must exist. */
2378
2379 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2380 {
2381 if (i >= argc) return usage(2);
2382 patterns[cmd_pattern_count++] = argv[i++];
2383 }
2384
2385 /* Compile the patterns that were provided on the command line, either by
2386 multiple uses of -e or as a single unkeyed pattern. */
2387
2388 for (j = 0; j < cmd_pattern_count; j++)
2389 {
2390 if (!compile_pattern(patterns[j], pcre_options, NULL,
2391 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2392 goto EXIT2;
2393 }
2394
2395 /* Compile the regular expressions that are provided in a file. */
2396
2397 if (pattern_filename != NULL)
2398 {
2399 int linenumber = 0;
2400 FILE *f;
2401 char *filename;
2402 char buffer[MBUFTHIRD];
2403
2404 if (strcmp(pattern_filename, "-") == 0)
2405 {
2406 f = stdin;
2407 filename = stdin_name;
2408 }
2409 else
2410 {
2411 f = fopen(pattern_filename, "r");
2412 if (f == NULL)
2413 {
2414 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2415 strerror(errno));
2416 goto EXIT2;
2417 }
2418 filename = pattern_filename;
2419 }
2420
2421 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2422 {
2423 char *s = buffer + (int)strlen(buffer);
2424 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2425 *s = 0;
2426 linenumber++;
2427 if (buffer[0] == 0) continue; /* Skip blank lines */
2428 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2429 goto EXIT2;
2430 }
2431
2432 if (f != stdin) fclose(f);
2433 }
2434
2435 /* Study the regular expressions, as we will be running them many times */
2436
2437 for (j = 0; j < pattern_count; j++)
2438 {
2439 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2440 if (error != NULL)
2441 {
2442 char s[16];
2443 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2444 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2445 goto EXIT2;
2446 }
2447 hint_count++;
2448 }
2449
2450 /* If there are include or exclude patterns, compile them. */
2451
2452 if (exclude_pattern != NULL)
2453 {
2454 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2455 pcretables);
2456 if (exclude_compiled == NULL)
2457 {
2458 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2459 errptr, error);
2460 goto EXIT2;
2461 }
2462 }
2463
2464 if (include_pattern != NULL)
2465 {
2466 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2467 pcretables);
2468 if (include_compiled == NULL)
2469 {
2470 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2471 errptr, error);
2472 goto EXIT2;
2473 }
2474 }
2475
2476 if (exclude_dir_pattern != NULL)
2477 {
2478 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2479 pcretables);
2480 if (exclude_dir_compiled == NULL)
2481 {
2482 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2483 errptr, error);
2484 goto EXIT2;
2485 }
2486 }
2487
2488 if (include_dir_pattern != NULL)
2489 {
2490 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2491 pcretables);
2492 if (include_dir_compiled == NULL)
2493 {
2494 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2495 errptr, error);
2496 goto EXIT2;
2497 }
2498 }
2499
2500 /* If there are no further arguments, do the business on stdin and exit. */
2501
2502 if (i >= argc)
2503 {
2504 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2505 goto EXIT;
2506 }
2507
2508 /* Otherwise, work through the remaining arguments as files or directories.
2509 Pass in the fact that there is only one argument at top level - this suppresses
2510 the file name if the argument is not a directory and filenames are not
2511 otherwise forced. */
2512
2513 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2514
2515 for (; i < argc; i++)
2516 {
2517 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2518 only_one_at_top);
2519 if (frc > 1) rc = frc;
2520 else if (frc == 0 && rc == 1) rc = 0;
2521 }
2522
2523 EXIT:
2524 if (pattern_list != NULL)
2525 {
2526 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2527 free(pattern_list);
2528 }
2529 if (hints_list != NULL)
2530 {
2531 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2532 free(hints_list);
2533 }
2534 return rc;
2535
2536 EXIT2:
2537 rc = 2;
2538 goto EXIT;
2539 }
2540
2541 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12