/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 564 - (show annotations) (download)
Sun Oct 31 16:07:24 2010 UTC (3 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 74611 byte(s)
Fix bad return code from pcregrep when -o is used (yielded 1, not 0, after a 
match).

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* A simple boolean type together with its two values. */

typedef int BOOL;

#define FALSE 0
#define TRUE 1

/* Fixed limits. OFFSET_SIZE is the number of ints in the offsets vector that
is handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* One third of the main file buffer: the larger of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
81
/* Settings for the "filenames" variable, which controls file name output.
The numeric order matters: every value above FN_DEFAULT is taken to mean that
a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* How a file is read */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* What to do for the -d (directories) and -D (devices) options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits that request special pattern processing */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Kinds of line ending */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { ... } while (0) so that "FWRITE(...);" is exactly
one statement. A bare "if (...) {}" expansion followed by the caller's
semicolon is two statements, which breaks when the macro is used as the
unbraced body of an if that has an else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int process_options = 0;
167
168 static unsigned long int match_limit = 0;
169 static unsigned long int match_limit_recursion = 0;
170
171 static BOOL count_only = FALSE;
172 static BOOL do_colour = FALSE;
173 static BOOL file_offsets = FALSE;
174 static BOOL hyphenpending = FALSE;
175 static BOOL invert = FALSE;
176 static BOOL line_buffered = FALSE;
177 static BOOL line_offsets = FALSE;
178 static BOOL multiline = FALSE;
179 static BOOL number = FALSE;
180 static BOOL omit_zero_count = FALSE;
181 static BOOL only_matching = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186
/* The kinds of data an option can take, followed by the structure that
describes a single option. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* where the option's data lives, or NULL */
  const char *long_name;  /* long option name */
  const char *help_text;  /* descriptive text for the option */
} option_item;
199
/* An option that has no single-letter form is given a negative code in place
of the letter, which also serves to identify it. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
217
218 static option_item optionlist[] = {
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254 #ifdef JFRIEDL_DEBUG
255 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
256 #endif
257 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
258 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
259 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
260 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
261 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
262 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
263 { OP_NODATA, 0, NULL, NULL, NULL }
264 };
265
/* Strings to put before and after each pattern, indexed by process_options.
The -w, -x, and -F options set the 1, 2, and 4 bits respectively. Giving both
-w and -x has the same effect as -x alone, so entries 2 and 3 are the same, as
are entries 6 and 7. */

static const char *prefix[] = {
  "",           /* 0: no special processing */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: no special processing */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
276
/* UTF-8 tables - used only when the newline setting is "any". */

/* Mask for the data bits of a lead byte, indexed by the number of additional
bytes in the character. */

const int utf8_table3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 */
  0x0f,   /* 2 */
  0x07,   /* 3 */
  0x03,   /* 4 */
  0x01    /* 5 */
};

/* Number of additional bytes, indexed by the bottom six bits of a lead
byte. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,
  5,5,5,5
};
286
287
288
289 /*************************************************
290 * OS-specific functions *
291 *************************************************/
292
293 /* These functions are defined so that they can be made system specific,
294 although at present the only ones are for Unix, Win32, and for "no support". */
295
296
297 /************* Directory scanning in Unix ***********/
298
299 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300 #include <sys/types.h>
301 #include <sys/stat.h>
302 #include <dirent.h>
303
304 typedef DIR directory_type;
305
/* Test whether a path names a directory. Yields '/' if it does, and zero if
it does not or if stat() fails. */

static int
isdirectory(char *filename)
{
  struct stat sb;

  /* A stat() failure is reported as "not a directory", in the expectation
  that opening the path as a file will then fail. */

  if (stat(filename, &sb) < 0) return 0;
  if (S_ISDIR(sb.st_mode)) return '/';
  return 0;
}
314
315 static directory_type *
316 opendirectory(char *filename)
317 {
318 return opendir(filename);
319 }
320
321 static char *
322 readdirectory(directory_type *dir)
323 {
324 for (;;)
325 {
326 struct dirent *dent = readdir(dir);
327 if (dent == NULL) return NULL;
328 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329 return dent->d_name;
330 }
331 /* Control never reaches here */
332 }
333
334 static void
335 closedirectory(directory_type *dir)
336 {
337 closedir(dir);
338 }
339
340
341 /************* Test for regular file in Unix **********/
342
/* Test whether a path names a regular file. Yields 1 if it does, 0 if it does
not, and also 1 if stat() fails. */

static int
isregfile(char *filename)
{
  struct stat sb;

  /* A stat() failure is reported as "regular", in the expectation that
  opening the path as a file will then fail. */

  if (stat(filename, &sb) < 0) return 1;
  return S_ISREG(sb.st_mode);
}
351
352
353 /************* Test for a terminal in Unix **********/
354
355 static BOOL
356 is_stdout_tty(void)
357 {
358 return isatty(fileno(stdout));
359 }
360
361 static BOOL
362 is_file_tty(FILE *f)
363 {
364 return isatty(fileno(f));
365 }
366
367
368 /************* Directory scanning in Win32 ***********/
369
370 /* I (Philip Hazel) have no means of testing this code. It was contributed by
371 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372 when it did not exist. David Byron added a patch that moved the #include of
373 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375 undefined when it is indeed undefined. */
376
377 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378
379 #ifndef STRICT
380 # define STRICT
381 #endif
382 #ifndef WIN32_LEAN_AND_MEAN
383 # define WIN32_LEAN_AND_MEAN
384 #endif
385
386 #include <windows.h>
387
388 #ifndef INVALID_FILE_ATTRIBUTES
389 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390 #endif
391
392 typedef struct directory_type
393 {
394 HANDLE handle;
395 BOOL first;
396 WIN32_FIND_DATA data;
397 } directory_type;
398
399 int
400 isdirectory(char *filename)
401 {
402 DWORD attr = GetFileAttributes(filename);
403 if (attr == INVALID_FILE_ATTRIBUTES)
404 return 0;
405 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406 }
407
408 directory_type *
409 opendirectory(char *filename)
410 {
411 size_t len;
412 char *pattern;
413 directory_type *dir;
414 DWORD err;
415 len = strlen(filename);
416 pattern = (char *) malloc(len + 3);
417 dir = (directory_type *) malloc(sizeof(*dir));
418 if ((pattern == NULL) || (dir == NULL))
419 {
420 fprintf(stderr, "pcregrep: malloc failed\n");
421 pcregrep_exit(2);
422 }
423 memcpy(pattern, filename, len);
424 memcpy(&(pattern[len]), "\\*", 3);
425 dir->handle = FindFirstFile(pattern, &(dir->data));
426 if (dir->handle != INVALID_HANDLE_VALUE)
427 {
428 free(pattern);
429 dir->first = TRUE;
430 return dir;
431 }
432 err = GetLastError();
433 free(pattern);
434 free(dir);
435 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
436 return NULL;
437 }
438
439 char *
440 readdirectory(directory_type *dir)
441 {
442 for (;;)
443 {
444 if (!dir->first)
445 {
446 if (!FindNextFile(dir->handle, &(dir->data)))
447 return NULL;
448 }
449 else
450 {
451 dir->first = FALSE;
452 }
453 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454 return dir->data.cFileName;
455 }
456 #ifndef _MSC_VER
457 return NULL; /* Keep compiler happy; never executed */
458 #endif
459 }
460
461 void
462 closedirectory(directory_type *dir)
463 {
464 FindClose(dir->handle);
465 free(dir);
466 }
467
468
469 /************* Test for regular file in Win32 **********/
470
471 /* I don't know how to do this, or if it can be done; assume all paths are
472 regular if they are not directories. */
473
/* Anything that isdirectory() does not report as a directory counts as a
regular file. */

int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
478
479
480 /************* Test for a terminal in Win32 **********/
481
482 /* I don't know how to do this; assume never */
483
484 static BOOL
485 is_stdout_tty(void)
486 {
487 return FALSE;
488 }
489
490 static BOOL
491 is_file_tty(FILE *f)
492 {
493 return FALSE;
494 }
495
496
497 /************* Directory scanning when we can't do it ***********/
498
499 /* The type is void, and apart from isdirectory(), the functions do nothing. */
500
501 #else
502
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always fails. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
509
510
511 /************* Test for regular when we can't do it **********/
512
513 /* Assume all files are regular. */
514
int
isregfile(char *filename)
{
  (void)filename;
  return 1;     /* every path is taken to be a regular file */
}
516
517
518 /************* Test for a terminal when we can't do it **********/
519
520 static BOOL
521 is_stdout_tty(void)
522 {
523 return FALSE;
524 }
525
526 static BOOL
527 is_file_tty(FILE *f)
528 {
529 return FALSE;
530 }
531
532 #endif
533
534
535
536 #ifndef HAVE_STRERROR
537 /*************************************************
538 * Provide strerror() for non-ANSI libraries *
539 *************************************************/
540
541 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
542 in their libraries, but can provide the same facility by this simple
543 alternative function. */
544
extern int sys_nerr;
extern char *sys_errlist[];

/* Look up the text for an error number in sys_errlist[], giving a fixed
string for numbers outside the range 0 to sys_nerr - 1. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
554 #endif /* HAVE_STRERROR */
555
556
557
558 /*************************************************
559 * Exit from the program *
560 *************************************************/
561
562 /* If there has been a resource error, give a suitable message.
563
564 Argument: the return code
565 Returns: does not return
566 */
567
568 static void
569 pcregrep_exit(int rc)
570 {
571 if (resource_error)
572 {
573 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576 }
577
578 exit(rc);
579 }
580
581
582
583 /*************************************************
584 * Read one line of input *
585 *************************************************/
586
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The limit is tested before
each character is read, so when length is zero or negative nothing is taken
from the file and nothing is written to the buffer.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  while (yield < length && (c = fgetc(f)) != EOF)
  {
    buffer[yield++] = (char)c;
    if (c == '\n') break;
  }

  return yield;
}
614
615
616
617 /*************************************************
618 * Find end of line *
619 *************************************************/
620
621 /* The length of the endline sequence that is found is set via lenptr. This may
622 be zero at the very end of the file if there is no line-ending sequence there.
623
624 Arguments:
625 p current position in line
626 endptr end of available data
627 lenptr where to put the length of the eol sequence
628
629 Returns: pointer to the last byte of the line
630 */
631
632 static char *
633 end_of_line(char *p, char *endptr, int *lenptr)
634 {
635 switch(endlinetype)
636 {
637 default: /* Just in case */
638 case EL_LF:
639 while (p < endptr && *p != '\n') p++;
640 if (p < endptr)
641 {
642 *lenptr = 1;
643 return p + 1;
644 }
645 *lenptr = 0;
646 return endptr;
647
648 case EL_CR:
649 while (p < endptr && *p != '\r') p++;
650 if (p < endptr)
651 {
652 *lenptr = 1;
653 return p + 1;
654 }
655 *lenptr = 0;
656 return endptr;
657
658 case EL_CRLF:
659 for (;;)
660 {
661 while (p < endptr && *p != '\r') p++;
662 if (++p >= endptr)
663 {
664 *lenptr = 0;
665 return endptr;
666 }
667 if (*p == '\n')
668 {
669 *lenptr = 2;
670 return p + 1;
671 }
672 }
673 break;
674
675 case EL_ANYCRLF:
676 while (p < endptr)
677 {
678 int extra = 0;
679 register int c = *((unsigned char *)p);
680
681 if (utf8 && c >= 0xc0)
682 {
683 int gcii, gcss;
684 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
685 gcss = 6*extra;
686 c = (c & utf8_table3[extra]) << gcss;
687 for (gcii = 1; gcii <= extra; gcii++)
688 {
689 gcss -= 6;
690 c |= (p[gcii] & 0x3f) << gcss;
691 }
692 }
693
694 p += 1 + extra;
695
696 switch (c)
697 {
698 case 0x0a: /* LF */
699 *lenptr = 1;
700 return p;
701
702 case 0x0d: /* CR */
703 if (p < endptr && *p == 0x0a)
704 {
705 *lenptr = 2;
706 p++;
707 }
708 else *lenptr = 1;
709 return p;
710
711 default:
712 break;
713 }
714 } /* End of loop for ANYCRLF case */
715
716 *lenptr = 0; /* Must have hit the end */
717 return endptr;
718
719 case EL_ANY:
720 while (p < endptr)
721 {
722 int extra = 0;
723 register int c = *((unsigned char *)p);
724
725 if (utf8 && c >= 0xc0)
726 {
727 int gcii, gcss;
728 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729 gcss = 6*extra;
730 c = (c & utf8_table3[extra]) << gcss;
731 for (gcii = 1; gcii <= extra; gcii++)
732 {
733 gcss -= 6;
734 c |= (p[gcii] & 0x3f) << gcss;
735 }
736 }
737
738 p += 1 + extra;
739
740 switch (c)
741 {
742 case 0x0a: /* LF */
743 case 0x0b: /* VT */
744 case 0x0c: /* FF */
745 *lenptr = 1;
746 return p;
747
748 case 0x0d: /* CR */
749 if (p < endptr && *p == 0x0a)
750 {
751 *lenptr = 2;
752 p++;
753 }
754 else *lenptr = 1;
755 return p;
756
757 case 0x85: /* NEL */
758 *lenptr = utf8? 2 : 1;
759 return p;
760
761 case 0x2028: /* LS */
762 case 0x2029: /* PS */
763 *lenptr = 3;
764 return p;
765
766 default:
767 break;
768 }
769 } /* End of loop for ANY case */
770
771 *lenptr = 0; /* Must have hit the end */
772 return endptr;
773 } /* End of overall switch */
774 }
775
776
777
778 /*************************************************
779 * Find start of previous line *
780 *************************************************/
781
782 /* This is called when looking back for before lines to print.
783
784 Arguments:
785 p start of the subsequent line
786 startptr start of available data
787
788 Returns: pointer to the start of the previous line
789 */
790
791 static char *
792 previous_line(char *p, char *startptr)
793 {
794 switch(endlinetype)
795 {
796 default: /* Just in case */
797 case EL_LF:
798 p--;
799 while (p > startptr && p[-1] != '\n') p--;
800 return p;
801
802 case EL_CR:
803 p--;
804 while (p > startptr && p[-1] != '\n') p--;
805 return p;
806
807 case EL_CRLF:
808 for (;;)
809 {
810 p -= 2;
811 while (p > startptr && p[-1] != '\n') p--;
812 if (p <= startptr + 1 || p[-2] == '\r') return p;
813 }
814 return p; /* But control should never get here */
815
816 case EL_ANY:
817 case EL_ANYCRLF:
818 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819 if (utf8) while ((*p & 0xc0) == 0x80) p--;
820
821 while (p > startptr)
822 {
823 register int c;
824 char *pp = p - 1;
825
826 if (utf8)
827 {
828 int extra = 0;
829 while ((*pp & 0xc0) == 0x80) pp--;
830 c = *((unsigned char *)pp);
831 if (c >= 0xc0)
832 {
833 int gcii, gcss;
834 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
835 gcss = 6*extra;
836 c = (c & utf8_table3[extra]) << gcss;
837 for (gcii = 1; gcii <= extra; gcii++)
838 {
839 gcss -= 6;
840 c |= (pp[gcii] & 0x3f) << gcss;
841 }
842 }
843 }
844 else c = *((unsigned char *)pp);
845
846 if (endlinetype == EL_ANYCRLF) switch (c)
847 {
848 case 0x0a: /* LF */
849 case 0x0d: /* CR */
850 return p;
851
852 default:
853 break;
854 }
855
856 else switch (c)
857 {
858 case 0x0a: /* LF */
859 case 0x0b: /* VT */
860 case 0x0c: /* FF */
861 case 0x0d: /* CR */
862 case 0x85: /* NEL */
863 case 0x2028: /* LS */
864 case 0x2029: /* PS */
865 return p;
866
867 default:
868 break;
869 }
870
871 p = pp; /* Back one character */
872 } /* End of loop for ANY case */
873
874 return startptr; /* Hit start of data */
875 } /* End of overall switch */
876 }
877
878
879
880
881
882 /*************************************************
883 * Print the previous "after" lines *
884 *************************************************/
885
886 /* This is called if we are about to lose said lines because of buffer filling,
887 and at the end of the file. The data in the line is written using fwrite() so
888 that a binary zero does not terminate it.
889
890 Arguments:
891 lastmatchnumber the number of the last matching line, plus one
892 lastmatchrestart where we restarted after the last match
893 endptr end of available data
894 printname filename for printing
895
896 Returns: nothing
897 */
898
899 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900 char *endptr, char *printname)
901 {
902 if (after_context > 0 && lastmatchnumber > 0)
903 {
904 int count = 0;
905 while (lastmatchrestart < endptr && count++ < after_context)
906 {
907 int ellength;
908 char *pp = lastmatchrestart;
909 if (printname != NULL) fprintf(stdout, "%s-", printname);
910 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911 pp = end_of_line(pp, endptr, &ellength);
912 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913 lastmatchrestart = pp;
914 }
915 hyphenpending = TRUE;
916 }
917 }
918
919
920
921 /*************************************************
922 * Apply patterns to subject till one matches *
923 *************************************************/
924
925 /* This function is called to run through all patterns, looking for a match. It
926 is used multiple times for the same subject when colouring is enabled, in order
927 to find all possible matches.
928
929 Arguments:
930 matchptr the start of the subject
931 length the length of the subject to match
932 offsets the offets vector to fill in
933 mrc address of where to put the result of pcre_exec()
934
935 Returns: TRUE if there was a match
936 FALSE if there was no match
937 invert if there was a non-fatal error
938 */
939
940 static BOOL
941 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942 {
943 int i;
944 size_t slen = length;
945 const char *msg = "this text:\n\n";
946 if (slen > 200)
947 {
948 slen = 200;
949 msg = "text that starts:\n\n";
950 }
951 for (i = 0; i < pattern_count; i++)
952 {
953 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955 if (*mrc >= 0) return TRUE;
956 if (*mrc == PCRE_ERROR_NOMATCH) continue;
957 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959 fprintf(stderr, "%s", msg);
960 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
961 fprintf(stderr, "\n\n");
962 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963 resource_error = TRUE;
964 if (error_count++ > 20)
965 {
966 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967 pcregrep_exit(2);
968 }
969 return invert; /* No more matching; don't show the line again */
970 }
971
972 return FALSE; /* No match, no errors */
973 }
974
975
976
977 /*************************************************
978 * Grep an individual file *
979 *************************************************/
980
981 /* This is called from grep_or_recurse() below. It uses a buffer that is three
982 times the value of MBUFTHIRD. The matching point is never allowed to stray into
983 the top third of the buffer, thus keeping more of the file available for
984 context printing or for multiline scanning. For large files, the pointer will
985 be in the middle third most of the time, so the bottom third is available for
986 "before" context printing.
987
988 Arguments:
989 handle the fopened FILE stream for a normal file
990 the gzFile pointer when reading is via libz
991 the BZFILE pointer when reading is via libbz2
992 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993 printname the file name if it is to be printed for each match
994 or NULL if the file name is not to be printed
995 it cannot be NULL if filenames[_nomatch]_only is set
996
997 Returns: 0 if there was at least one match
998 1 otherwise (no matches)
999 2 if there is a read error on a .bz2 file
1000 */
1001
1002 static int
1003 pcregrep(void *handle, int frtype, char *printname)
1004 {
1005 int rc = 1;
1006 int linenumber = 1;
1007 int lastmatchnumber = 0;
1008 int count = 0;
1009 int filepos = 0;
1010 int offsets[OFFSET_SIZE];
1011 char *lastmatchrestart = NULL;
1012 char buffer[3*MBUFTHIRD];
1013 char *ptr = buffer;
1014 char *endptr;
1015 size_t bufflength;
1016 BOOL endhyphenpending = FALSE;
1017 BOOL input_line_buffered = line_buffered;
1018 FILE *in = NULL; /* Ensure initialized */
1019
1020 #ifdef SUPPORT_LIBZ
1021 gzFile ingz = NULL;
1022 #endif
1023
1024 #ifdef SUPPORT_LIBBZ2
1025 BZFILE *inbz2 = NULL;
1026 #endif
1027
1028
1029 /* Do the first read into the start of the buffer and set up the pointer to end
1030 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032 fail. */
1033
1034 #ifdef SUPPORT_LIBZ
1035 if (frtype == FR_LIBZ)
1036 {
1037 ingz = (gzFile)handle;
1038 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039 }
1040 else
1041 #endif
1042
1043 #ifdef SUPPORT_LIBBZ2
1044 if (frtype == FR_LIBBZ2)
1045 {
1046 inbz2 = (BZFILE *)handle;
1047 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1049 } /* without the cast it is unsigned. */
1050 else
1051 #endif
1052
1053 {
1054 in = (FILE *)handle;
1055 if (is_file_tty(in)) input_line_buffered = TRUE;
1056 bufflength = input_line_buffered?
1057 read_one_line(buffer, 3*MBUFTHIRD, in) :
1058 fread(buffer, 1, 3*MBUFTHIRD, in);
1059 }
1060
1061 endptr = buffer + bufflength;
1062
1063 /* Loop while the current pointer is not at the end of the file. For large
1064 files, endptr will be at the end of the buffer when we are in the middle of the
1065 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066 way, the buffer is shifted left and re-filled. */
1067
1068 while (ptr < endptr)
1069 {
1070 int endlinelength;
1071 int mrc = 0;
1072 BOOL match;
1073 char *matchptr = ptr;
1074 char *t = ptr;
1075 size_t length, linelength;
1076
1077 /* At this point, ptr is at the start of a line. We need to find the length
1078 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079 length remainder of the data in the buffer. Otherwise, it is the length of
1080 the next line, excluding the terminating newline. After matching, we always
1081 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082 option is used for compiling, so that any match is constrained to be in the
1083 first line. */
1084
1085 t = end_of_line(t, endptr, &endlinelength);
1086 linelength = t - ptr - endlinelength;
1087 length = multiline? (size_t)(endptr - ptr) : linelength;
1088
1089 /* Extra processing for Jeffrey Friedl's debugging. */
1090
1091 #ifdef JFRIEDL_DEBUG
1092 if (jfriedl_XT || jfriedl_XR)
1093 {
1094 #include <sys/time.h>
1095 #include <time.h>
1096 struct timeval start_time, end_time;
1097 struct timezone dummy;
1098 int i;
1099
1100 if (jfriedl_XT)
1101 {
1102 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103 const char *orig = ptr;
1104 ptr = malloc(newlen + 1);
1105 if (!ptr) {
1106 printf("out of memory");
1107 pcregrep_exit(2);
1108 }
1109 endptr = ptr;
1110 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111 for (i = 0; i < jfriedl_XT; i++) {
1112 strncpy(endptr, orig, length);
1113 endptr += length;
1114 }
1115 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116 length = newlen;
1117 }
1118
1119 if (gettimeofday(&start_time, &dummy) != 0)
1120 perror("bad gettimeofday");
1121
1122
1123 for (i = 0; i < jfriedl_XR; i++)
1124 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126
1127 if (gettimeofday(&end_time, &dummy) != 0)
1128 perror("bad gettimeofday");
1129
1130 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131 -
1132 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133
1134 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135 return 0;
1136 }
1137 #endif
1138
1139 /* We come back here after a match when the -o option (only_matching) is set,
1140 in order to find any further matches in the same line. */
1141
1142 ONLY_MATCHING_RESTART:
1143
1144 /* Run through all the patterns until one matches or there is an error other
1145 than NOMATCH. This code is in a subroutine so that it can be re-used for
1146 finding subsequent matches when colouring matched lines. */
1147
1148 match = match_patterns(matchptr, length, offsets, &mrc);
1149
1150 /* If it's a match or a not-match (as required), do what's wanted. */
1151
1152 if (match != invert)
1153 {
1154 BOOL hyphenprinted = FALSE;
1155
1156 /* We've failed if we want a file that doesn't have any matches. */
1157
1158 if (filenames == FN_NOMATCH_ONLY) return 1;
1159
1160 /* Just count if just counting is wanted. */
1161
1162 if (count_only) count++;
1163
1164 /* If all we want is a file name, there is no need to scan any more lines
1165 in the file. */
1166
1167 else if (filenames == FN_MATCH_ONLY)
1168 {
1169 fprintf(stdout, "%s\n", printname);
1170 return 0;
1171 }
1172
1173 /* Likewise, if all we want is a yes/no answer. */
1174
1175 else if (quiet) return 0;
1176
1177 /* The --only-matching option prints just the substring that matched, and
1178 the --file-offsets and --line-offsets options output offsets for the
1179 matching substring (they both force --only-matching). None of these options
1180 prints any context. Afterwards, adjust the start and length, and then jump
1181 back to look for further matches in the same line. If we are in invert
1182 mode, however, nothing is printed - this could be still useful because the
1183 return code is set. */
1184
1185 else if (only_matching)
1186 {
1187 if (!invert)
1188 {
1189 if (printname != NULL) fprintf(stdout, "%s:", printname);
1190 if (number) fprintf(stdout, "%d:", linenumber);
1191 if (line_offsets)
1192 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1193 offsets[1] - offsets[0]);
1194 else if (file_offsets)
1195 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1196 offsets[1] - offsets[0]);
1197 else
1198 {
1199 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1200 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1201 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1202 }
1203 fprintf(stdout, "\n");
1204 matchptr += offsets[1];
1205 length -= offsets[1];
1206 match = FALSE;
1207 if (line_buffered) fflush(stdout);
1208 rc = 0; /* Had some success */
1209 goto ONLY_MATCHING_RESTART;
1210 }
1211 }
1212
1213 /* This is the default case when none of the above options is set. We print
1214 the matching line(s), possibly preceded and/or followed by other lines of
1215 context. */
1216
1217 else
1218 {
1219 /* See if there is a requirement to print some "after" lines from a
1220 previous match. We never print any overlaps. */
1221
1222 if (after_context > 0 && lastmatchnumber > 0)
1223 {
1224 int ellength;
1225 int linecount = 0;
1226 char *p = lastmatchrestart;
1227
1228 while (p < ptr && linecount < after_context)
1229 {
1230 p = end_of_line(p, ptr, &ellength);
1231 linecount++;
1232 }
1233
1234 /* It is important to advance lastmatchrestart during this printing so
1235 that it interacts correctly with any "before" printing below. Print
1236 each line's data using fwrite() in case there are binary zeroes. */
1237
1238 while (lastmatchrestart < p)
1239 {
1240 char *pp = lastmatchrestart;
1241 if (printname != NULL) fprintf(stdout, "%s-", printname);
1242 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1243 pp = end_of_line(pp, endptr, &ellength);
1244 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1245 lastmatchrestart = pp;
1246 }
1247 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1248 }
1249
1250 /* If there were non-contiguous lines printed above, insert hyphens. */
1251
1252 if (hyphenpending)
1253 {
1254 fprintf(stdout, "--\n");
1255 hyphenpending = FALSE;
1256 hyphenprinted = TRUE;
1257 }
1258
1259 /* See if there is a requirement to print some "before" lines for this
1260 match. Again, don't print overlaps. */
1261
1262 if (before_context > 0)
1263 {
1264 int linecount = 0;
1265 char *p = ptr;
1266
1267 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1268 linecount < before_context)
1269 {
1270 linecount++;
1271 p = previous_line(p, buffer);
1272 }
1273
1274 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1275 fprintf(stdout, "--\n");
1276
1277 while (p < ptr)
1278 {
1279 int ellength;
1280 char *pp = p;
1281 if (printname != NULL) fprintf(stdout, "%s-", printname);
1282 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1283 pp = end_of_line(pp, endptr, &ellength);
1284 FWRITE(p, 1, pp - p, stdout);
1285 p = pp;
1286 }
1287 }
1288
1289 /* Now print the matching line(s); ensure we set hyphenpending at the end
1290 of the file if any context lines are being output. */
1291
1292 if (after_context > 0 || before_context > 0)
1293 endhyphenpending = TRUE;
1294
1295 if (printname != NULL) fprintf(stdout, "%s:", printname);
1296 if (number) fprintf(stdout, "%d:", linenumber);
1297
1298 /* In multiline mode, we want to print to the end of the line in which
1299 the end of the matched string is found, so we adjust linelength and the
1300 line number appropriately, but only when there actually was a match
1301 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1302 the match will always be before the first newline sequence. */
1303
1304 if (multiline)
1305 {
1306 int ellength;
1307 char *endmatch = ptr;
1308 if (!invert)
1309 {
1310 endmatch += offsets[1];
1311 t = ptr;
1312 while (t < endmatch)
1313 {
1314 t = end_of_line(t, endptr, &ellength);
1315 if (t <= endmatch) linenumber++; else break;
1316 }
1317 }
1318 endmatch = end_of_line(endmatch, endptr, &ellength);
1319 linelength = endmatch - ptr - ellength;
1320 }
1321
1322 /*** NOTE: Use only fwrite() to output the data line, so that binary
1323 zeroes are treated as just another data character. */
1324
1325 /* This extra option, for Jeffrey Friedl's debugging requirements,
1326 replaces the matched string, or a specific captured string if it exists,
1327 with X. When this happens, colouring is ignored. */
1328
1329 #ifdef JFRIEDL_DEBUG
1330 if (S_arg >= 0 && S_arg < mrc)
1331 {
1332 int first = S_arg * 2;
1333 int last = first + 1;
1334 FWRITE(ptr, 1, offsets[first], stdout);
1335 fprintf(stdout, "X");
1336 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1337 }
1338 else
1339 #endif
1340
1341 /* We have to split the line(s) up if colouring, and search for further
1342 matches. */
1343
1344 if (do_colour)
1345 {
1346 int last_offset = 0;
1347 FWRITE(ptr, 1, offsets[0], stdout);
1348 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1349 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1350 fprintf(stdout, "%c[00m", 0x1b);
1351 for (;;)
1352 {
1353 last_offset += offsets[1];
1354 matchptr += offsets[1];
1355 length -= offsets[1];
1356 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1357 FWRITE(matchptr, 1, offsets[0], stdout);
1358 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360 fprintf(stdout, "%c[00m", 0x1b);
1361 }
1362 FWRITE(ptr + last_offset, 1,
1363 (linelength + endlinelength) - last_offset, stdout);
1364 }
1365
1366 /* Not colouring; no need to search for further matches */
1367
1368 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1369 }
1370
1371 /* End of doing what has to be done for a match. If --line-buffered was
1372 given, flush the output. */
1373
1374 if (line_buffered) fflush(stdout);
1375 rc = 0; /* Had some success */
1376
1377 /* Remember where the last match happened for after_context. We remember
1378 where we are about to restart, and that line's number. */
1379
1380 lastmatchrestart = ptr + linelength + endlinelength;
1381 lastmatchnumber = linenumber + 1;
1382 }
1383
1384 /* For a match in multiline inverted mode (which of course did not cause
1385 anything to be printed), we have to move on to the end of the match before
1386 proceeding. */
1387
1388 if (multiline && invert && match)
1389 {
1390 int ellength;
1391 char *endmatch = ptr + offsets[1];
1392 t = ptr;
1393 while (t < endmatch)
1394 {
1395 t = end_of_line(t, endptr, &ellength);
1396 if (t <= endmatch) linenumber++; else break;
1397 }
1398 endmatch = end_of_line(endmatch, endptr, &ellength);
1399 linelength = endmatch - ptr - ellength;
1400 }
1401
1402 /* Advance to after the newline and increment the line number. The file
1403 offset to the current line is maintained in filepos. */
1404
1405 ptr += linelength + endlinelength;
1406 filepos += (int)(linelength + endlinelength);
1407 linenumber++;
1408
1409 /* If input is line buffered, and the buffer is not yet full, read another
1410 line and add it into the buffer. */
1411
1412 if (input_line_buffered && bufflength < sizeof(buffer))
1413 {
1414 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1415 bufflength += add;
1416 endptr += add;
1417 }
1418
1419 /* If we haven't yet reached the end of the file (the buffer is full), and
1420 the current point is in the top 1/3 of the buffer, slide the buffer down by
1421 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1422 about to be lost, print them. */
1423
1424 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1425 {
1426 if (after_context > 0 &&
1427 lastmatchnumber > 0 &&
1428 lastmatchrestart < buffer + MBUFTHIRD)
1429 {
1430 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1431 lastmatchnumber = 0;
1432 }
1433
1434 /* Now do the shuffle */
1435
1436 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1437 ptr -= MBUFTHIRD;
1438
1439 #ifdef SUPPORT_LIBZ
1440 if (frtype == FR_LIBZ)
1441 bufflength = 2*MBUFTHIRD +
1442 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1443 else
1444 #endif
1445
1446 #ifdef SUPPORT_LIBBZ2
1447 if (frtype == FR_LIBBZ2)
1448 bufflength = 2*MBUFTHIRD +
1449 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1450 else
1451 #endif
1452
1453 bufflength = 2*MBUFTHIRD +
1454 (input_line_buffered?
1455 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1456 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1457 endptr = buffer + bufflength;
1458
1459 /* Adjust any last match point */
1460
1461 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1462 }
1463 } /* Loop through the whole file */
1464
1465 /* End of file; print final "after" lines if wanted; do_after_lines sets
1466 hyphenpending if it prints something. */
1467
1468 if (!only_matching && !count_only)
1469 {
1470 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1471 hyphenpending |= endhyphenpending;
1472 }
1473
1474 /* Print the file name if we are looking for those without matches and there
1475 were none. If we found a match, we won't have got this far. */
1476
1477 if (filenames == FN_NOMATCH_ONLY)
1478 {
1479 fprintf(stdout, "%s\n", printname);
1480 return 0;
1481 }
1482
1483 /* Print the match count if wanted */
1484
1485 if (count_only)
1486 {
1487 if (count > 0 || !omit_zero_count)
1488 {
1489 if (printname != NULL && filenames != FN_NONE)
1490 fprintf(stdout, "%s:", printname);
1491 fprintf(stdout, "%d\n", count);
1492 }
1493 }
1494
1495 return rc;
1496 }
1497
1498
1499
1500 /*************************************************
1501 * Grep a file or recurse into a directory *
1502 *************************************************/
1503
1504 /* Given a path name, if it's a directory, scan all the files if we are
1505 recursing; if it's a file, grep it.
1506
1507 Arguments:
1508 pathname the path to investigate
1509 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1510 only_one_at_top TRUE if the path is the only one at toplevel
1511
1512 Returns: 0 if there was at least one match
1513 1 if there were no matches
1514 2 there was some kind of error
1515
1516 However, file opening failures are suppressed if "silent" is set.
1517 */
1518
1519 static int
1520 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1521 {
1522 int rc = 1;
1523 int sep;
1524 int frtype;
1525 int pathlen;
1526 void *handle;
1527 FILE *in = NULL; /* Ensure initialized */
1528
1529 #ifdef SUPPORT_LIBZ
1530 gzFile ingz = NULL;
1531 #endif
1532
1533 #ifdef SUPPORT_LIBBZ2
1534 BZFILE *inbz2 = NULL;
1535 #endif
1536
1537 /* If the file name is "-" we scan stdin */
1538
1539 if (strcmp(pathname, "-") == 0)
1540 {
1541 return pcregrep(stdin, FR_PLAIN,
1542 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1543 stdin_name : NULL);
1544 }
1545
1546 /* If the file is a directory, skip if skipping or if we are recursing, scan
1547 each file and directory within it, subject to any include or exclude patterns
1548 that were set. The scanning code is localized so it can be made
1549 system-specific. */
1550
1551 if ((sep = isdirectory(pathname)) != 0)
1552 {
1553 if (dee_action == dee_SKIP) return 1;
1554 if (dee_action == dee_RECURSE)
1555 {
1556 char buffer[1024];
1557 char *nextfile;
1558 directory_type *dir = opendirectory(pathname);
1559
1560 if (dir == NULL)
1561 {
1562 if (!silent)
1563 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1564 strerror(errno));
1565 return 2;
1566 }
1567
1568 while ((nextfile = readdirectory(dir)) != NULL)
1569 {
1570 int frc, nflen;
1571 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1572 nflen = (int)(strlen(nextfile));
1573
1574 if (isdirectory(buffer))
1575 {
1576 if (exclude_dir_compiled != NULL &&
1577 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1578 continue;
1579
1580 if (include_dir_compiled != NULL &&
1581 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1582 continue;
1583 }
1584 else
1585 {
1586 if (exclude_compiled != NULL &&
1587 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1588 continue;
1589
1590 if (include_compiled != NULL &&
1591 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1592 continue;
1593 }
1594
1595 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1596 if (frc > 1) rc = frc;
1597 else if (frc == 0 && rc == 1) rc = 0;
1598 }
1599
1600 closedirectory(dir);
1601 return rc;
1602 }
1603 }
1604
1605 /* If the file is not a directory and not a regular file, skip it if that's
1606 been requested. */
1607
1608 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1609
1610 /* Control reaches here if we have a regular file, or if we have a directory
1611 and recursion or skipping was not requested, or if we have anything else and
1612 skipping was not requested. The scan proceeds. If this is the first and only
1613 argument at top level, we don't show the file name, unless we are only showing
1614 the file name, or the filename was forced (-H). */
1615
1616 pathlen = (int)(strlen(pathname));
1617
1618 /* Open using zlib if it is supported and the file name ends with .gz. */
1619
1620 #ifdef SUPPORT_LIBZ
1621 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1622 {
1623 ingz = gzopen(pathname, "rb");
1624 if (ingz == NULL)
1625 {
1626 if (!silent)
1627 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1628 strerror(errno));
1629 return 2;
1630 }
1631 handle = (void *)ingz;
1632 frtype = FR_LIBZ;
1633 }
1634 else
1635 #endif
1636
1637 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1638
1639 #ifdef SUPPORT_LIBBZ2
1640 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1641 {
1642 inbz2 = BZ2_bzopen(pathname, "rb");
1643 handle = (void *)inbz2;
1644 frtype = FR_LIBBZ2;
1645 }
1646 else
1647 #endif
1648
1649 /* Otherwise use plain fopen(). The label is so that we can come back here if
1650 an attempt to read a .bz2 file indicates that it really is a plain file. */
1651
1652 #ifdef SUPPORT_LIBBZ2
1653 PLAIN_FILE:
1654 #endif
1655 {
1656 in = fopen(pathname, "rb");
1657 handle = (void *)in;
1658 frtype = FR_PLAIN;
1659 }
1660
1661 /* All the opening methods return errno when they fail. */
1662
1663 if (handle == NULL)
1664 {
1665 if (!silent)
1666 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667 strerror(errno));
1668 return 2;
1669 }
1670
1671 /* Now grep the file */
1672
1673 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1674 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1675
1676 /* Close in an appropriate manner. */
1677
1678 #ifdef SUPPORT_LIBZ
1679 if (frtype == FR_LIBZ)
1680 gzclose(ingz);
1681 else
1682 #endif
1683
1684 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1685 read failed. If the error indicates that the file isn't in fact bzipped, try
1686 again as a normal file. */
1687
1688 #ifdef SUPPORT_LIBBZ2
1689 if (frtype == FR_LIBBZ2)
1690 {
1691 if (rc == 2)
1692 {
1693 int errnum;
1694 const char *err = BZ2_bzerror(inbz2, &errnum);
1695 if (errnum == BZ_DATA_ERROR_MAGIC)
1696 {
1697 BZ2_bzclose(inbz2);
1698 goto PLAIN_FILE;
1699 }
1700 else if (!silent)
1701 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1702 pathname, err);
1703 }
1704 BZ2_bzclose(inbz2);
1705 }
1706 else
1707 #endif
1708
1709 /* Normal file close */
1710
1711 fclose(in);
1712
1713 /* Pass back the yield from pcregrep(). */
1714
1715 return rc;
1716 }
1717
1718
1719
1720
1721 /*************************************************
1722 * Usage function *
1723 *************************************************/
1724
1725 static int
1726 usage(int rc)
1727 {
1728 option_item *op;
1729 fprintf(stderr, "Usage: pcregrep [-");
1730 for (op = optionlist; op->one_char != 0; op++)
1731 {
1732 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1733 }
1734 fprintf(stderr, "] [long options] [pattern] [files]\n");
1735 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1736 "options.\n");
1737 return rc;
1738 }
1739
1740
1741
1742
1743 /*************************************************
1744 * Help function *
1745 *************************************************/
1746
1747 static void
1748 help(void)
1749 {
1750 option_item *op;
1751
1752 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1753 printf("Search for PATTERN in each FILE or standard input.\n");
1754 printf("PATTERN must be present if neither -e nor -f is used.\n");
1755 printf("\"-\" can be used as a file name to mean STDIN.\n");
1756
1757 #ifdef SUPPORT_LIBZ
1758 printf("Files whose names end in .gz are read using zlib.\n");
1759 #endif
1760
1761 #ifdef SUPPORT_LIBBZ2
1762 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1763 #endif
1764
1765 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1766 printf("Other files and the standard input are read as plain files.\n\n");
1767 #else
1768 printf("All files are read as plain files, without any interpretation.\n\n");
1769 #endif
1770
1771 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1772 printf("Options:\n");
1773
1774 for (op = optionlist; op->one_char != 0; op++)
1775 {
1776 int n;
1777 char s[4];
1778 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1779 n = 30 - printf(" %s --%s", s, op->long_name);
1780 if (n < 1) n = 1;
1781 printf("%.*s%s\n", n, " ", op->help_text);
1782 }
1783
1784 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1785 printf("trailing white space is removed and blank lines are ignored.\n");
1786 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1787
1788 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1789 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1790 }
1791
1792
1793
1794
1795 /*************************************************
1796 * Handle a single-letter, no data option *
1797 *************************************************/
1798
1799 static int
1800 handle_option(int letter, int options)
1801 {
1802 switch(letter)
1803 {
1804 case N_FOFFSETS: file_offsets = TRUE; break;
1805 case N_HELP: help(); pcregrep_exit(0);
1806 case N_LOFFSETS: line_offsets = number = TRUE; break;
1807 case N_LBUFFER: line_buffered = TRUE; break;
1808 case 'c': count_only = TRUE; break;
1809 case 'F': process_options |= PO_FIXED_STRINGS; break;
1810 case 'H': filenames = FN_FORCE; break;
1811 case 'h': filenames = FN_NONE; break;
1812 case 'i': options |= PCRE_CASELESS; break;
1813 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1814 case 'L': filenames = FN_NOMATCH_ONLY; break;
1815 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1816 case 'n': number = TRUE; break;
1817 case 'o': only_matching = TRUE; break;
1818 case 'q': quiet = TRUE; break;
1819 case 'r': dee_action = dee_RECURSE; break;
1820 case 's': silent = TRUE; break;
1821 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1822 case 'v': invert = TRUE; break;
1823 case 'w': process_options |= PO_WORD_MATCH; break;
1824 case 'x': process_options |= PO_LINE_MATCH; break;
1825
1826 case 'V':
1827 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1828 pcregrep_exit(0);
1829 break;
1830
1831 default:
1832 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1833 pcregrep_exit(usage(2));
1834 }
1835
1836 return options;
1837 }
1838
1839
1840
1841
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that their final
digit would otherwise select. Zero and negative numbers always get "th".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
  /* Three characters per byte is more than enough for the decimal digits of
  any int; the extra four bytes are for a sign, the two-letter suffix, and the
  terminating zero. */

  static char buffer[3 * sizeof(int) + 4];
  const char *ending = "th";
  int last_two = n % 100;
  int len = sprintf(buffer, "%d", n);

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  strcpy(buffer + len, ending);
  return buffer;
}
1864
1865
1866
1867 /*************************************************
1868 * Compile a single pattern *
1869 *************************************************/
1870
1871 /* When the -F option has been used, this is called for each substring.
1872 Otherwise it's called for each supplied pattern.
1873
1874 Arguments:
1875 pattern the pattern string
1876 options the PCRE options
1877 filename the file name, or NULL for a command-line pattern
1878 count 0 if this is the only command line pattern, or
1879 number of the command line pattern, or
1880 linenumber for a pattern from a file
1881
1882 Returns: TRUE on success, FALSE after an error
1883 */
1884
1885 static BOOL
1886 compile_single_pattern(char *pattern, int options, char *filename, int count)
1887 {
1888 char buffer[MBUFTHIRD + 16];
1889 const char *error;
1890 int errptr;
1891
1892 if (pattern_count >= MAX_PATTERN_COUNT)
1893 {
1894 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1895 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1896 return FALSE;
1897 }
1898
1899 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1900 suffix[process_options]);
1901 pattern_list[pattern_count] =
1902 pcre_compile(buffer, options, &error, &errptr, pcretables);
1903 if (pattern_list[pattern_count] != NULL)
1904 {
1905 pattern_count++;
1906 return TRUE;
1907 }
1908
1909 /* Handle compile errors */
1910
1911 errptr -= (int)strlen(prefix[process_options]);
1912 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1913
1914 if (filename == NULL)
1915 {
1916 if (count == 0)
1917 fprintf(stderr, "pcregrep: Error in command-line regex "
1918 "at offset %d: %s\n", errptr, error);
1919 else
1920 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1921 "at offset %d: %s\n", ordin(count), errptr, error);
1922 }
1923 else
1924 {
1925 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1926 "at offset %d: %s\n", count, filename, errptr, error);
1927 }
1928
1929 return FALSE;
1930 }
1931
1932
1933
1934 /*************************************************
1935 * Compile one supplied pattern *
1936 *************************************************/
1937
1938 /* When the -F option has been used, each string may be a list of strings,
1939 separated by line breaks. They will be matched literally.
1940
1941 Arguments:
1942 pattern the pattern string
1943 options the PCRE options
1944 filename the file name, or NULL for a command-line pattern
1945 count 0 if this is the only command line pattern, or
1946 number of the command line pattern, or
1947 linenumber for a pattern from a file
1948
1949 Returns: TRUE on success, FALSE after an error
1950 */
1951
1952 static BOOL
1953 compile_pattern(char *pattern, int options, char *filename, int count)
1954 {
1955 if ((process_options & PO_FIXED_STRINGS) != 0)
1956 {
1957 char *eop = pattern + strlen(pattern);
1958 char buffer[MBUFTHIRD];
1959 for(;;)
1960 {
1961 int ellength;
1962 char *p = end_of_line(pattern, eop, &ellength);
1963 if (ellength == 0)
1964 return compile_single_pattern(pattern, options, filename, count);
1965 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1966 pattern = p;
1967 if (!compile_single_pattern(buffer, options, filename, count))
1968 return FALSE;
1969 }
1970 }
1971 else return compile_single_pattern(pattern, options, filename, count);
1972 }
1973
1974
1975
1976 /*************************************************
1977 * Main program *
1978 *************************************************/
1979
1980 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1981
1982 int
1983 main(int argc, char **argv)
1984 {
1985 int i, j;
1986 int rc = 1;
1987 int pcre_options = 0;
1988 int cmd_pattern_count = 0;
1989 int hint_count = 0;
1990 int errptr;
1991 BOOL only_one_at_top;
1992 char *patterns[MAX_PATTERN_COUNT];
1993 const char *locale_from = "--locale";
1994 const char *error;
1995
1996 /* Set the default line ending value from the default in the PCRE library;
1997 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1998 Note that the return values from pcre_config(), though derived from the ASCII
1999 codes, are the same in EBCDIC environments, so we must use the actual values
2000 rather than escapes such as '\r'. */
2001
2002 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2003 switch(i)
2004 {
2005 default: newline = (char *)"lf"; break;
2006 case 13: newline = (char *)"cr"; break;
2007 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2008 case -1: newline = (char *)"any"; break;
2009 case -2: newline = (char *)"anycrlf"; break;
2010 }
2011
2012 /* Process the options */
2013
2014 for (i = 1; i < argc; i++)
2015 {
2016 option_item *op = NULL;
2017 char *option_data = (char *)""; /* default to keep compiler happy */
2018 BOOL longop;
2019 BOOL longopwasequals = FALSE;
2020
2021 if (argv[i][0] != '-') break;
2022
2023 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2024 but only if we have previously had -e or -f to define the patterns. */
2025
2026 if (argv[i][1] == 0)
2027 {
2028 if (pattern_filename != NULL || pattern_count > 0) break;
2029 else pcregrep_exit(usage(2));
2030 }
2031
2032 /* Handle a long name option, or -- to terminate the options */
2033
2034 if (argv[i][1] == '-')
2035 {
2036 char *arg = argv[i] + 2;
2037 char *argequals = strchr(arg, '=');
2038
2039 if (*arg == 0) /* -- terminates options */
2040 {
2041 i++;
2042 break; /* out of the options-handling loop */
2043 }
2044
2045 longop = TRUE;
2046
2047 /* Some long options have data that follows after =, for example file=name.
2048 Some options have variations in the long name spelling: specifically, we
2049 allow "regexp" because GNU grep allows it, though I personally go along
2050 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2051 These options are entered in the table as "regex(p)". Options can be in
2052 both these categories. */
2053
2054 for (op = optionlist; op->one_char != 0; op++)
2055 {
2056 char *opbra = strchr(op->long_name, '(');
2057 char *equals = strchr(op->long_name, '=');
2058
2059 /* Handle options with only one spelling of the name */
2060
2061 if (opbra == NULL) /* Does not contain '(' */
2062 {
2063 if (equals == NULL) /* Not thing=data case */
2064 {
2065 if (strcmp(arg, op->long_name) == 0) break;
2066 }
2067 else /* Special case xxx=data */
2068 {
2069 int oplen = (int)(equals - op->long_name);
2070 int arglen = (argequals == NULL)?
2071 (int)strlen(arg) : (int)(argequals - arg);
2072 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2073 {
2074 option_data = arg + arglen;
2075 if (*option_data == '=')
2076 {
2077 option_data++;
2078 longopwasequals = TRUE;
2079 }
2080 break;
2081 }
2082 }
2083 }
2084
2085 /* Handle options with an alternate spelling of the name */
2086
2087 else
2088 {
2089 char buff1[24];
2090 char buff2[24];
2091
2092 int baselen = (int)(opbra - op->long_name);
2093 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2094 int arglen = (argequals == NULL || equals == NULL)?
2095 (int)strlen(arg) : (int)(argequals - arg);
2096
2097 sprintf(buff1, "%.*s", baselen, op->long_name);
2098 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2099
2100 if (strncmp(arg, buff1, arglen) == 0 ||
2101 strncmp(arg, buff2, arglen) == 0)
2102 {
2103 if (equals != NULL && argequals != NULL)
2104 {
2105 option_data = argequals;
2106 if (*option_data == '=')
2107 {
2108 option_data++;
2109 longopwasequals = TRUE;
2110 }
2111 }
2112 break;
2113 }
2114 }
2115 }
2116
2117 if (op->one_char == 0)
2118 {
2119 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2120 pcregrep_exit(usage(2));
2121 }
2122 }
2123
2124 /* Jeffrey Friedl's debugging harness uses these additional options which
2125 are not in the right form for putting in the option table because they use
2126 only one hyphen, yet are more than one character long. By putting them
2127 separately here, they will not get displayed as part of the help() output,
2128 but I don't think Jeffrey will care about that. */
2129
2130 #ifdef JFRIEDL_DEBUG
2131 else if (strcmp(argv[i], "-pre") == 0) {
2132 jfriedl_prefix = argv[++i];
2133 continue;
2134 } else if (strcmp(argv[i], "-post") == 0) {
2135 jfriedl_postfix = argv[++i];
2136 continue;
2137 } else if (strcmp(argv[i], "-XT") == 0) {
2138 sscanf(argv[++i], "%d", &jfriedl_XT);
2139 continue;
2140 } else if (strcmp(argv[i], "-XR") == 0) {
2141 sscanf(argv[++i], "%d", &jfriedl_XR);
2142 continue;
2143 }
2144 #endif
2145
2146
2147 /* One-char options; many that have no data may be in a single argument; we
2148 continue till we hit the last one or one that needs data. */
2149
2150 else
2151 {
2152 char *s = argv[i] + 1;
2153 longop = FALSE;
2154 while (*s != 0)
2155 {
2156 for (op = optionlist; op->one_char != 0; op++)
2157 { if (*s == op->one_char) break; }
2158 if (op->one_char == 0)
2159 {
2160 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2161 *s, argv[i]);
2162 pcregrep_exit(usage(2));
2163 }
2164 if (op->type != OP_NODATA || s[1] == 0)
2165 {
2166 option_data = s+1;
2167 break;
2168 }
2169 pcre_options = handle_option(*s++, pcre_options);
2170 }
2171 }
2172
2173 /* At this point we should have op pointing to a matched option. If the type
2174 is OP_NODATA, it means that there is no data, and the option might set
2175 something in the PCRE options. */
2176
2177 if (op->type == OP_NODATA)
2178 {
2179 pcre_options = handle_option(op->one_char, pcre_options);
2180 continue;
2181 }
2182
2183 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2184 either has a value or defaults to something. It cannot have data in a
2185 separate item. At the moment, the only such options are "colo(u)r" and
2186 Jeffrey Friedl's special -S debugging option. */
2187
2188 if (*option_data == 0 &&
2189 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2190 {
2191 switch (op->one_char)
2192 {
2193 case N_COLOUR:
2194 colour_option = (char *)"auto";
2195 break;
2196 #ifdef JFRIEDL_DEBUG
2197 case 'S':
2198 S_arg = 0;
2199 break;
2200 #endif
2201 }
2202 continue;
2203 }
2204
2205 /* Otherwise, find the data string for the option. */
2206
2207 if (*option_data == 0)
2208 {
2209 if (i >= argc - 1 || longopwasequals)
2210 {
2211 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2212 pcregrep_exit(usage(2));
2213 }
2214 option_data = argv[++i];
2215 }
2216
2217 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2218 multiple times to create a list of patterns. */
2219
2220 if (op->type == OP_PATLIST)
2221 {
2222 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2223 {
2224 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2225 MAX_PATTERN_COUNT);
2226 return 2;
2227 }
2228 patterns[cmd_pattern_count++] = option_data;
2229 }
2230
2231 /* Otherwise, deal with single string or numeric data values. */
2232
2233 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2234 {
2235 *((char **)op->dataptr) = option_data;
2236 }
2237
2238 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2239 only for unpicking arguments, so just keep it simple. */
2240
2241 else
2242 {
2243 unsigned long int n = 0;
2244 char *endptr = option_data;
2245 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2246 while (isdigit((unsigned char)(*endptr)))
2247 n = n * 10 + (int)(*endptr++ - '0');
2248 if (*endptr != 0)
2249 {
2250 if (longop)
2251 {
2252 char *equals = strchr(op->long_name, '=');
2253 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2254 (int)(equals - op->long_name);
2255 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2256 option_data, nlen, op->long_name);
2257 }
2258 else
2259 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2260 option_data, op->one_char);
2261 pcregrep_exit(usage(2));
2262 }
2263 *((int *)op->dataptr) = n;
2264 }
2265 }
2266
2267 /* Options have been decoded. If -C was used, its value is used as a default
2268 for -A and -B. */
2269
2270 if (both_context > 0)
2271 {
2272 if (after_context == 0) after_context = both_context;
2273 if (before_context == 0) before_context = both_context;
2274 }
2275
2276 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2277 However, the latter two set the only_matching flag. */
2278
2279 if ((only_matching && (file_offsets || line_offsets)) ||
2280 (file_offsets && line_offsets))
2281 {
2282 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2283 "and/or --line-offsets\n");
2284 pcregrep_exit(usage(2));
2285 }
2286
2287 if (file_offsets || line_offsets) only_matching = TRUE;
2288
2289 /* If a locale has not been provided as an option, see if the LC_ALL or
2290 LC_CTYPE environment variable is set (checked in that order), and if so, use it. */
2291
2292 if (locale == NULL)
2293 {
2294 locale = getenv("LC_ALL");
2295 locale_from = "LCC_ALL";
2296 }
2297
2298 if (locale == NULL)
2299 {
2300 locale = getenv("LC_CTYPE");
2301 locale_from = "LC_CTYPE";
2302 }
2303
2304 /* If a locale has been provided, set it, and generate the tables that PCRE
2305 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2306
2307 if (locale != NULL)
2308 {
2309 if (setlocale(LC_CTYPE, locale) == NULL)
2310 {
2311 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2312 locale, locale_from);
2313 return 2;
2314 }
2315 pcretables = pcre_maketables();
2316 }
2317
2318 /* Sort out colouring */
2319
2320 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2321 {
2322 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2323 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2324 else
2325 {
2326 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2327 colour_option);
2328 return 2;
2329 }
2330 if (do_colour)
2331 {
2332 char *cs = getenv("PCREGREP_COLOUR");
2333 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2334 if (cs != NULL) colour_string = cs;
2335 }
2336 }
2337
2338 /* Interpret the newline type; the default settings are Unix-like. */
2339
2340 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2341 {
2342 pcre_options |= PCRE_NEWLINE_CR;
2343 endlinetype = EL_CR;
2344 }
2345 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2346 {
2347 pcre_options |= PCRE_NEWLINE_LF;
2348 endlinetype = EL_LF;
2349 }
2350 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2351 {
2352 pcre_options |= PCRE_NEWLINE_CRLF;
2353 endlinetype = EL_CRLF;
2354 }
2355 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2356 {
2357 pcre_options |= PCRE_NEWLINE_ANY;
2358 endlinetype = EL_ANY;
2359 }
2360 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2361 {
2362 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2363 endlinetype = EL_ANYCRLF;
2364 }
2365 else
2366 {
2367 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2368 return 2;
2369 }
2370
2371 /* Interpret the text values for -d and -D */
2372
2373 if (dee_option != NULL)
2374 {
2375 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2376 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2377 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2378 else
2379 {
2380 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2381 return 2;
2382 }
2383 }
2384
2385 if (DEE_option != NULL)
2386 {
2387 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2388 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2389 else
2390 {
2391 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2392 return 2;
2393 }
2394 }
2395
2396 /* Check the values for Jeffrey Friedl's debugging options. */
2397
2398 #ifdef JFRIEDL_DEBUG
2399 if (S_arg > 9)
2400 {
2401 fprintf(stderr, "pcregrep: bad value for -S option\n");
2402 return 2;
2403 }
2404 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2405 {
2406 if (jfriedl_XT == 0) jfriedl_XT = 1;
2407 if (jfriedl_XR == 0) jfriedl_XR = 1;
2408 }
2409 #endif
2410
2411 /* Get memory to store the pattern and hints lists. */
2412
2413 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2414 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2415
2416 if (pattern_list == NULL || hints_list == NULL)
2417 {
2418 fprintf(stderr, "pcregrep: malloc failed\n");
2419 goto EXIT2;
2420 }
2421
2422 /* If no patterns were provided by -e, and there is no file provided by -f,
2423 the first argument is the one and only pattern, and it must exist. */
2424
2425 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2426 {
2427 if (i >= argc) return usage(2);
2428 patterns[cmd_pattern_count++] = argv[i++];
2429 }
2430
2431 /* Compile the patterns that were provided on the command line, either by
2432 multiple uses of -e or as a single unkeyed pattern. */
2433
2434 for (j = 0; j < cmd_pattern_count; j++)
2435 {
2436 if (!compile_pattern(patterns[j], pcre_options, NULL,
2437 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2438 goto EXIT2;
2439 }
2440
2441 /* Compile the regular expressions that are provided in a file. */
2442
2443 if (pattern_filename != NULL)
2444 {
2445 int linenumber = 0;
2446 FILE *f;
2447 char *filename;
2448 char buffer[MBUFTHIRD];
2449
2450 if (strcmp(pattern_filename, "-") == 0)
2451 {
2452 f = stdin;
2453 filename = stdin_name;
2454 }
2455 else
2456 {
2457 f = fopen(pattern_filename, "r");
2458 if (f == NULL)
2459 {
2460 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2461 strerror(errno));
2462 goto EXIT2;
2463 }
2464 filename = pattern_filename;
2465 }
2466
2467 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2468 {
2469 char *s = buffer + (int)strlen(buffer);
2470 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2471 *s = 0;
2472 linenumber++;
2473 if (buffer[0] == 0) continue; /* Skip blank lines */
2474 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2475 goto EXIT2;
2476 }
2477
2478 if (f != stdin) fclose(f);
2479 }
2480
2481 /* Study the regular expressions, as we will be running them many times */
2482
2483 for (j = 0; j < pattern_count; j++)
2484 {
2485 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2486 if (error != NULL)
2487 {
2488 char s[16];
2489 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2490 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2491 goto EXIT2;
2492 }
2493 hint_count++;
2494 }
2495
2496 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2497 pcre_extra block for each pattern. */
2498
2499 if (match_limit > 0 || match_limit_recursion > 0)
2500 {
2501 for (j = 0; j < pattern_count; j++)
2502 {
2503 if (hints_list[j] == NULL)
2504 {
2505 hints_list[j] = malloc(sizeof(pcre_extra));
2506 if (hints_list[j] == NULL)
2507 {
2508 fprintf(stderr, "pcregrep: malloc failed\n");
2509 pcregrep_exit(2);
2510 }
2511 }
2512 if (match_limit > 0)
2513 {
2514 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2515 hints_list[j]->match_limit = match_limit;
2516 }
2517 if (match_limit_recursion > 0)
2518 {
2519 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2520 hints_list[j]->match_limit_recursion = match_limit_recursion;
2521 }
2522 }
2523 }
2524
2525 /* If there are include or exclude patterns, compile them. */
2526
2527 if (exclude_pattern != NULL)
2528 {
2529 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2530 pcretables);
2531 if (exclude_compiled == NULL)
2532 {
2533 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2534 errptr, error);
2535 goto EXIT2;
2536 }
2537 }
2538
2539 if (include_pattern != NULL)
2540 {
2541 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2542 pcretables);
2543 if (include_compiled == NULL)
2544 {
2545 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2546 errptr, error);
2547 goto EXIT2;
2548 }
2549 }
2550
2551 if (exclude_dir_pattern != NULL)
2552 {
2553 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2554 pcretables);
2555 if (exclude_dir_compiled == NULL)
2556 {
2557 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2558 errptr, error);
2559 goto EXIT2;
2560 }
2561 }
2562
2563 if (include_dir_pattern != NULL)
2564 {
2565 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2566 pcretables);
2567 if (include_dir_compiled == NULL)
2568 {
2569 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2570 errptr, error);
2571 goto EXIT2;
2572 }
2573 }
2574
2575 /* If there are no further arguments, do the business on stdin and exit. */
2576
2577 if (i >= argc)
2578 {
2579 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2580 goto EXIT;
2581 }
2582
2583 /* Otherwise, work through the remaining arguments as files or directories.
2584 Pass in the fact that there is only one argument at top level - this suppresses
2585 the file name if the argument is not a directory and filenames are not
2586 otherwise forced. */
2587
2588 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2589
2590 for (; i < argc; i++)
2591 {
2592 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2593 only_one_at_top);
2594 if (frc > 1) rc = frc;
2595 else if (frc == 0 && rc == 1) rc = 0;
2596 }
2597
2598 EXIT:
2599 if (pattern_list != NULL)
2600 {
2601 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2602 free(pattern_list);
2603 }
2604 if (hints_list != NULL)
2605 {
2606 for (i = 0; i < hint_count; i++)
2607 {
2608 if (hints_list[i] != NULL) free(hints_list[i]);
2609 }
2610 free(hints_list);
2611 }
2612 pcregrep_exit(rc);
2613
2614 EXIT2:
2615 rc = 2;
2616 goto EXIT;
2617 }
2618
2619 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12