/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 565 - (show annotations) (download)
Sun Oct 31 18:18:48 2010 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 75651 byte(s)
Added parentheses argument to -o and --only-matching options of pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define MBUFTHIRD BUFSIZ
78 #else
79 #define MBUFTHIRD 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The test of the result is wrapped in do { ... } while (0) so that the macro
expands to exactly one statement. A bare "if" would capture a following "else"
(or fail to compile) when the macro is used as the body of an if/else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int only_matching = -1;
167 static int process_options = 0;
168
169 static unsigned long int match_limit = 0;
170 static unsigned long int match_limit_recursion = 0;
171
172 static BOOL count_only = FALSE;
173 static BOOL do_colour = FALSE;
174 static BOOL file_offsets = FALSE;
175 static BOOL hyphenpending = FALSE;
176 static BOOL invert = FALSE;
177 static BOOL line_buffered = FALSE;
178 static BOOL line_offsets = FALSE;
179 static BOOL multiline = FALSE;
180 static BOOL number = FALSE;
181 static BOOL omit_zero_count = FALSE;
182 static BOOL resource_error = FALSE;
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE;
186
187 /* Structure for options and list of them */
188
189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190 OP_PATLIST };
191
192 typedef struct option_item {
193 int type;
194 int one_char;
195 void *dataptr;
196 const char *long_name;
197 const char *help_text;
198 } option_item;
199
200 /* Options without a single-letter equivalent get a negative value. This can be
201 used to identify them. */
202
203 #define N_COLOUR (-1)
204 #define N_EXCLUDE (-2)
205 #define N_EXCLUDE_DIR (-3)
206 #define N_HELP (-4)
207 #define N_INCLUDE (-5)
208 #define N_INCLUDE_DIR (-6)
209 #define N_LABEL (-7)
210 #define N_LOCALE (-8)
211 #define N_NULL (-9)
212 #define N_LOFFSETS (-10)
213 #define N_FOFFSETS (-11)
214 #define N_LBUFFER (-12)
215 #define N_M_LIMIT (-13)
216 #define N_M_LIMIT_REC (-14)
217
218 static option_item optionlist[] = {
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
254 #ifdef JFRIEDL_DEBUG
255 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
256 #endif
257 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
258 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
259 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
260 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
261 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
262 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
263 { OP_NODATA, 0, NULL, NULL, NULL }
264 };
265
266 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
267 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
268 that the combination of -w and -x has the same effect as -x on its own, so we
269 can treat them as the same. */
270
271 static const char *prefix[] = {
272 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
273
274 static const char *suffix[] = {
275 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
276
/* UTF-8 tables - used by end_of_line() and previous_line() to decode
multi-byte characters when the newline setting is "any" or "anycrlf" and UTF-8
mode is on. They are private to this file, so they are static.

utf8_table3 is indexed by the number of additional bytes and gives the mask for
the data bits in the first byte. utf8_table4 is indexed by the bottom 6 bits of
a first byte that is >= 0xc0 and gives the number of additional bytes. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
286
287
288
289 /*************************************************
290 * OS-specific functions *
291 *************************************************/
292
293 /* These functions are defined so that they can be made system specific,
294 although at present the only ones are for Unix, Win32, and for "no support". */
295
296
297 /************* Directory scanning in Unix ***********/
298
299 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
300 #include <sys/types.h>
301 #include <sys/stat.h>
302 #include <dirent.h>
303
304 typedef DIR directory_type;
305
/* Test whether a path names a directory (Unix).

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* If stat() fails, say "not a directory", in the expectation that opening the
path as a file will fail as well. */

if (stat(filename, &info) < 0) return 0;
if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
314
315 static directory_type *
316 opendirectory(char *filename)
317 {
318 return opendir(filename);
319 }
320
321 static char *
322 readdirectory(directory_type *dir)
323 {
324 for (;;)
325 {
326 struct dirent *dent = readdir(dir);
327 if (dent == NULL) return NULL;
328 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
329 return dent->d_name;
330 }
331 /* Control never reaches here */
332 }
333
334 static void
335 closedirectory(directory_type *dir)
336 {
337 closedir(dir);
338 }
339
340
341 /************* Test for regular file in Unix **********/
342
/* Test whether a path names a regular file (Unix).

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, or if stat() fails;
            zero otherwise
*/

static int
isregfile(char *filename)
{
struct stat info;

/* If stat() fails, say "regular", in the expectation that opening the path as
a file will fail and be reported at that point. */

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
351
352
353 /************* Test for a terminal in Unix **********/
354
355 static BOOL
356 is_stdout_tty(void)
357 {
358 return isatty(fileno(stdout));
359 }
360
361 static BOOL
362 is_file_tty(FILE *f)
363 {
364 return isatty(fileno(f));
365 }
366
367
368 /************* Directory scanning in Win32 ***********/
369
370 /* I (Philip Hazel) have no means of testing this code. It was contributed by
371 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
372 when it did not exist. David Byron added a patch that moved the #include of
373 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
374 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
375 undefined when it is indeed undefined. */
376
377 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
378
379 #ifndef STRICT
380 # define STRICT
381 #endif
382 #ifndef WIN32_LEAN_AND_MEAN
383 # define WIN32_LEAN_AND_MEAN
384 #endif
385
386 #include <windows.h>
387
388 #ifndef INVALID_FILE_ATTRIBUTES
389 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
390 #endif
391
392 typedef struct directory_type
393 {
394 HANDLE handle;
395 BOOL first;
396 WIN32_FIND_DATA data;
397 } directory_type;
398
399 int
400 isdirectory(char *filename)
401 {
402 DWORD attr = GetFileAttributes(filename);
403 if (attr == INVALID_FILE_ATTRIBUTES)
404 return 0;
405 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
406 }
407
408 directory_type *
409 opendirectory(char *filename)
410 {
411 size_t len;
412 char *pattern;
413 directory_type *dir;
414 DWORD err;
415 len = strlen(filename);
416 pattern = (char *) malloc(len + 3);
417 dir = (directory_type *) malloc(sizeof(*dir));
418 if ((pattern == NULL) || (dir == NULL))
419 {
420 fprintf(stderr, "pcregrep: malloc failed\n");
421 pcregrep_exit(2);
422 }
423 memcpy(pattern, filename, len);
424 memcpy(&(pattern[len]), "\\*", 3);
425 dir->handle = FindFirstFile(pattern, &(dir->data));
426 if (dir->handle != INVALID_HANDLE_VALUE)
427 {
428 free(pattern);
429 dir->first = TRUE;
430 return dir;
431 }
432 err = GetLastError();
433 free(pattern);
434 free(dir);
435 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
436 return NULL;
437 }
438
439 char *
440 readdirectory(directory_type *dir)
441 {
442 for (;;)
443 {
444 if (!dir->first)
445 {
446 if (!FindNextFile(dir->handle, &(dir->data)))
447 return NULL;
448 }
449 else
450 {
451 dir->first = FALSE;
452 }
453 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
454 return dir->data.cFileName;
455 }
456 #ifndef _MSC_VER
457 return NULL; /* Keep compiler happy; never executed */
458 #endif
459 }
460
461 void
462 closedirectory(directory_type *dir)
463 {
464 FindClose(dir->handle);
465 free(dir);
466 }
467
468
469 /************* Test for regular file in Win32 **********/
470
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns 0 for the path, otherwise 0
*/

int isregfile(char *filename)
{
return (isdirectory(filename) == 0)? 1 : 0;
}
478
479
480 /************* Test for a terminal in Win32 **********/
481
482 /* I don't know how to do this; assume never */
483
484 static BOOL
485 is_stdout_tty(void)
486 {
487 return FALSE;
488 }
489
490 static BOOL
491 is_file_tty(FILE *f)
492 {
493 return FALSE;
494 }
495
496
497 /************* Directory scanning when we can't do it ***********/
498
499 /* The type is void, and apart from isdirectory(), the functions do nothing. */
500
501 #else
502
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened; always returns a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There is never an entry to read; always returns a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
509
510
511 /************* Test for regular when we can't do it **********/
512
/* Assume all files are regular: the answer is always 1. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
516
517
518 /************* Test for a terminal when we can't do it **********/
519
520 static BOOL
521 is_stdout_tty(void)
522 {
523 return FALSE;
524 }
525
526 static BOOL
527 is_file_tty(FILE *f)
528 {
529 return FALSE;
530 }
531
532 #endif
533
534
535
536 #ifndef HAVE_STRERROR
537 /*************************************************
538 * Provide strerror() for non-ANSI libraries *
539 *************************************************/
540
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the number up in sys_errlist[].

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" string
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
554 #endif /* HAVE_STRERROR */
555
556
557
558 /*************************************************
559 * Exit from the program *
560 *************************************************/
561
562 /* If there has been a resource error, give a suitable message.
563
564 Argument: the return code
565 Returns: does not return
566 */
567
568 static void
569 pcregrep_exit(int rc)
570 {
571 if (resource_error)
572 {
573 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
574 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
575 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
576 }
577
578 exit(rc);
579 }
580
581
582
583 /*************************************************
584 * Read one line of input *
585 *************************************************/
586
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The space left is checked
before each character is read, so nothing is read or stored when length is zero
or negative, and a character is never consumed unless there is room for it.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
614
615
616
617 /*************************************************
618 * Find end of line *
619 *************************************************/
620
621 /* The length of the endline sequence that is found is set via lenptr. This may
622 be zero at the very end of the file if there is no line-ending sequence there.
623
624 Arguments:
625 p current position in line
626 endptr end of available data
627 lenptr where to put the length of the eol sequence
628
629 Returns: pointer to the last byte of the line
630 */
631
632 static char *
633 end_of_line(char *p, char *endptr, int *lenptr)
634 {
635 switch(endlinetype)
636 {
637 default: /* Just in case */
638 case EL_LF:
639 while (p < endptr && *p != '\n') p++;
640 if (p < endptr)
641 {
642 *lenptr = 1;
643 return p + 1;
644 }
645 *lenptr = 0;
646 return endptr;
647
648 case EL_CR:
649 while (p < endptr && *p != '\r') p++;
650 if (p < endptr)
651 {
652 *lenptr = 1;
653 return p + 1;
654 }
655 *lenptr = 0;
656 return endptr;
657
658 case EL_CRLF:
659 for (;;)
660 {
661 while (p < endptr && *p != '\r') p++;
662 if (++p >= endptr)
663 {
664 *lenptr = 0;
665 return endptr;
666 }
667 if (*p == '\n')
668 {
669 *lenptr = 2;
670 return p + 1;
671 }
672 }
673 break;
674
675 case EL_ANYCRLF:
676 while (p < endptr)
677 {
678 int extra = 0;
679 register int c = *((unsigned char *)p);
680
681 if (utf8 && c >= 0xc0)
682 {
683 int gcii, gcss;
684 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
685 gcss = 6*extra;
686 c = (c & utf8_table3[extra]) << gcss;
687 for (gcii = 1; gcii <= extra; gcii++)
688 {
689 gcss -= 6;
690 c |= (p[gcii] & 0x3f) << gcss;
691 }
692 }
693
694 p += 1 + extra;
695
696 switch (c)
697 {
698 case 0x0a: /* LF */
699 *lenptr = 1;
700 return p;
701
702 case 0x0d: /* CR */
703 if (p < endptr && *p == 0x0a)
704 {
705 *lenptr = 2;
706 p++;
707 }
708 else *lenptr = 1;
709 return p;
710
711 default:
712 break;
713 }
714 } /* End of loop for ANYCRLF case */
715
716 *lenptr = 0; /* Must have hit the end */
717 return endptr;
718
719 case EL_ANY:
720 while (p < endptr)
721 {
722 int extra = 0;
723 register int c = *((unsigned char *)p);
724
725 if (utf8 && c >= 0xc0)
726 {
727 int gcii, gcss;
728 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
729 gcss = 6*extra;
730 c = (c & utf8_table3[extra]) << gcss;
731 for (gcii = 1; gcii <= extra; gcii++)
732 {
733 gcss -= 6;
734 c |= (p[gcii] & 0x3f) << gcss;
735 }
736 }
737
738 p += 1 + extra;
739
740 switch (c)
741 {
742 case 0x0a: /* LF */
743 case 0x0b: /* VT */
744 case 0x0c: /* FF */
745 *lenptr = 1;
746 return p;
747
748 case 0x0d: /* CR */
749 if (p < endptr && *p == 0x0a)
750 {
751 *lenptr = 2;
752 p++;
753 }
754 else *lenptr = 1;
755 return p;
756
757 case 0x85: /* NEL */
758 *lenptr = utf8? 2 : 1;
759 return p;
760
761 case 0x2028: /* LS */
762 case 0x2029: /* PS */
763 *lenptr = 3;
764 return p;
765
766 default:
767 break;
768 }
769 } /* End of loop for ANY case */
770
771 *lenptr = 0; /* Must have hit the end */
772 return endptr;
773 } /* End of overall switch */
774 }
775
776
777
778 /*************************************************
779 * Find start of previous line *
780 *************************************************/
781
782 /* This is called when looking back for before lines to print.
783
784 Arguments:
785 p start of the subsequent line
786 startptr start of available data
787
788 Returns: pointer to the start of the previous line
789 */
790
791 static char *
792 previous_line(char *p, char *startptr)
793 {
794 switch(endlinetype)
795 {
796 default: /* Just in case */
797 case EL_LF:
798 p--;
799 while (p > startptr && p[-1] != '\n') p--;
800 return p;
801
802 case EL_CR:
803 p--;
804 while (p > startptr && p[-1] != '\n') p--;
805 return p;
806
807 case EL_CRLF:
808 for (;;)
809 {
810 p -= 2;
811 while (p > startptr && p[-1] != '\n') p--;
812 if (p <= startptr + 1 || p[-2] == '\r') return p;
813 }
814 return p; /* But control should never get here */
815
816 case EL_ANY:
817 case EL_ANYCRLF:
818 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
819 if (utf8) while ((*p & 0xc0) == 0x80) p--;
820
821 while (p > startptr)
822 {
823 register int c;
824 char *pp = p - 1;
825
826 if (utf8)
827 {
828 int extra = 0;
829 while ((*pp & 0xc0) == 0x80) pp--;
830 c = *((unsigned char *)pp);
831 if (c >= 0xc0)
832 {
833 int gcii, gcss;
834 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
835 gcss = 6*extra;
836 c = (c & utf8_table3[extra]) << gcss;
837 for (gcii = 1; gcii <= extra; gcii++)
838 {
839 gcss -= 6;
840 c |= (pp[gcii] & 0x3f) << gcss;
841 }
842 }
843 }
844 else c = *((unsigned char *)pp);
845
846 if (endlinetype == EL_ANYCRLF) switch (c)
847 {
848 case 0x0a: /* LF */
849 case 0x0d: /* CR */
850 return p;
851
852 default:
853 break;
854 }
855
856 else switch (c)
857 {
858 case 0x0a: /* LF */
859 case 0x0b: /* VT */
860 case 0x0c: /* FF */
861 case 0x0d: /* CR */
862 case 0x85: /* NEL */
863 case 0x2028: /* LS */
864 case 0x2029: /* PS */
865 return p;
866
867 default:
868 break;
869 }
870
871 p = pp; /* Back one character */
872 } /* End of loop for ANY case */
873
874 return startptr; /* Hit start of data */
875 } /* End of overall switch */
876 }
877
878
879
880
881
882 /*************************************************
883 * Print the previous "after" lines *
884 *************************************************/
885
886 /* This is called if we are about to lose said lines because of buffer filling,
887 and at the end of the file. The data in the line is written using fwrite() so
888 that a binary zero does not terminate it.
889
890 Arguments:
891 lastmatchnumber the number of the last matching line, plus one
892 lastmatchrestart where we restarted after the last match
893 endptr end of available data
894 printname filename for printing
895
896 Returns: nothing
897 */
898
899 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
900 char *endptr, char *printname)
901 {
902 if (after_context > 0 && lastmatchnumber > 0)
903 {
904 int count = 0;
905 while (lastmatchrestart < endptr && count++ < after_context)
906 {
907 int ellength;
908 char *pp = lastmatchrestart;
909 if (printname != NULL) fprintf(stdout, "%s-", printname);
910 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
911 pp = end_of_line(pp, endptr, &ellength);
912 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
913 lastmatchrestart = pp;
914 }
915 hyphenpending = TRUE;
916 }
917 }
918
919
920
921 /*************************************************
922 * Apply patterns to subject till one matches *
923 *************************************************/
924
925 /* This function is called to run through all patterns, looking for a match. It
926 is used multiple times for the same subject when colouring is enabled, in order
927 to find all possible matches.
928
929 Arguments:
930 matchptr the start of the subject
931 length the length of the subject to match
932 offsets the offets vector to fill in
933 mrc address of where to put the result of pcre_exec()
934
935 Returns: TRUE if there was a match
936 FALSE if there was no match
937 invert if there was a non-fatal error
938 */
939
940 static BOOL
941 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
942 {
943 int i;
944 size_t slen = length;
945 const char *msg = "this text:\n\n";
946 if (slen > 200)
947 {
948 slen = 200;
949 msg = "text that starts:\n\n";
950 }
951 for (i = 0; i < pattern_count; i++)
952 {
953 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
954 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
955 if (*mrc >= 0) return TRUE;
956 if (*mrc == PCRE_ERROR_NOMATCH) continue;
957 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
958 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
959 fprintf(stderr, "%s", msg);
960 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
961 fprintf(stderr, "\n\n");
962 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
963 resource_error = TRUE;
964 if (error_count++ > 20)
965 {
966 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
967 pcregrep_exit(2);
968 }
969 return invert; /* No more matching; don't show the line again */
970 }
971
972 return FALSE; /* No match, no errors */
973 }
974
975
976
977 /*************************************************
978 * Grep an individual file *
979 *************************************************/
980
981 /* This is called from grep_or_recurse() below. It uses a buffer that is three
982 times the value of MBUFTHIRD. The matching point is never allowed to stray into
983 the top third of the buffer, thus keeping more of the file available for
984 context printing or for multiline scanning. For large files, the pointer will
985 be in the middle third most of the time, so the bottom third is available for
986 "before" context printing.
987
988 Arguments:
989 handle the fopened FILE stream for a normal file
990 the gzFile pointer when reading is via libz
991 the BZFILE pointer when reading is via libbz2
992 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
993 printname the file name if it is to be printed for each match
994 or NULL if the file name is not to be printed
995 it cannot be NULL if filenames[_nomatch]_only is set
996
997 Returns: 0 if there was at least one match
998 1 otherwise (no matches)
999 2 if there is a read error on a .bz2 file
1000 */
1001
1002 static int
1003 pcregrep(void *handle, int frtype, char *printname)
1004 {
1005 int rc = 1;
1006 int linenumber = 1;
1007 int lastmatchnumber = 0;
1008 int count = 0;
1009 int filepos = 0;
1010 int offsets[OFFSET_SIZE];
1011 char *lastmatchrestart = NULL;
1012 char buffer[3*MBUFTHIRD];
1013 char *ptr = buffer;
1014 char *endptr;
1015 size_t bufflength;
1016 BOOL endhyphenpending = FALSE;
1017 BOOL input_line_buffered = line_buffered;
1018 FILE *in = NULL; /* Ensure initialized */
1019
1020 #ifdef SUPPORT_LIBZ
1021 gzFile ingz = NULL;
1022 #endif
1023
1024 #ifdef SUPPORT_LIBBZ2
1025 BZFILE *inbz2 = NULL;
1026 #endif
1027
1028
1029 /* Do the first read into the start of the buffer and set up the pointer to end
1030 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1031 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1032 fail. */
1033
1034 #ifdef SUPPORT_LIBZ
1035 if (frtype == FR_LIBZ)
1036 {
1037 ingz = (gzFile)handle;
1038 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1039 }
1040 else
1041 #endif
1042
1043 #ifdef SUPPORT_LIBBZ2
1044 if (frtype == FR_LIBBZ2)
1045 {
1046 inbz2 = (BZFILE *)handle;
1047 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1048 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1049 } /* without the cast it is unsigned. */
1050 else
1051 #endif
1052
1053 {
1054 in = (FILE *)handle;
1055 if (is_file_tty(in)) input_line_buffered = TRUE;
1056 bufflength = input_line_buffered?
1057 read_one_line(buffer, 3*MBUFTHIRD, in) :
1058 fread(buffer, 1, 3*MBUFTHIRD, in);
1059 }
1060
1061 endptr = buffer + bufflength;
1062
1063 /* Loop while the current pointer is not at the end of the file. For large
1064 files, endptr will be at the end of the buffer when we are in the middle of the
1065 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1066 way, the buffer is shifted left and re-filled. */
1067
1068 while (ptr < endptr)
1069 {
1070 int endlinelength;
1071 int mrc = 0;
1072 BOOL match;
1073 char *matchptr = ptr;
1074 char *t = ptr;
1075 size_t length, linelength;
1076
1077 /* At this point, ptr is at the start of a line. We need to find the length
1078 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1079 length of the remainder of the data in the buffer. Otherwise, it is the length of
1080 the next line, excluding the terminating newline. After matching, we always
1081 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1082 option is used for compiling, so that any match is constrained to be in the
1083 first line. */
1084
1085 t = end_of_line(t, endptr, &endlinelength);
1086 linelength = t - ptr - endlinelength;
1087 length = multiline? (size_t)(endptr - ptr) : linelength;
1088
1089 /* Extra processing for Jeffrey Friedl's debugging. */
1090
1091 #ifdef JFRIEDL_DEBUG
1092 if (jfriedl_XT || jfriedl_XR)
1093 {
1094 #include <sys/time.h>
1095 #include <time.h>
1096 struct timeval start_time, end_time;
1097 struct timezone dummy;
1098 int i;
1099
1100 if (jfriedl_XT)
1101 {
1102 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1103 const char *orig = ptr;
1104 ptr = malloc(newlen + 1);
1105 if (!ptr) {
1106 printf("out of memory");
1107 pcregrep_exit(2);
1108 }
1109 endptr = ptr;
1110 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1111 for (i = 0; i < jfriedl_XT; i++) {
1112 strncpy(endptr, orig, length);
1113 endptr += length;
1114 }
1115 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1116 length = newlen;
1117 }
1118
1119 if (gettimeofday(&start_time, &dummy) != 0)
1120 perror("bad gettimeofday");
1121
1122
1123 for (i = 0; i < jfriedl_XR; i++)
1124 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1125 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1126
1127 if (gettimeofday(&end_time, &dummy) != 0)
1128 perror("bad gettimeofday");
1129
1130 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1131 -
1132 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1133
1134 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1135 return 0;
1136 }
1137 #endif
1138
1139 /* We come back here after a match when the -o option (only_matching) is set,
1140 in order to find any further matches in the same line. */
1141
1142 ONLY_MATCHING_RESTART:
1143
1144 /* Run through all the patterns until one matches or there is an error other
1145 than NOMATCH. This code is in a subroutine so that it can be re-used for
1146 finding subsequent matches when colouring matched lines. */
1147
1148 match = match_patterns(matchptr, length, offsets, &mrc);
1149
1150 /* If it's a match or a not-match (as required), do what's wanted. */
1151
1152 if (match != invert)
1153 {
1154 BOOL hyphenprinted = FALSE;
1155
1156 /* We've failed if we want a file that doesn't have any matches. */
1157
1158 if (filenames == FN_NOMATCH_ONLY) return 1;
1159
1160 /* Just count if just counting is wanted. */
1161
1162 if (count_only) count++;
1163
1164 /* If all we want is a file name, there is no need to scan any more lines
1165 in the file. */
1166
1167 else if (filenames == FN_MATCH_ONLY)
1168 {
1169 fprintf(stdout, "%s\n", printname);
1170 return 0;
1171 }
1172
1173 /* Likewise, if all we want is a yes/no answer. */
1174
1175 else if (quiet) return 0;
1176
1177 /* The --only-matching option prints just the substring that matched, or a
1178 captured portion of it, as long as this string is not empty, and the
1179 --file-offsets and --line-offsets options output offsets for the matching
1180 substring (they both force --only-matching = 0). None of these options
1181 prints any context. Afterwards, adjust the start and length, and then jump
1182 back to look for further matches in the same line. If we are in invert
1183 mode, however, nothing is printed and we do not restart - this could still
1184 be useful because the return code is set. */
1185
1186 else if (only_matching >= 0)
1187 {
1188 if (!invert)
1189 {
1190 if (printname != NULL) fprintf(stdout, "%s:", printname);
1191 if (number) fprintf(stdout, "%d:", linenumber);
1192 if (line_offsets)
1193 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1194 offsets[1] - offsets[0]);
1195 else if (file_offsets)
1196 fprintf(stdout, "%d,%d\n",
1197 (int)(filepos + matchptr + offsets[0] - ptr),
1198 offsets[1] - offsets[0]);
1199 else if (only_matching < mrc)
1200 {
1201 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1202 if (plen > 0)
1203 {
1204 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1205 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1206 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1207 fprintf(stdout, "\n");
1208 }
1209 }
1210 else if (printname != NULL || number) fprintf(stdout, "\n");
1211 matchptr += offsets[1];
1212 length -= offsets[1];
1213 match = FALSE;
1214 if (line_buffered) fflush(stdout);
1215 rc = 0; /* Had some success */
1216 goto ONLY_MATCHING_RESTART;
1217 }
1218 }
1219
1220 /* This is the default case when none of the above options is set. We print
1221 the matching line(s), possibly preceded and/or followed by other lines of
1222 context. */
1223
1224 else
1225 {
1226 /* See if there is a requirement to print some "after" lines from a
1227 previous match. We never print any overlaps. */
1228
1229 if (after_context > 0 && lastmatchnumber > 0)
1230 {
1231 int ellength;
1232 int linecount = 0;
1233 char *p = lastmatchrestart;
1234
1235 while (p < ptr && linecount < after_context)
1236 {
1237 p = end_of_line(p, ptr, &ellength);
1238 linecount++;
1239 }
1240
1241 /* It is important to advance lastmatchrestart during this printing so
1242 that it interacts correctly with any "before" printing below. Print
1243 each line's data using fwrite() in case there are binary zeroes. */
1244
1245 while (lastmatchrestart < p)
1246 {
1247 char *pp = lastmatchrestart;
1248 if (printname != NULL) fprintf(stdout, "%s-", printname);
1249 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1250 pp = end_of_line(pp, endptr, &ellength);
1251 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1252 lastmatchrestart = pp;
1253 }
1254 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1255 }
1256
1257 /* If there were non-contiguous lines printed above, insert hyphens. */
1258
1259 if (hyphenpending)
1260 {
1261 fprintf(stdout, "--\n");
1262 hyphenpending = FALSE;
1263 hyphenprinted = TRUE;
1264 }
1265
1266 /* See if there is a requirement to print some "before" lines for this
1267 match. Again, don't print overlaps. */
1268
1269 if (before_context > 0)
1270 {
1271 int linecount = 0;
1272 char *p = ptr;
1273
1274 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1275 linecount < before_context)
1276 {
1277 linecount++;
1278 p = previous_line(p, buffer);
1279 }
1280
1281 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1282 fprintf(stdout, "--\n");
1283
1284 while (p < ptr)
1285 {
1286 int ellength;
1287 char *pp = p;
1288 if (printname != NULL) fprintf(stdout, "%s-", printname);
1289 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1290 pp = end_of_line(pp, endptr, &ellength);
1291 FWRITE(p, 1, pp - p, stdout);
1292 p = pp;
1293 }
1294 }
1295
1296 /* Now print the matching line(s); ensure we set hyphenpending at the end
1297 of the file if any context lines are being output. */
1298
1299 if (after_context > 0 || before_context > 0)
1300 endhyphenpending = TRUE;
1301
1302 if (printname != NULL) fprintf(stdout, "%s:", printname);
1303 if (number) fprintf(stdout, "%d:", linenumber);
1304
1305 /* In multiline mode, we want to print to the end of the line in which
1306 the end of the matched string is found, so we adjust linelength and the
1307 line number appropriately, but only when there actually was a match
1308 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1309 the match will always be before the first newline sequence. */
1310
1311 if (multiline)
1312 {
1313 int ellength;
1314 char *endmatch = ptr;
1315 if (!invert)
1316 {
1317 endmatch += offsets[1];
1318 t = ptr;
1319 while (t < endmatch)
1320 {
1321 t = end_of_line(t, endptr, &ellength);
1322 if (t <= endmatch) linenumber++; else break;
1323 }
1324 }
1325 endmatch = end_of_line(endmatch, endptr, &ellength);
1326 linelength = endmatch - ptr - ellength;
1327 }
1328
1329 /*** NOTE: Use only fwrite() to output the data line, so that binary
1330 zeroes are treated as just another data character. */
1331
1332 /* This extra option, for Jeffrey Friedl's debugging requirements,
1333 replaces the matched string, or a specific captured string if it exists,
1334 with X. When this happens, colouring is ignored. */
1335
1336 #ifdef JFRIEDL_DEBUG
1337 if (S_arg >= 0 && S_arg < mrc)
1338 {
1339 int first = S_arg * 2;
1340 int last = first + 1;
1341 FWRITE(ptr, 1, offsets[first], stdout);
1342 fprintf(stdout, "X");
1343 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1344 }
1345 else
1346 #endif
1347
1348 /* We have to split the line(s) up if colouring, and search for further
1349 matches. */
1350
1351 if (do_colour)
1352 {
1353 int last_offset = 0;
1354 FWRITE(ptr, 1, offsets[0], stdout);
1355 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1356 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1357 fprintf(stdout, "%c[00m", 0x1b);
1358 for (;;)
1359 {
1360 last_offset += offsets[1];
1361 matchptr += offsets[1];
1362 length -= offsets[1];
1363 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1364 FWRITE(matchptr, 1, offsets[0], stdout);
1365 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1366 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1367 fprintf(stdout, "%c[00m", 0x1b);
1368 }
1369 FWRITE(ptr + last_offset, 1,
1370 (linelength + endlinelength) - last_offset, stdout);
1371 }
1372
1373 /* Not colouring; no need to search for further matches */
1374
1375 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1376 }
1377
1378 /* End of doing what has to be done for a match. If --line-buffered was
1379 given, flush the output. */
1380
1381 if (line_buffered) fflush(stdout);
1382 rc = 0; /* Had some success */
1383
1384 /* Remember where the last match happened for after_context. We remember
1385 where we are about to restart, and that line's number. */
1386
1387 lastmatchrestart = ptr + linelength + endlinelength;
1388 lastmatchnumber = linenumber + 1;
1389 }
1390
1391 /* For a match in multiline inverted mode (which of course did not cause
1392 anything to be printed), we have to move on to the end of the match before
1393 proceeding. */
1394
1395 if (multiline && invert && match)
1396 {
1397 int ellength;
1398 char *endmatch = ptr + offsets[1];
1399 t = ptr;
1400 while (t < endmatch)
1401 {
1402 t = end_of_line(t, endptr, &ellength);
1403 if (t <= endmatch) linenumber++; else break;
1404 }
1405 endmatch = end_of_line(endmatch, endptr, &ellength);
1406 linelength = endmatch - ptr - ellength;
1407 }
1408
1409 /* Advance to after the newline and increment the line number. The file
1410 offset to the current line is maintained in filepos. */
1411
1412 ptr += linelength + endlinelength;
1413 filepos += (int)(linelength + endlinelength);
1414 linenumber++;
1415
1416 /* If input is line buffered, and the buffer is not yet full, read another
1417 line and add it into the buffer. */
1418
1419 if (input_line_buffered && bufflength < sizeof(buffer))
1420 {
1421 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1422 bufflength += add;
1423 endptr += add;
1424 }
1425
1426 /* If we haven't yet reached the end of the file (the buffer is full), and
1427 the current point is in the top 1/3 of the buffer, slide the buffer down by
1428 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1429 about to be lost, print them. */
1430
1431 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1432 {
1433 if (after_context > 0 &&
1434 lastmatchnumber > 0 &&
1435 lastmatchrestart < buffer + MBUFTHIRD)
1436 {
1437 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1438 lastmatchnumber = 0;
1439 }
1440
1441 /* Now do the shuffle */
1442
1443 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1444 ptr -= MBUFTHIRD;
1445
1446 #ifdef SUPPORT_LIBZ
1447 if (frtype == FR_LIBZ)
1448 bufflength = 2*MBUFTHIRD +
1449 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1450 else
1451 #endif
1452
1453 #ifdef SUPPORT_LIBBZ2
1454 if (frtype == FR_LIBBZ2)
1455 bufflength = 2*MBUFTHIRD +
1456 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1457 else
1458 #endif
1459
1460 bufflength = 2*MBUFTHIRD +
1461 (input_line_buffered?
1462 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1463 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1464 endptr = buffer + bufflength;
1465
1466 /* Adjust any last match point */
1467
1468 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1469 }
1470 } /* Loop through the whole file */
1471
1472 /* End of file; print final "after" lines if wanted; do_after_lines sets
1473 hyphenpending if it prints something. */
1474
1475 if (only_matching < 0 && !count_only)
1476 {
1477 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1478 hyphenpending |= endhyphenpending;
1479 }
1480
1481 /* Print the file name if we are looking for those without matches and there
1482 were none. If we found a match, we won't have got this far. */
1483
1484 if (filenames == FN_NOMATCH_ONLY)
1485 {
1486 fprintf(stdout, "%s\n", printname);
1487 return 0;
1488 }
1489
1490 /* Print the match count if wanted */
1491
1492 if (count_only)
1493 {
1494 if (count > 0 || !omit_zero_count)
1495 {
1496 if (printname != NULL && filenames != FN_NONE)
1497 fprintf(stdout, "%s:", printname);
1498 fprintf(stdout, "%d\n", count);
1499 }
1500 }
1501
1502 return rc;
1503 }
1504
1505
1506
1507 /*************************************************
1508 * Grep a file or recurse into a directory *
1509 *************************************************/
1510
1511 /* Given a path name, if it's a directory, scan all the files if we are
1512 recursing; if it's a file, grep it.
1513
1514 Arguments:
1515 pathname the path to investigate
1516 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1517 only_one_at_top TRUE if the path is the only one at toplevel
1518
1519 Returns: 0 if there was at least one match
1520 1 if there were no matches
1521 2 there was some kind of error
1522
1523 However, file opening failures are suppressed if "silent" is set.
1524 */
1525
1526 static int
1527 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1528 {
1529 int rc = 1;
1530 int sep;
1531 int frtype;
1532 int pathlen;
1533 void *handle;
1534 FILE *in = NULL; /* Ensure initialized */
1535
1536 #ifdef SUPPORT_LIBZ
1537 gzFile ingz = NULL;
1538 #endif
1539
1540 #ifdef SUPPORT_LIBBZ2
1541 BZFILE *inbz2 = NULL;
1542 #endif
1543
1544 /* If the file name is "-" we scan stdin */
1545
1546 if (strcmp(pathname, "-") == 0)
1547 {
1548 return pcregrep(stdin, FR_PLAIN,
1549 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1550 stdin_name : NULL);
1551 }
1552
1553 /* If the file is a directory, skip if skipping or if we are recursing, scan
1554 each file and directory within it, subject to any include or exclude patterns
1555 that were set. The scanning code is localized so it can be made
1556 system-specific. */
1557
1558 if ((sep = isdirectory(pathname)) != 0)
1559 {
1560 if (dee_action == dee_SKIP) return 1;
1561 if (dee_action == dee_RECURSE)
1562 {
1563 char buffer[1024];
1564 char *nextfile;
1565 directory_type *dir = opendirectory(pathname);
1566
1567 if (dir == NULL)
1568 {
1569 if (!silent)
1570 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1571 strerror(errno));
1572 return 2;
1573 }
1574
1575 while ((nextfile = readdirectory(dir)) != NULL)
1576 {
1577 int frc, nflen;
1578 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1579 nflen = (int)(strlen(nextfile));
1580
1581 if (isdirectory(buffer))
1582 {
1583 if (exclude_dir_compiled != NULL &&
1584 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1585 continue;
1586
1587 if (include_dir_compiled != NULL &&
1588 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1589 continue;
1590 }
1591 else
1592 {
1593 if (exclude_compiled != NULL &&
1594 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595 continue;
1596
1597 if (include_compiled != NULL &&
1598 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599 continue;
1600 }
1601
1602 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1603 if (frc > 1) rc = frc;
1604 else if (frc == 0 && rc == 1) rc = 0;
1605 }
1606
1607 closedirectory(dir);
1608 return rc;
1609 }
1610 }
1611
1612 /* If the file is not a directory and not a regular file, skip it if that's
1613 been requested. */
1614
1615 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1616
1617 /* Control reaches here if we have a regular file, or if we have a directory
1618 and recursion or skipping was not requested, or if we have anything else and
1619 skipping was not requested. The scan proceeds. If this is the first and only
1620 argument at top level, we don't show the file name, unless we are only showing
1621 the file name, or the filename was forced (-H). */
1622
1623 pathlen = (int)(strlen(pathname));
1624
1625 /* Open using zlib if it is supported and the file name ends with .gz. */
1626
1627 #ifdef SUPPORT_LIBZ
1628 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1629 {
1630 ingz = gzopen(pathname, "rb");
1631 if (ingz == NULL)
1632 {
1633 if (!silent)
1634 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1635 strerror(errno));
1636 return 2;
1637 }
1638 handle = (void *)ingz;
1639 frtype = FR_LIBZ;
1640 }
1641 else
1642 #endif
1643
1644 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1645
1646 #ifdef SUPPORT_LIBBZ2
1647 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1648 {
1649 inbz2 = BZ2_bzopen(pathname, "rb");
1650 handle = (void *)inbz2;
1651 frtype = FR_LIBBZ2;
1652 }
1653 else
1654 #endif
1655
1656 /* Otherwise use plain fopen(). The label is so that we can come back here if
1657 an attempt to read a .bz2 file indicates that it really is a plain file. */
1658
1659 #ifdef SUPPORT_LIBBZ2
1660 PLAIN_FILE:
1661 #endif
1662 {
1663 in = fopen(pathname, "rb");
1664 handle = (void *)in;
1665 frtype = FR_PLAIN;
1666 }
1667
1668 /* All the opening methods return errno when they fail. */
1669
1670 if (handle == NULL)
1671 {
1672 if (!silent)
1673 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1674 strerror(errno));
1675 return 2;
1676 }
1677
1678 /* Now grep the file */
1679
1680 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1681 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1682
1683 /* Close in an appropriate manner. */
1684
1685 #ifdef SUPPORT_LIBZ
1686 if (frtype == FR_LIBZ)
1687 gzclose(ingz);
1688 else
1689 #endif
1690
1691 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1692 read failed. If the error indicates that the file isn't in fact bzipped, try
1693 again as a normal file. */
1694
1695 #ifdef SUPPORT_LIBBZ2
1696 if (frtype == FR_LIBBZ2)
1697 {
1698 if (rc == 2)
1699 {
1700 int errnum;
1701 const char *err = BZ2_bzerror(inbz2, &errnum);
1702 if (errnum == BZ_DATA_ERROR_MAGIC)
1703 {
1704 BZ2_bzclose(inbz2);
1705 goto PLAIN_FILE;
1706 }
1707 else if (!silent)
1708 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1709 pathname, err);
1710 }
1711 BZ2_bzclose(inbz2);
1712 }
1713 else
1714 #endif
1715
1716 /* Normal file close */
1717
1718 fclose(in);
1719
1720 /* Pass back the yield from pcregrep(). */
1721
1722 return rc;
1723 }
1724
1725
1726
1727
1728 /*************************************************
1729 * Usage function *
1730 *************************************************/
1731
1732 static int
1733 usage(int rc)
1734 {
1735 option_item *op;
1736 fprintf(stderr, "Usage: pcregrep [-");
1737 for (op = optionlist; op->one_char != 0; op++)
1738 {
1739 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1740 }
1741 fprintf(stderr, "] [long options] [pattern] [files]\n");
1742 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1743 "options.\n");
1744 return rc;
1745 }
1746
1747
1748
1749
1750 /*************************************************
1751 * Help function *
1752 *************************************************/
1753
1754 static void
1755 help(void)
1756 {
1757 option_item *op;
1758
1759 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1760 printf("Search for PATTERN in each FILE or standard input.\n");
1761 printf("PATTERN must be present if neither -e nor -f is used.\n");
1762 printf("\"-\" can be used as a file name to mean STDIN.\n");
1763
1764 #ifdef SUPPORT_LIBZ
1765 printf("Files whose names end in .gz are read using zlib.\n");
1766 #endif
1767
1768 #ifdef SUPPORT_LIBBZ2
1769 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1770 #endif
1771
1772 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1773 printf("Other files and the standard input are read as plain files.\n\n");
1774 #else
1775 printf("All files are read as plain files, without any interpretation.\n\n");
1776 #endif
1777
1778 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1779 printf("Options:\n");
1780
1781 for (op = optionlist; op->one_char != 0; op++)
1782 {
1783 int n;
1784 char s[4];
1785 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1786 n = 30 - printf(" %s --%s", s, op->long_name);
1787 if (n < 1) n = 1;
1788 printf("%.*s%s\n", n, " ", op->help_text);
1789 }
1790
1791 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1792 printf("trailing white space is removed and blank lines are ignored.\n");
1793 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1794
1795 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1796 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1797 }
1798
1799
1800
1801
1802 /*************************************************
1803 * Handle a single-letter, no data option *
1804 *************************************************/
1805
1806 static int
1807 handle_option(int letter, int options)
1808 {
1809 switch(letter)
1810 {
1811 case N_FOFFSETS: file_offsets = TRUE; break;
1812 case N_HELP: help(); pcregrep_exit(0);
1813 case N_LOFFSETS: line_offsets = number = TRUE; break;
1814 case N_LBUFFER: line_buffered = TRUE; break;
1815 case 'c': count_only = TRUE; break;
1816 case 'F': process_options |= PO_FIXED_STRINGS; break;
1817 case 'H': filenames = FN_FORCE; break;
1818 case 'h': filenames = FN_NONE; break;
1819 case 'i': options |= PCRE_CASELESS; break;
1820 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1821 case 'L': filenames = FN_NOMATCH_ONLY; break;
1822 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1823 case 'n': number = TRUE; break;
1824 case 'o': only_matching = 0; break;
1825 case 'q': quiet = TRUE; break;
1826 case 'r': dee_action = dee_RECURSE; break;
1827 case 's': silent = TRUE; break;
1828 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1829 case 'v': invert = TRUE; break;
1830 case 'w': process_options |= PO_WORD_MATCH; break;
1831 case 'x': process_options |= PO_LINE_MATCH; break;
1832
1833 case 'V':
1834 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1835 pcregrep_exit(0);
1836 break;
1837
1838 default:
1839 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1840 pcregrep_exit(usage(2));
1841 }
1842
1843 return options;
1844 }
1845
1846
1847
1848
1849 /*************************************************
1850 * Construct printed ordinal *
1851 *************************************************/
1852
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose last
two digits are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix that
their final digit would otherwise select.

Argument:  n   the number
Returns:       pointer to a static buffer holding the text; the buffer is
               overwritten by the next call
*/

static char *
ordin(int n)
{
  static char buffer[32];      /* Ample for the digits of any int + suffix */
  const char *ending = "th";
  int lasttwo = n % 100;

  if (lasttwo < 11 || lasttwo > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1871
1872
1873
1874 /*************************************************
1875 * Compile a single pattern *
1876 *************************************************/
1877
1878 /* When the -F option has been used, this is called for each substring.
1879 Otherwise it's called for each supplied pattern.
1880
1881 Arguments:
1882 pattern the pattern string
1883 options the PCRE options
1884 filename the file name, or NULL for a command-line pattern
1885 count 0 if this is the only command line pattern, or
1886 number of the command line pattern, or
1887 linenumber for a pattern from a file
1888
1889 Returns: TRUE on success, FALSE after an error
1890 */
1891
1892 static BOOL
1893 compile_single_pattern(char *pattern, int options, char *filename, int count)
1894 {
1895 char buffer[MBUFTHIRD + 16];
1896 const char *error;
1897 int errptr;
1898
1899 if (pattern_count >= MAX_PATTERN_COUNT)
1900 {
1901 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1902 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1903 return FALSE;
1904 }
1905
1906 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1907 suffix[process_options]);
1908 pattern_list[pattern_count] =
1909 pcre_compile(buffer, options, &error, &errptr, pcretables);
1910 if (pattern_list[pattern_count] != NULL)
1911 {
1912 pattern_count++;
1913 return TRUE;
1914 }
1915
1916 /* Handle compile errors */
1917
1918 errptr -= (int)strlen(prefix[process_options]);
1919 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1920
1921 if (filename == NULL)
1922 {
1923 if (count == 0)
1924 fprintf(stderr, "pcregrep: Error in command-line regex "
1925 "at offset %d: %s\n", errptr, error);
1926 else
1927 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1928 "at offset %d: %s\n", ordin(count), errptr, error);
1929 }
1930 else
1931 {
1932 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1933 "at offset %d: %s\n", count, filename, errptr, error);
1934 }
1935
1936 return FALSE;
1937 }
1938
1939
1940
1941 /*************************************************
1942 * Compile one supplied pattern *
1943 *************************************************/
1944
1945 /* When the -F option has been used, each string may be a list of strings,
1946 separated by line breaks. They will be matched literally.
1947
1948 Arguments:
1949 pattern the pattern string
1950 options the PCRE options
1951 filename the file name, or NULL for a command-line pattern
1952 count 0 if this is the only command line pattern, or
1953 number of the command line pattern, or
1954 linenumber for a pattern from a file
1955
1956 Returns: TRUE on success, FALSE after an error
1957 */
1958
1959 static BOOL
1960 compile_pattern(char *pattern, int options, char *filename, int count)
1961 {
1962 if ((process_options & PO_FIXED_STRINGS) != 0)
1963 {
1964 char *eop = pattern + strlen(pattern);
1965 char buffer[MBUFTHIRD];
1966 for(;;)
1967 {
1968 int ellength;
1969 char *p = end_of_line(pattern, eop, &ellength);
1970 if (ellength == 0)
1971 return compile_single_pattern(pattern, options, filename, count);
1972 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1973 pattern = p;
1974 if (!compile_single_pattern(buffer, options, filename, count))
1975 return FALSE;
1976 }
1977 }
1978 else return compile_single_pattern(pattern, options, filename, count);
1979 }
1980
1981
1982
1983 /*************************************************
1984 * Main program *
1985 *************************************************/
1986
1987 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1988
1989 int
1990 main(int argc, char **argv)
1991 {
1992 int i, j;
1993 int rc = 1;
1994 int pcre_options = 0;
1995 int cmd_pattern_count = 0;
1996 int hint_count = 0;
1997 int errptr;
1998 BOOL only_one_at_top;
1999 char *patterns[MAX_PATTERN_COUNT];
2000 const char *locale_from = "--locale";
2001 const char *error;
2002
2003 /* Set the default line ending value from the default in the PCRE library;
2004 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2005 Note that the return values from pcre_config(), though derived from the ASCII
2006 codes, are the same in EBCDIC environments, so we must use the actual values
2007 rather than escapes such as '\r'. */
2008
2009 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2010 switch(i)
2011 {
2012 default: newline = (char *)"lf"; break;
2013 case 13: newline = (char *)"cr"; break;
2014 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2015 case -1: newline = (char *)"any"; break;
2016 case -2: newline = (char *)"anycrlf"; break;
2017 }
2018
2019 /* Process the options */
2020
2021 for (i = 1; i < argc; i++)
2022 {
2023 option_item *op = NULL;
2024 char *option_data = (char *)""; /* default to keep compiler happy */
2025 BOOL longop;
2026 BOOL longopwasequals = FALSE;
2027
2028 if (argv[i][0] != '-') break;
2029
2030 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2031 but only if we have previously had -e or -f to define the patterns. */
2032
2033 if (argv[i][1] == 0)
2034 {
2035 if (pattern_filename != NULL || pattern_count > 0) break;
2036 else pcregrep_exit(usage(2));
2037 }
2038
2039 /* Handle a long name option, or -- to terminate the options */
2040
2041 if (argv[i][1] == '-')
2042 {
2043 char *arg = argv[i] + 2;
2044 char *argequals = strchr(arg, '=');
2045
2046 if (*arg == 0) /* -- terminates options */
2047 {
2048 i++;
2049 break; /* out of the options-handling loop */
2050 }
2051
2052 longop = TRUE;
2053
2054 /* Some long options have data that follows after =, for example file=name.
2055 Some options have variations in the long name spelling: specifically, we
2056 allow "regexp" because GNU grep allows it, though I personally go along
2057 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2058 These options are entered in the table as "regex(p)". Options can be in
2059 both these categories. */
2060
2061 for (op = optionlist; op->one_char != 0; op++)
2062 {
2063 char *opbra = strchr(op->long_name, '(');
2064 char *equals = strchr(op->long_name, '=');
2065
2066 /* Handle options with only one spelling of the name */
2067
2068 if (opbra == NULL) /* Does not contain '(' */
2069 {
2070 if (equals == NULL) /* Not thing=data case */
2071 {
2072 if (strcmp(arg, op->long_name) == 0) break;
2073 }
2074 else /* Special case xxx=data */
2075 {
2076 int oplen = (int)(equals - op->long_name);
2077 int arglen = (argequals == NULL)?
2078 (int)strlen(arg) : (int)(argequals - arg);
2079 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2080 {
2081 option_data = arg + arglen;
2082 if (*option_data == '=')
2083 {
2084 option_data++;
2085 longopwasequals = TRUE;
2086 }
2087 break;
2088 }
2089 }
2090 }
2091
2092 /* Handle options with an alternate spelling of the name */
2093
2094 else
2095 {
2096 char buff1[24];
2097 char buff2[24];
2098
2099 int baselen = (int)(opbra - op->long_name);
2100 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2101 int arglen = (argequals == NULL || equals == NULL)?
2102 (int)strlen(arg) : (int)(argequals - arg);
2103
2104 sprintf(buff1, "%.*s", baselen, op->long_name);
2105 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2106
2107 if (strncmp(arg, buff1, arglen) == 0 ||
2108 strncmp(arg, buff2, arglen) == 0)
2109 {
2110 if (equals != NULL && argequals != NULL)
2111 {
2112 option_data = argequals;
2113 if (*option_data == '=')
2114 {
2115 option_data++;
2116 longopwasequals = TRUE;
2117 }
2118 }
2119 break;
2120 }
2121 }
2122 }
2123
2124 if (op->one_char == 0)
2125 {
2126 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2127 pcregrep_exit(usage(2));
2128 }
2129 }
2130
2131 /* Jeffrey Friedl's debugging harness uses these additional options which
2132 are not in the right form for putting in the option table because they use
2133 only one hyphen, yet are more than one character long. By putting them
2134 separately here, they will not get displayed as part of the help() output,
2135 but I don't think Jeffrey will care about that. */
2136
2137 #ifdef JFRIEDL_DEBUG
2138 else if (strcmp(argv[i], "-pre") == 0) {
2139 jfriedl_prefix = argv[++i];
2140 continue;
2141 } else if (strcmp(argv[i], "-post") == 0) {
2142 jfriedl_postfix = argv[++i];
2143 continue;
2144 } else if (strcmp(argv[i], "-XT") == 0) {
2145 sscanf(argv[++i], "%d", &jfriedl_XT);
2146 continue;
2147 } else if (strcmp(argv[i], "-XR") == 0) {
2148 sscanf(argv[++i], "%d", &jfriedl_XR);
2149 continue;
2150 }
2151 #endif
2152
2153
2154 /* One-char options; many that have no data may be in a single argument; we
2155 continue till we hit the last one or one that needs data. */
2156
2157 else
2158 {
2159 char *s = argv[i] + 1;
2160 longop = FALSE;
2161 while (*s != 0)
2162 {
2163 for (op = optionlist; op->one_char != 0; op++)
2164 {
2165 if (*s == op->one_char) break;
2166 }
2167 if (op->one_char == 0)
2168 {
2169 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2170 *s, argv[i]);
2171 pcregrep_exit(usage(2));
2172 }
2173
2174 /* Check for a single-character option that has data: OP_OP_NUMBER
2175 is used for one that either has a numeric value or defaults, i.e. the
2176 data is optional. If a digit follows, there is data; if not, carry on
2177 with other single-character options in the same string. */
2178
2179 option_data = s+1;
2180 if (op->type == OP_OP_NUMBER)
2181 {
2182 if (isdigit((unsigned char)s[1])) break;
2183 }
2184 else /* Check for end or a dataless option */
2185 {
2186 if (op->type != OP_NODATA || s[1] == 0) break;
2187 }
2188
2189 /* Handle a single-character option with no data, then loop for the
2190 next character in the string. */
2191
2192 pcre_options = handle_option(*s++, pcre_options);
2193 }
2194 }
2195
2196 /* At this point we should have op pointing to a matched option. If the type
2197 is OP_NODATA, it means that there is no data, and the option might set
2198 something in the PCRE options. */
2199
2200 if (op->type == OP_NODATA)
2201 {
2202 pcre_options = handle_option(op->one_char, pcre_options);
2203 continue;
2204 }
2205
2206 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2207 either has a value or defaults to something. It cannot have data in a
2208 separate item. At the moment, the only such options are "colo(u)r",
2209 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2210
2211 if (*option_data == 0 &&
2212 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2213 {
2214 switch (op->one_char)
2215 {
2216 case N_COLOUR:
2217 colour_option = (char *)"auto";
2218 break;
2219
2220 case 'o':
2221 only_matching = 0;
2222 break;
2223
2224 #ifdef JFRIEDL_DEBUG
2225 case 'S':
2226 S_arg = 0;
2227 break;
2228 #endif
2229 }
2230 continue;
2231 }
2232
2233 /* Otherwise, find the data string for the option. */
2234
2235 if (*option_data == 0)
2236 {
2237 if (i >= argc - 1 || longopwasequals)
2238 {
2239 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2240 pcregrep_exit(usage(2));
2241 }
2242 option_data = argv[++i];
2243 }
2244
2245 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2246 multiple times to create a list of patterns. */
2247
2248 if (op->type == OP_PATLIST)
2249 {
2250 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2251 {
2252 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2253 MAX_PATTERN_COUNT);
2254 return 2;
2255 }
2256 patterns[cmd_pattern_count++] = option_data;
2257 }
2258
2259 /* Otherwise, deal with single string or numeric data values. */
2260
2261 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2262 {
2263 *((char **)op->dataptr) = option_data;
2264 }
2265
2266 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2267 only for unpicking arguments, so just keep it simple. */
2268
2269 else
2270 {
2271 unsigned long int n = 0;
2272 char *endptr = option_data;
2273 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2274 while (isdigit((unsigned char)(*endptr)))
2275 n = n * 10 + (int)(*endptr++ - '0');
2276 if (*endptr != 0)
2277 {
2278 if (longop)
2279 {
2280 char *equals = strchr(op->long_name, '=');
2281 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2282 (int)(equals - op->long_name);
2283 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2284 option_data, nlen, op->long_name);
2285 }
2286 else
2287 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2288 option_data, op->one_char);
2289 pcregrep_exit(usage(2));
2290 }
2291 *((int *)op->dataptr) = n;
2292 }
2293 }
2294
2295 /* Options have been decoded. If -C was used, its value is used as a default
2296 for -A and -B. */
2297
2298 if (both_context > 0)
2299 {
2300 if (after_context == 0) after_context = both_context;
2301 if (before_context == 0) before_context = both_context;
2302 }
2303
2304 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2305 However, the latter two set only_matching. */
2306
2307 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2308 (file_offsets && line_offsets))
2309 {
2310 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2311 "and/or --line-offsets\n");
2312 pcregrep_exit(usage(2));
2313 }
2314
2315 if (file_offsets || line_offsets) only_matching = 0;
2316
2317 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2318 LC_ALL environment variable is set, and if so, use it. */
2319
2320 if (locale == NULL)
2321 {
2322 locale = getenv("LC_ALL");
2323 locale_from = "LCC_ALL";
2324 }
2325
2326 if (locale == NULL)
2327 {
2328 locale = getenv("LC_CTYPE");
2329 locale_from = "LC_CTYPE";
2330 }
2331
2332 /* If a locale has been provided, set it, and generate the tables the PCRE
2333 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2334
2335 if (locale != NULL)
2336 {
2337 if (setlocale(LC_CTYPE, locale) == NULL)
2338 {
2339 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2340 locale, locale_from);
2341 return 2;
2342 }
2343 pcretables = pcre_maketables();
2344 }
2345
2346 /* Sort out colouring */
2347
2348 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2349 {
2350 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2351 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2352 else
2353 {
2354 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2355 colour_option);
2356 return 2;
2357 }
2358 if (do_colour)
2359 {
2360 char *cs = getenv("PCREGREP_COLOUR");
2361 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2362 if (cs != NULL) colour_string = cs;
2363 }
2364 }
2365
2366 /* Interpret the newline type; the default settings are Unix-like. */
2367
2368 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2369 {
2370 pcre_options |= PCRE_NEWLINE_CR;
2371 endlinetype = EL_CR;
2372 }
2373 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2374 {
2375 pcre_options |= PCRE_NEWLINE_LF;
2376 endlinetype = EL_LF;
2377 }
2378 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2379 {
2380 pcre_options |= PCRE_NEWLINE_CRLF;
2381 endlinetype = EL_CRLF;
2382 }
2383 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2384 {
2385 pcre_options |= PCRE_NEWLINE_ANY;
2386 endlinetype = EL_ANY;
2387 }
2388 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2389 {
2390 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2391 endlinetype = EL_ANYCRLF;
2392 }
2393 else
2394 {
2395 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2396 return 2;
2397 }
2398
2399 /* Interpret the text values for -d and -D */
2400
2401 if (dee_option != NULL)
2402 {
2403 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2404 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2405 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2406 else
2407 {
2408 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2409 return 2;
2410 }
2411 }
2412
2413 if (DEE_option != NULL)
2414 {
2415 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2416 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2417 else
2418 {
2419 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2420 return 2;
2421 }
2422 }
2423
2424 /* Check the values for Jeffrey Friedl's debugging options. */
2425
2426 #ifdef JFRIEDL_DEBUG
2427 if (S_arg > 9)
2428 {
2429 fprintf(stderr, "pcregrep: bad value for -S option\n");
2430 return 2;
2431 }
2432 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2433 {
2434 if (jfriedl_XT == 0) jfriedl_XT = 1;
2435 if (jfriedl_XR == 0) jfriedl_XR = 1;
2436 }
2437 #endif
2438
2439 /* Get memory to store the pattern and hints lists. */
2440
2441 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2442 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2443
2444 if (pattern_list == NULL || hints_list == NULL)
2445 {
2446 fprintf(stderr, "pcregrep: malloc failed\n");
2447 goto EXIT2;
2448 }
2449
2450 /* If no patterns were provided by -e, and there is no file provided by -f,
2451 the first argument is the one and only pattern, and it must exist. */
2452
2453 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2454 {
2455 if (i >= argc) return usage(2);
2456 patterns[cmd_pattern_count++] = argv[i++];
2457 }
2458
2459 /* Compile the patterns that were provided on the command line, either by
2460 multiple uses of -e or as a single unkeyed pattern. */
2461
2462 for (j = 0; j < cmd_pattern_count; j++)
2463 {
2464 if (!compile_pattern(patterns[j], pcre_options, NULL,
2465 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2466 goto EXIT2;
2467 }
2468
2469 /* Compile the regular expressions that are provided in a file. */
2470
2471 if (pattern_filename != NULL)
2472 {
2473 int linenumber = 0;
2474 FILE *f;
2475 char *filename;
2476 char buffer[MBUFTHIRD];
2477
2478 if (strcmp(pattern_filename, "-") == 0)
2479 {
2480 f = stdin;
2481 filename = stdin_name;
2482 }
2483 else
2484 {
2485 f = fopen(pattern_filename, "r");
2486 if (f == NULL)
2487 {
2488 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2489 strerror(errno));
2490 goto EXIT2;
2491 }
2492 filename = pattern_filename;
2493 }
2494
2495 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2496 {
2497 char *s = buffer + (int)strlen(buffer);
2498 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2499 *s = 0;
2500 linenumber++;
2501 if (buffer[0] == 0) continue; /* Skip blank lines */
2502 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2503 goto EXIT2;
2504 }
2505
2506 if (f != stdin) fclose(f);
2507 }
2508
2509 /* Study the regular expressions, as we will be running them many times */
2510
2511 for (j = 0; j < pattern_count; j++)
2512 {
2513 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2514 if (error != NULL)
2515 {
2516 char s[16];
2517 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2518 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2519 goto EXIT2;
2520 }
2521 hint_count++;
2522 }
2523
2524 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2525 pcre_extra block for each pattern. */
2526
2527 if (match_limit > 0 || match_limit_recursion > 0)
2528 {
2529 for (j = 0; j < pattern_count; j++)
2530 {
2531 if (hints_list[j] == NULL)
2532 {
2533 hints_list[j] = malloc(sizeof(pcre_extra));
2534 if (hints_list[j] == NULL)
2535 {
2536 fprintf(stderr, "pcregrep: malloc failed\n");
2537 pcregrep_exit(2);
2538 }
2539 }
2540 if (match_limit > 0)
2541 {
2542 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2543 hints_list[j]->match_limit = match_limit;
2544 }
2545 if (match_limit_recursion > 0)
2546 {
2547 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2548 hints_list[j]->match_limit_recursion = match_limit_recursion;
2549 }
2550 }
2551 }
2552
2553 /* If there are include or exclude patterns, compile them. */
2554
2555 if (exclude_pattern != NULL)
2556 {
2557 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2558 pcretables);
2559 if (exclude_compiled == NULL)
2560 {
2561 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2562 errptr, error);
2563 goto EXIT2;
2564 }
2565 }
2566
2567 if (include_pattern != NULL)
2568 {
2569 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2570 pcretables);
2571 if (include_compiled == NULL)
2572 {
2573 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2574 errptr, error);
2575 goto EXIT2;
2576 }
2577 }
2578
2579 if (exclude_dir_pattern != NULL)
2580 {
2581 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2582 pcretables);
2583 if (exclude_dir_compiled == NULL)
2584 {
2585 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2586 errptr, error);
2587 goto EXIT2;
2588 }
2589 }
2590
2591 if (include_dir_pattern != NULL)
2592 {
2593 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2594 pcretables);
2595 if (include_dir_compiled == NULL)
2596 {
2597 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2598 errptr, error);
2599 goto EXIT2;
2600 }
2601 }
2602
2603 /* If there are no further arguments, do the business on stdin and exit. */
2604
2605 if (i >= argc)
2606 {
2607 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2608 goto EXIT;
2609 }
2610
2611 /* Otherwise, work through the remaining arguments as files or directories.
2612 Pass in the fact that there is only one argument at top level - this suppresses
2613 the file name if the argument is not a directory and filenames are not
2614 otherwise forced. */
2615
2616 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2617
2618 for (; i < argc; i++)
2619 {
2620 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2621 only_one_at_top);
2622 if (frc > 1) rc = frc;
2623 else if (frc == 0 && rc == 1) rc = 0;
2624 }
2625
2626 EXIT:
2627 if (pattern_list != NULL)
2628 {
2629 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2630 free(pattern_list);
2631 }
2632 if (hints_list != NULL)
2633 {
2634 for (i = 0; i < hint_count; i++)
2635 {
2636 if (hints_list[i] != NULL) free(hints_list[i]);
2637 }
2638 free(hints_list);
2639 }
2640 pcregrep_exit(rc);
2641
2642 EXIT2:
2643 rc = 2;
2644 goto EXIT;
2645 }
2646
2647 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12