/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 685 - (show annotations) (download)
Tue Sep 6 15:02:07 2011 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 79897 byte(s)
Update pcregrep to use JIT by default with options to disable.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
68 #define FALSE 0
69 #define TRUE 1
70
71 typedef int BOOL;
72
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
75
76 #if BUFSIZ > 8192
77 #define PATBUFSIZE BUFSIZ
78 #else
79 #define PATBUFSIZE 8192
80 #endif
81
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
85
86 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87
88 /* File reading styles */
89
90 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91
92 /* Actions for the -d and -D options */
93
94 enum { dee_READ, dee_SKIP, dee_RECURSE };
95 enum { DEE_READ, DEE_SKIP };
96
97 /* Actions for special processing options (flag bits) */
98
99 #define PO_WORD_MATCH 0x0001
100 #define PO_LINE_MATCH 0x0002
101 #define PO_FIXED_STRINGS 0x0004
102
103 /* Line ending types */
104
105 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result in an empty "if". The test is
wrapped in do { } while (0) so that "FWRITE(...);" is exactly one statement:
it can be used as the unbraced body of an "if" that has an "else" without the
"else" being captured or the code failing to compile. Oddly, the warning does
not also seem to apply to fprintf(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *main_buffer = NULL;
139 static char *newline = NULL;
140 static char *pattern_filename = NULL;
141 static char *stdin_name = (char *)"(standard input)";
142 static char *locale = NULL;
143
144 static const unsigned char *pcretables = NULL;
145
146 static int pattern_count = 0;
147 static pcre **pattern_list = NULL;
148 static pcre_extra **hints_list = NULL;
149
150 static char *include_pattern = NULL;
151 static char *exclude_pattern = NULL;
152 static char *include_dir_pattern = NULL;
153 static char *exclude_dir_pattern = NULL;
154
155 static pcre *include_compiled = NULL;
156 static pcre *exclude_compiled = NULL;
157 static pcre *include_dir_compiled = NULL;
158 static pcre *exclude_dir_compiled = NULL;
159
160 static int after_context = 0;
161 static int before_context = 0;
162 static int both_context = 0;
163 static int bufthird = PCREGREP_BUFSIZE;
164 static int bufsize = 3*PCREGREP_BUFSIZE;
165 static int dee_action = dee_READ;
166 static int DEE_action = DEE_READ;
167 static int error_count = 0;
168 static int filenames = FN_DEFAULT;
169 static int only_matching = -1;
170 static int process_options = 0;
171
172 #ifdef SUPPORT_PCREGREP_JIT
173 static int study_options = PCRE_STUDY_JIT_COMPILE;
174 #else
175 static int study_options = 0;
176 #endif
177
178 static unsigned long int match_limit = 0;
179 static unsigned long int match_limit_recursion = 0;
180
181 static BOOL count_only = FALSE;
182 static BOOL do_colour = FALSE;
183 static BOOL file_offsets = FALSE;
184 static BOOL hyphenpending = FALSE;
185 static BOOL invert = FALSE;
186 static BOOL line_buffered = FALSE;
187 static BOOL line_offsets = FALSE;
188 static BOOL multiline = FALSE;
189 static BOOL number = FALSE;
190 static BOOL omit_zero_count = FALSE;
191 static BOOL resource_error = FALSE;
192 static BOOL quiet = FALSE;
193 static BOOL silent = FALSE;
194 static BOOL utf8 = FALSE;
195
196 /* Structure for options and list of them */
197
198 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199 OP_OP_NUMBER, OP_PATLIST };
200
201 typedef struct option_item {
202 int type;
203 int one_char;
204 void *dataptr;
205 const char *long_name;
206 const char *help_text;
207 } option_item;
208
209 /* Options without a single-letter equivalent get a negative value. This can be
210 used to identify them. */
211
212 #define N_COLOUR (-1)
213 #define N_EXCLUDE (-2)
214 #define N_EXCLUDE_DIR (-3)
215 #define N_HELP (-4)
216 #define N_INCLUDE (-5)
217 #define N_INCLUDE_DIR (-6)
218 #define N_LABEL (-7)
219 #define N_LOCALE (-8)
220 #define N_NULL (-9)
221 #define N_LOFFSETS (-10)
222 #define N_FOFFSETS (-11)
223 #define N_LBUFFER (-12)
224 #define N_M_LIMIT (-13)
225 #define N_M_LIMIT_REC (-14)
226 #define N_BUFSIZE (-15)
227 #define N_NOJIT (-16)
228
229 static option_item optionlist[] = {
230 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
231 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
232 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
233 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
234 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
235 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
236 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
237 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
238 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
239 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
240 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
241 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
242 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
243 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
244 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
245 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
246 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
247 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
248 #ifdef SUPPORT_PCREGREP_JIT
249 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
250 #else
251 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
252 #endif
253 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
254 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
255 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
256 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
257 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
258 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
259 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
260 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
262 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
264 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
265 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
266 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
267 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
268 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
269 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271
272 /* These two were accidentally implemented with underscores instead of
273 hyphens in the option names. As this was not discovered for several releases,
274 the incorrect versions are left in the table for compatibility. However, the
275 --help function misses out any option that has an underscore in its name. */
276
277 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279
280 #ifdef JFRIEDL_DEBUG
281 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
282 #endif
283 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
284 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
285 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
286 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
287 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
288 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
289 { OP_NODATA, 0, NULL, NULL, NULL }
290 };
291
292 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294 that the combination of -w and -x has the same effect as -x on its own, so we
295 can treat them as the same. */
296
297 static const char *prefix[] = {
298 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
299
300 static const char *suffix[] = {
301 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
302
303 /* UTF-8 tables - used only in UTF-8 mode when the newline setting is "any" or "anycrlf". */
304
305 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
306
307 const char utf8_table4[] = {
308 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
309 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
310 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
311 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
312
313
314
315 /*************************************************
316 * Exit from the program *
317 *************************************************/
318
319 /* If there has been a resource error, give a suitable message.
320
321 Argument: the return code
322 Returns: does not return
323 */
324
325 static void
326 pcregrep_exit(int rc)
327 {
328 if (resource_error)
329 {
330 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332 PCRE_ERROR_JIT_STACKLIMIT);
333 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334 }
335
336 exit(rc);
337 }
338
339
340 /*************************************************
341 * OS-specific functions *
342 *************************************************/
343
344 /* These functions are defined so that they can be made system specific,
345 although at present the only ones are for Unix, Win32, and for "no support". */
346
347
348 /************* Directory scanning in Unix ***********/
349
350 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351 #include <sys/types.h>
352 #include <sys/stat.h>
353 #include <dirent.h>
354
355 typedef DIR directory_type;
356
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A failed stat() is reported as "not a directory", in the expectation that
  opening the path as a file will then fail. */
  if (stat(filename, &info) < 0) return 0;

  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
365
366 static directory_type *
367 opendirectory(char *filename)
368 {
369 return opendir(filename);
370 }
371
372 static char *
373 readdirectory(directory_type *dir)
374 {
375 for (;;)
376 {
377 struct dirent *dent = readdir(dir);
378 if (dent == NULL) return NULL;
379 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380 return dent->d_name;
381 }
382 /* Control never reaches here */
383 }
384
385 static void
386 closedirectory(directory_type *dir)
387 {
388 closedir(dir);
389 }
390
391
392 /************* Test for regular file in Unix **********/
393
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, or if stat() fails
            0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A failed stat() is reported as "regular", in the expectation that opening
  the path as a file will then fail. */
  if (stat(filename, &info) < 0) return 1;

  return S_ISREG(info.st_mode) ? 1 : 0;
}
402
403
404 /************* Test for a terminal in Unix **********/
405
406 static BOOL
407 is_stdout_tty(void)
408 {
409 return isatty(fileno(stdout));
410 }
411
412 static BOOL
413 is_file_tty(FILE *f)
414 {
415 return isatty(fileno(f));
416 }
417
418
419 /************* Directory scanning in Win32 ***********/
420
421 /* I (Philip Hazel) have no means of testing this code. It was contributed by
422 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423 when it did not exist. David Byron added a patch that moved the #include of
424 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426 undefined when it is indeed undefined. */
427
428 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429
430 #ifndef STRICT
431 # define STRICT
432 #endif
433 #ifndef WIN32_LEAN_AND_MEAN
434 # define WIN32_LEAN_AND_MEAN
435 #endif
436
437 #include <windows.h>
438
439 #ifndef INVALID_FILE_ATTRIBUTES
440 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441 #endif
442
443 typedef struct directory_type
444 {
445 HANDLE handle;
446 BOOL first;
447 WIN32_FIND_DATA data;
448 } directory_type;
449
450 int
451 isdirectory(char *filename)
452 {
453 DWORD attr = GetFileAttributes(filename);
454 if (attr == INVALID_FILE_ATTRIBUTES)
455 return 0;
456 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
457 }
458
459 directory_type *
460 opendirectory(char *filename)
461 {
462 size_t len;
463 char *pattern;
464 directory_type *dir;
465 DWORD err;
466 len = strlen(filename);
467 pattern = (char *) malloc(len + 3);
468 dir = (directory_type *) malloc(sizeof(*dir));
469 if ((pattern == NULL) || (dir == NULL))
470 {
471 fprintf(stderr, "pcregrep: malloc failed\n");
472 pcregrep_exit(2);
473 }
474 memcpy(pattern, filename, len);
475 memcpy(&(pattern[len]), "\\*", 3);
476 dir->handle = FindFirstFile(pattern, &(dir->data));
477 if (dir->handle != INVALID_HANDLE_VALUE)
478 {
479 free(pattern);
480 dir->first = TRUE;
481 return dir;
482 }
483 err = GetLastError();
484 free(pattern);
485 free(dir);
486 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
487 return NULL;
488 }
489
490 char *
491 readdirectory(directory_type *dir)
492 {
493 for (;;)
494 {
495 if (!dir->first)
496 {
497 if (!FindNextFile(dir->handle, &(dir->data)))
498 return NULL;
499 }
500 else
501 {
502 dir->first = FALSE;
503 }
504 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
505 return dir->data.cFileName;
506 }
507 #ifndef _MSC_VER
508 return NULL; /* Keep compiler happy; never executed */
509 #endif
510 }
511
512 void
513 closedirectory(directory_type *dir)
514 {
515 FindClose(dir->handle);
516 free(dir);
517 }
518
519
520 /************* Test for regular file in Win32 **********/
521
522 /* I don't know how to do this, or if it can be done; assume all paths are
523 regular if they are not directories. */
524
/* Win32 version: anything that isdirectory() does not report as a directory
is treated as a regular file. */

int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
529
530
531 /************* Test for a terminal in Win32 **********/
532
533 /* I don't know how to do this; assume never */
534
535 static BOOL
536 is_stdout_tty(void)
537 {
538 return FALSE;
539 }
540
541 static BOOL
542 is_file_tty(FILE *f)
543 {
544 return FALSE;
545 }
546
547
548 /************* Directory scanning when we can't do it ***********/
549
550 /* The type is void, and apart from isdirectory(), the functions do nothing. */
551
552 #else
553
typedef void directory_type;

/* No directory support: nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* No directory support: opening always yields a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)NULL;
}

/* No directory support: there is never an entry to return. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)NULL;
}

/* No directory support: closing does nothing. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
560
561
562 /************* Test for regular when we can't do it **********/
563
564 /* Assume all files are regular. */
565
/* No file-type support: every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
567
568
569 /************* Test for a terminal when we can't do it **********/
570
571 static BOOL
572 is_stdout_tty(void)
573 {
574 return FALSE;
575 }
576
577 static BOOL
578 is_file_tty(FILE *f)
579 {
580 return FALSE;
581 }
582
583 #endif
584
585
586
587 #ifndef HAVE_STRERROR
588 /*************************************************
589 * Provide strerror() for non-ANSI libraries *
590 *************************************************/
591
592 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
593 in their libraries, but can provide the same facility by this simple
594 alternative function. */
595
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist[].

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr
            the fixed text "unknown error number" otherwise
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
605 #endif /* HAVE_STRERROR */
606
607
608
609 /*************************************************
610 * Read one line of input *
611 *************************************************/
612
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it gives no byte count: if the
data contains binary zeros there is no way of telling how many characters were
read.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file. No byte is read or stored when length is zero or
negative, so the buffer is never written outside its bounds.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file (or when
             length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int yield = 0;

  /* The room check comes before the read so that a full (or zero-sized)
  buffer neither consumes nor stores a byte. */
  while (yield < length)
  {
    int c = fgetc(f);
    if (c == EOF) break;
    buffer[yield++] = (char)c;
    if (c == '\n') break;
  }

  return yield;
}
640
641
642
643 /*************************************************
644 * Find end of line *
645 *************************************************/
646
647 /* The length of the endline sequence that is found is set via lenptr. This may
648 be zero at the very end of the file if there is no line-ending sequence there.
649
650 Arguments:
651 p current position in line
652 endptr end of available data
653 lenptr where to put the length of the eol sequence
654
655 Returns: pointer after the last byte of the line,
656 including the newline byte(s)
657 */
658
659 static char *
660 end_of_line(char *p, char *endptr, int *lenptr)
661 {
662 switch(endlinetype)
663 {
664 default: /* Just in case */
665 case EL_LF:
666 while (p < endptr && *p != '\n') p++;
667 if (p < endptr)
668 {
669 *lenptr = 1;
670 return p + 1;
671 }
672 *lenptr = 0;
673 return endptr;
674
675 case EL_CR:
676 while (p < endptr && *p != '\r') p++;
677 if (p < endptr)
678 {
679 *lenptr = 1;
680 return p + 1;
681 }
682 *lenptr = 0;
683 return endptr;
684
685 case EL_CRLF:
686 for (;;)
687 {
688 while (p < endptr && *p != '\r') p++;
689 if (++p >= endptr)
690 {
691 *lenptr = 0;
692 return endptr;
693 }
694 if (*p == '\n')
695 {
696 *lenptr = 2;
697 return p + 1;
698 }
699 }
700 break;
701
702 case EL_ANYCRLF:
703 while (p < endptr)
704 {
705 int extra = 0;
706 register int c = *((unsigned char *)p);
707
708 if (utf8 && c >= 0xc0)
709 {
710 int gcii, gcss;
711 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
712 gcss = 6*extra;
713 c = (c & utf8_table3[extra]) << gcss;
714 for (gcii = 1; gcii <= extra; gcii++)
715 {
716 gcss -= 6;
717 c |= (p[gcii] & 0x3f) << gcss;
718 }
719 }
720
721 p += 1 + extra;
722
723 switch (c)
724 {
725 case 0x0a: /* LF */
726 *lenptr = 1;
727 return p;
728
729 case 0x0d: /* CR */
730 if (p < endptr && *p == 0x0a)
731 {
732 *lenptr = 2;
733 p++;
734 }
735 else *lenptr = 1;
736 return p;
737
738 default:
739 break;
740 }
741 } /* End of loop for ANYCRLF case */
742
743 *lenptr = 0; /* Must have hit the end */
744 return endptr;
745
746 case EL_ANY:
747 while (p < endptr)
748 {
749 int extra = 0;
750 register int c = *((unsigned char *)p);
751
752 if (utf8 && c >= 0xc0)
753 {
754 int gcii, gcss;
755 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
756 gcss = 6*extra;
757 c = (c & utf8_table3[extra]) << gcss;
758 for (gcii = 1; gcii <= extra; gcii++)
759 {
760 gcss -= 6;
761 c |= (p[gcii] & 0x3f) << gcss;
762 }
763 }
764
765 p += 1 + extra;
766
767 switch (c)
768 {
769 case 0x0a: /* LF */
770 case 0x0b: /* VT */
771 case 0x0c: /* FF */
772 *lenptr = 1;
773 return p;
774
775 case 0x0d: /* CR */
776 if (p < endptr && *p == 0x0a)
777 {
778 *lenptr = 2;
779 p++;
780 }
781 else *lenptr = 1;
782 return p;
783
784 case 0x85: /* NEL */
785 *lenptr = utf8? 2 : 1;
786 return p;
787
788 case 0x2028: /* LS */
789 case 0x2029: /* PS */
790 *lenptr = 3;
791 return p;
792
793 default:
794 break;
795 }
796 } /* End of loop for ANY case */
797
798 *lenptr = 0; /* Must have hit the end */
799 return endptr;
800 } /* End of overall switch */
801 }
802
803
804
805 /*************************************************
806 * Find start of previous line *
807 *************************************************/
808
809 /* This is called when looking back for before lines to print.
810
811 Arguments:
812 p start of the subsequent line
813 startptr start of available data
814
815 Returns: pointer to the start of the previous line
816 */
817
818 static char *
819 previous_line(char *p, char *startptr)
820 {
821 switch(endlinetype)
822 {
823 default: /* Just in case */
824 case EL_LF:
825 p--;
826 while (p > startptr && p[-1] != '\n') p--;
827 return p;
828
829 case EL_CR:
830 p--;
831 while (p > startptr && p[-1] != '\n') p--;
832 return p;
833
834 case EL_CRLF:
835 for (;;)
836 {
837 p -= 2;
838 while (p > startptr && p[-1] != '\n') p--;
839 if (p <= startptr + 1 || p[-2] == '\r') return p;
840 }
841 return p; /* But control should never get here */
842
843 case EL_ANY:
844 case EL_ANYCRLF:
845 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846 if (utf8) while ((*p & 0xc0) == 0x80) p--;
847
848 while (p > startptr)
849 {
850 register int c;
851 char *pp = p - 1;
852
853 if (utf8)
854 {
855 int extra = 0;
856 while ((*pp & 0xc0) == 0x80) pp--;
857 c = *((unsigned char *)pp);
858 if (c >= 0xc0)
859 {
860 int gcii, gcss;
861 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
862 gcss = 6*extra;
863 c = (c & utf8_table3[extra]) << gcss;
864 for (gcii = 1; gcii <= extra; gcii++)
865 {
866 gcss -= 6;
867 c |= (pp[gcii] & 0x3f) << gcss;
868 }
869 }
870 }
871 else c = *((unsigned char *)pp);
872
873 if (endlinetype == EL_ANYCRLF) switch (c)
874 {
875 case 0x0a: /* LF */
876 case 0x0d: /* CR */
877 return p;
878
879 default:
880 break;
881 }
882
883 else switch (c)
884 {
885 case 0x0a: /* LF */
886 case 0x0b: /* VT */
887 case 0x0c: /* FF */
888 case 0x0d: /* CR */
889 case 0x85: /* NEL */
890 case 0x2028: /* LS */
891 case 0x2029: /* PS */
892 return p;
893
894 default:
895 break;
896 }
897
898 p = pp; /* Back one character */
899 } /* End of loop for ANY case */
900
901 return startptr; /* Hit start of data */
902 } /* End of overall switch */
903 }
904
905
906
907
908
909 /*************************************************
910 * Print the previous "after" lines *
911 *************************************************/
912
913 /* This is called if we are about to lose said lines because of buffer filling,
914 and at the end of the file. The data in the line is written using fwrite() so
915 that a binary zero does not terminate it.
916
917 Arguments:
918 lastmatchnumber the number of the last matching line, plus one
919 lastmatchrestart where we restarted after the last match
920 endptr end of available data
921 printname filename for printing
922
923 Returns: nothing
924 */
925
926 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
927 char *endptr, char *printname)
928 {
929 if (after_context > 0 && lastmatchnumber > 0)
930 {
931 int count = 0;
932 while (lastmatchrestart < endptr && count++ < after_context)
933 {
934 int ellength;
935 char *pp = lastmatchrestart;
936 if (printname != NULL) fprintf(stdout, "%s-", printname);
937 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938 pp = end_of_line(pp, endptr, &ellength);
939 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940 lastmatchrestart = pp;
941 }
942 hyphenpending = TRUE;
943 }
944 }
945
946
947
948 /*************************************************
949 * Apply patterns to subject till one matches *
950 *************************************************/
951
952 /* This function is called to run through all patterns, looking for a match. It
953 is used multiple times for the same subject when colouring is enabled, in order
954 to find all possible matches.
955
956 Arguments:
957 matchptr the start of the subject
958 length the length of the subject to match
959 startoffset where to start matching
960 offsets the offets vector to fill in
961 mrc address of where to put the result of pcre_exec()
962
963 Returns: TRUE if there was a match
964 FALSE if there was no match
965 invert if there was a non-fatal error
966 */
967
968 static BOOL
969 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970 int *mrc)
971 {
972 int i;
973 size_t slen = length;
974 const char *msg = "this text:\n\n";
975 if (slen > 200)
976 {
977 slen = 200;
978 msg = "text that starts:\n\n";
979 }
980 for (i = 0; i < pattern_count; i++)
981 {
982 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984 if (*mrc >= 0) return TRUE;
985 if (*mrc == PCRE_ERROR_NOMATCH) continue;
986 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988 fprintf(stderr, "%s", msg);
989 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
990 fprintf(stderr, "\n\n");
991 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993 resource_error = TRUE;
994 if (error_count++ > 20)
995 {
996 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997 pcregrep_exit(2);
998 }
999 return invert; /* No more matching; don't show the line again */
1000 }
1001
1002 return FALSE; /* No match, no errors */
1003 }
1004
1005
1006
1007 /*************************************************
1008 * Grep an individual file *
1009 *************************************************/
1010
1011 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012 times the value of bufthird. The matching point is never allowed to stray into
1013 the top third of the buffer, thus keeping more of the file available for
1014 context printing or for multiline scanning. For large files, the pointer will
1015 be in the middle third most of the time, so the bottom third is available for
1016 "before" context printing.
1017
1018 Arguments:
1019 handle the fopened FILE stream for a normal file
1020 the gzFile pointer when reading is via libz
1021 the BZFILE pointer when reading is via libbz2
1022 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023 filename the file name or NULL (for errors)
1024 printname the file name if it is to be printed for each match
1025 or NULL if the file name is not to be printed
1026 it cannot be NULL if filenames[_nomatch]_only is set
1027
1028 Returns: 0 if there was at least one match
1029 1 otherwise (no matches)
1030 2 if an overlong line is encountered
1031 3 if there is a read error on a .bz2 file
1032 */
1033
1034 static int
1035 pcregrep(void *handle, int frtype, char *filename, char *printname)
1036 {
1037 int rc = 1;
1038 int linenumber = 1;
1039 int lastmatchnumber = 0;
1040 int count = 0;
1041 int filepos = 0;
1042 int offsets[OFFSET_SIZE];
1043 char *lastmatchrestart = NULL;
1044 char *ptr = main_buffer;
1045 char *endptr;
1046 size_t bufflength;
1047 BOOL endhyphenpending = FALSE;
1048 BOOL input_line_buffered = line_buffered;
1049 FILE *in = NULL; /* Ensure initialized */
1050
1051 #ifdef SUPPORT_LIBZ
1052 gzFile ingz = NULL;
1053 #endif
1054
1055 #ifdef SUPPORT_LIBBZ2
1056 BZFILE *inbz2 = NULL;
1057 #endif
1058
1059
1060 /* Do the first read into the start of the buffer and set up the pointer to end
1061 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063 fail. */
1064
1065 #ifdef SUPPORT_LIBZ
1066 if (frtype == FR_LIBZ)
1067 {
1068 ingz = (gzFile)handle;
1069 bufflength = gzread (ingz, main_buffer, bufsize);
1070 }
1071 else
1072 #endif
1073
1074 #ifdef SUPPORT_LIBBZ2
1075 if (frtype == FR_LIBBZ2)
1076 {
1077 inbz2 = (BZFILE *)handle;
1078 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1080 } /* without the cast it is unsigned. */
1081 else
1082 #endif
1083
1084 {
1085 in = (FILE *)handle;
1086 if (is_file_tty(in)) input_line_buffered = TRUE;
1087 bufflength = input_line_buffered?
1088 read_one_line(main_buffer, bufsize, in) :
1089 fread(main_buffer, 1, bufsize, in);
1090 }
1091
1092 endptr = main_buffer + bufflength;
1093
1094 /* Loop while the current pointer is not at the end of the file. For large
1095 files, endptr will be at the end of the buffer when we are in the middle of the
1096 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1097 way, the buffer is shifted left and re-filled. */
1098
1099 while (ptr < endptr)
1100 {
1101 int endlinelength;
1102 int mrc = 0;
1103 int startoffset = 0;
1104 BOOL match;
1105 char *matchptr = ptr;
1106 char *t = ptr;
1107 size_t length, linelength;
1108
1109 /* At this point, ptr is at the start of a line. We need to find the length
1110 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111 length remainder of the data in the buffer. Otherwise, it is the length of
1112 the next line, excluding the terminating newline. After matching, we always
1113 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114 option is used for compiling, so that any match is constrained to be in the
1115 first line. */
1116
1117 t = end_of_line(t, endptr, &endlinelength);
1118 linelength = t - ptr - endlinelength;
1119 length = multiline? (size_t)(endptr - ptr) : linelength;
1120
1121 /* Check to see if the line we are looking at extends right to the very end
1122 of the buffer without a line terminator. This means the line is too long to
1123 handle. */
1124
1125 if (endlinelength == 0 && t == main_buffer + bufsize)
1126 {
1127 fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128 "pcregrep: check the --buffer-size option\n",
1129 linenumber,
1130 (filename == NULL)? "" : " of file ",
1131 (filename == NULL)? "" : filename);
1132 return 2;
1133 }
1134
1135 /* Extra processing for Jeffrey Friedl's debugging. */
1136
1137 #ifdef JFRIEDL_DEBUG
1138 if (jfriedl_XT || jfriedl_XR)
1139 {
1140 #include <sys/time.h>
1141 #include <time.h>
1142 struct timeval start_time, end_time;
1143 struct timezone dummy;
1144 int i;
1145
1146 if (jfriedl_XT)
1147 {
1148 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149 const char *orig = ptr;
1150 ptr = malloc(newlen + 1);
1151 if (!ptr) {
1152 printf("out of memory");
1153 pcregrep_exit(2);
1154 }
1155 endptr = ptr;
1156 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157 for (i = 0; i < jfriedl_XT; i++) {
1158 strncpy(endptr, orig, length);
1159 endptr += length;
1160 }
1161 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162 length = newlen;
1163 }
1164
1165 if (gettimeofday(&start_time, &dummy) != 0)
1166 perror("bad gettimeofday");
1167
1168
1169 for (i = 0; i < jfriedl_XR; i++)
1170 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172
1173 if (gettimeofday(&end_time, &dummy) != 0)
1174 perror("bad gettimeofday");
1175
1176 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177 -
1178 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179
1180 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181 return 0;
1182 }
1183 #endif
1184
1185 /* We come back here after a match when the -o option (only_matching) is set,
1186 in order to find any further matches in the same line. */
1187
1188 ONLY_MATCHING_RESTART:
1189
1190 /* Run through all the patterns until one matches or there is an error other
1191 than NOMATCH. This code is in a subroutine so that it can be re-used for
1192 finding subsequent matches when colouring matched lines. */
1193
1194 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195
1196 /* If it's a match or a not-match (as required), do what's wanted. */
1197
1198 if (match != invert)
1199 {
1200 BOOL hyphenprinted = FALSE;
1201
1202 /* We've failed if we want a file that doesn't have any matches. */
1203
1204 if (filenames == FN_NOMATCH_ONLY) return 1;
1205
1206 /* Just count if just counting is wanted. */
1207
1208 if (count_only) count++;
1209
1210 /* If all we want is a file name, there is no need to scan any more lines
1211 in the file. */
1212
1213 else if (filenames == FN_MATCH_ONLY)
1214 {
1215 fprintf(stdout, "%s\n", printname);
1216 return 0;
1217 }
1218
1219 /* Likewise, if all we want is a yes/no answer. */
1220
1221 else if (quiet) return 0;
1222
1223 /* The --only-matching option prints just the substring that matched, or a
1224 captured portion of it, as long as this string is not empty, and the
1225 --file-offsets and --line-offsets options output offsets for the matching
1226 substring (they both force --only-matching = 0). None of these options
1227 prints any context. Afterwards, adjust the start and then jump back to look
1228 for further matches in the same line. If we are in invert mode, however,
1229 nothing is printed and we do not restart - this could still be useful
1230 because the return code is set. */
1231
1232 else if (only_matching >= 0)
1233 {
1234 if (!invert)
1235 {
1236 if (printname != NULL) fprintf(stdout, "%s:", printname);
1237 if (number) fprintf(stdout, "%d:", linenumber);
1238 if (line_offsets)
1239 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240 offsets[1] - offsets[0]);
1241 else if (file_offsets)
1242 fprintf(stdout, "%d,%d\n",
1243 (int)(filepos + matchptr + offsets[0] - ptr),
1244 offsets[1] - offsets[0]);
1245 else if (only_matching < mrc)
1246 {
1247 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248 if (plen > 0)
1249 {
1250 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253 fprintf(stdout, "\n");
1254 }
1255 }
1256 else if (printname != NULL || number) fprintf(stdout, "\n");
1257 match = FALSE;
1258 if (line_buffered) fflush(stdout);
1259 rc = 0; /* Had some success */
1260 startoffset = offsets[1]; /* Restart after the match */
1261 goto ONLY_MATCHING_RESTART;
1262 }
1263 }
1264
1265 /* This is the default case when none of the above options is set. We print
1266 the matching lines(s), possibly preceded and/or followed by other lines of
1267 context. */
1268
1269 else
1270 {
1271 /* See if there is a requirement to print some "after" lines from a
1272 previous match. We never print any overlaps. */
1273
1274 if (after_context > 0 && lastmatchnumber > 0)
1275 {
1276 int ellength;
1277 int linecount = 0;
1278 char *p = lastmatchrestart;
1279
1280 while (p < ptr && linecount < after_context)
1281 {
1282 p = end_of_line(p, ptr, &ellength);
1283 linecount++;
1284 }
1285
1286 /* It is important to advance lastmatchrestart during this printing so
1287 that it interacts correctly with any "before" printing below. Print
1288 each line's data using fwrite() in case there are binary zeroes. */
1289
1290 while (lastmatchrestart < p)
1291 {
1292 char *pp = lastmatchrestart;
1293 if (printname != NULL) fprintf(stdout, "%s-", printname);
1294 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295 pp = end_of_line(pp, endptr, &ellength);
1296 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297 lastmatchrestart = pp;
1298 }
1299 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300 }
1301
1302 /* If there were non-contiguous lines printed above, insert hyphens. */
1303
1304 if (hyphenpending)
1305 {
1306 fprintf(stdout, "--\n");
1307 hyphenpending = FALSE;
1308 hyphenprinted = TRUE;
1309 }
1310
1311 /* See if there is a requirement to print some "before" lines for this
1312 match. Again, don't print overlaps. */
1313
1314 if (before_context > 0)
1315 {
1316 int linecount = 0;
1317 char *p = ptr;
1318
1319 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320 linecount < before_context)
1321 {
1322 linecount++;
1323 p = previous_line(p, main_buffer);
1324 }
1325
1326 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1327 fprintf(stdout, "--\n");
1328
1329 while (p < ptr)
1330 {
1331 int ellength;
1332 char *pp = p;
1333 if (printname != NULL) fprintf(stdout, "%s-", printname);
1334 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335 pp = end_of_line(pp, endptr, &ellength);
1336 FWRITE(p, 1, pp - p, stdout);
1337 p = pp;
1338 }
1339 }
1340
1341 /* Now print the matching line(s); ensure we set hyphenpending at the end
1342 of the file if any context lines are being output. */
1343
1344 if (after_context > 0 || before_context > 0)
1345 endhyphenpending = TRUE;
1346
1347 if (printname != NULL) fprintf(stdout, "%s:", printname);
1348 if (number) fprintf(stdout, "%d:", linenumber);
1349
1350 /* In multiline mode, we want to print to the end of the line in which
1351 the end of the matched string is found, so we adjust linelength and the
1352 line number appropriately, but only when there actually was a match
1353 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354 the match will always be before the first newline sequence. */
1355
1356 if (multiline & !invert)
1357 {
1358 char *endmatch = ptr + offsets[1];
1359 t = ptr;
1360 while (t < endmatch)
1361 {
1362 t = end_of_line(t, endptr, &endlinelength);
1363 if (t < endmatch) linenumber++; else break;
1364 }
1365 linelength = t - ptr - endlinelength;
1366 }
1367
1368 /*** NOTE: Use only fwrite() to output the data line, so that binary
1369 zeroes are treated as just another data character. */
1370
1371 /* This extra option, for Jeffrey Friedl's debugging requirements,
1372 replaces the matched string, or a specific captured string if it exists,
1373 with X. When this happens, colouring is ignored. */
1374
1375 #ifdef JFRIEDL_DEBUG
1376 if (S_arg >= 0 && S_arg < mrc)
1377 {
1378 int first = S_arg * 2;
1379 int last = first + 1;
1380 FWRITE(ptr, 1, offsets[first], stdout);
1381 fprintf(stdout, "X");
1382 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383 }
1384 else
1385 #endif
1386
1387 /* We have to split the line(s) up if colouring, and search for further
1388 matches, but not of course if the line is a non-match. */
1389
1390 if (do_colour && !invert)
1391 {
1392 int plength;
1393 FWRITE(ptr, 1, offsets[0], stdout);
1394 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396 fprintf(stdout, "%c[00m", 0x1b);
1397 for (;;)
1398 {
1399 startoffset = offsets[1];
1400 if (startoffset >= linelength + endlinelength ||
1401 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402 break;
1403 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406 fprintf(stdout, "%c[00m", 0x1b);
1407 }
1408
1409 /* In multiline mode, we may have already printed the complete line
1410 and its line-ending characters (if they matched the pattern), so there
1411 may be no more to print. */
1412
1413 plength = (linelength + endlinelength) - startoffset;
1414 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415 }
1416
1417 /* Not colouring; no need to search for further matches */
1418
1419 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420 }
1421
1422 /* End of doing what has to be done for a match. If --line-buffered was
1423 given, flush the output. */
1424
1425 if (line_buffered) fflush(stdout);
1426 rc = 0; /* Had some success */
1427
1428 /* Remember where the last match happened for after_context. We remember
1429 where we are about to restart, and that line's number. */
1430
1431 lastmatchrestart = ptr + linelength + endlinelength;
1432 lastmatchnumber = linenumber + 1;
1433 }
1434
1435 /* For a match in multiline inverted mode (which of course did not cause
1436 anything to be printed), we have to move on to the end of the match before
1437 proceeding. */
1438
1439 if (multiline && invert && match)
1440 {
1441 int ellength;
1442 char *endmatch = ptr + offsets[1];
1443 t = ptr;
1444 while (t < endmatch)
1445 {
1446 t = end_of_line(t, endptr, &ellength);
1447 if (t <= endmatch) linenumber++; else break;
1448 }
1449 endmatch = end_of_line(endmatch, endptr, &ellength);
1450 linelength = endmatch - ptr - ellength;
1451 }
1452
1453 /* Advance to after the newline and increment the line number. The file
1454 offset to the current line is maintained in filepos. */
1455
1456 ptr += linelength + endlinelength;
1457 filepos += (int)(linelength + endlinelength);
1458 linenumber++;
1459
1460 /* If input is line buffered, and the buffer is not yet full, read another
1461 line and add it into the buffer. */
1462
1463 if (input_line_buffered && bufflength < bufsize)
1464 {
1465 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1466 bufflength += add;
1467 endptr += add;
1468 }
1469
1470 /* If we haven't yet reached the end of the file (the buffer is full), and
1471 the current point is in the top 1/3 of the buffer, slide the buffer down by
1472 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473 about to be lost, print them. */
1474
1475 if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1476 {
1477 if (after_context > 0 &&
1478 lastmatchnumber > 0 &&
1479 lastmatchrestart < main_buffer + bufthird)
1480 {
1481 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482 lastmatchnumber = 0;
1483 }
1484
1485 /* Now do the shuffle */
1486
1487 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488 ptr -= bufthird;
1489
1490 #ifdef SUPPORT_LIBZ
1491 if (frtype == FR_LIBZ)
1492 bufflength = 2*bufthird +
1493 gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494 else
1495 #endif
1496
1497 #ifdef SUPPORT_LIBBZ2
1498 if (frtype == FR_LIBBZ2)
1499 bufflength = 2*bufthird +
1500 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501 else
1502 #endif
1503
1504 bufflength = 2*bufthird +
1505 (input_line_buffered?
1506 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507 fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508 endptr = main_buffer + bufflength;
1509
1510 /* Adjust any last match point */
1511
1512 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513 }
1514 } /* Loop through the whole file */
1515
1516 /* End of file; print final "after" lines if wanted; do_after_lines sets
1517 hyphenpending if it prints something. */
1518
1519 if (only_matching < 0 && !count_only)
1520 {
1521 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522 hyphenpending |= endhyphenpending;
1523 }
1524
1525 /* Print the file name if we are looking for those without matches and there
1526 were none. If we found a match, we won't have got this far. */
1527
1528 if (filenames == FN_NOMATCH_ONLY)
1529 {
1530 fprintf(stdout, "%s\n", printname);
1531 return 0;
1532 }
1533
1534 /* Print the match count if wanted */
1535
1536 if (count_only)
1537 {
1538 if (count > 0 || !omit_zero_count)
1539 {
1540 if (printname != NULL && filenames != FN_NONE)
1541 fprintf(stdout, "%s:", printname);
1542 fprintf(stdout, "%d\n", count);
1543 }
1544 }
1545
1546 return rc;
1547 }
1548
1549
1550
1551 /*************************************************
1552 * Grep a file or recurse into a directory *
1553 *************************************************/
1554
1555 /* Given a path name, if it's a directory, scan all the files if we are
1556 recursing; if it's a file, grep it.
1557
1558 Arguments:
1559 pathname the path to investigate
1560 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1561 only_one_at_top TRUE if the path is the only one at toplevel
1562
1563 Returns: 0 if there was at least one match
1564 1 if there were no matches
1565 2 there was some kind of error
1566
1567 However, file opening failures are suppressed if "silent" is set.
1568 */
1569
1570 static int
1571 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1572 {
1573 int rc = 1;
1574 int sep;
1575 int frtype;
1576 int pathlen;
1577 void *handle;
1578 FILE *in = NULL; /* Ensure initialized */
1579
1580 #ifdef SUPPORT_LIBZ
1581 gzFile ingz = NULL;
1582 #endif
1583
1584 #ifdef SUPPORT_LIBBZ2
1585 BZFILE *inbz2 = NULL;
1586 #endif
1587
1588 /* If the file name is "-" we scan stdin */
1589
1590 if (strcmp(pathname, "-") == 0)
1591 {
1592 return pcregrep(stdin, FR_PLAIN, stdin_name,
1593 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1594 stdin_name : NULL);
1595 }
1596
1597 /* If the file is a directory, skip if skipping or if we are recursing, scan
1598 each file and directory within it, subject to any include or exclude patterns
1599 that were set. The scanning code is localized so it can be made
1600 system-specific. */
1601
1602 if ((sep = isdirectory(pathname)) != 0)
1603 {
1604 if (dee_action == dee_SKIP) return 1;
1605 if (dee_action == dee_RECURSE)
1606 {
1607 char buffer[1024];
1608 char *nextfile;
1609 directory_type *dir = opendirectory(pathname);
1610
1611 if (dir == NULL)
1612 {
1613 if (!silent)
1614 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1615 strerror(errno));
1616 return 2;
1617 }
1618
1619 while ((nextfile = readdirectory(dir)) != NULL)
1620 {
1621 int frc, nflen;
1622 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1623 nflen = (int)(strlen(nextfile));
1624
1625 if (isdirectory(buffer))
1626 {
1627 if (exclude_dir_compiled != NULL &&
1628 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1629 continue;
1630
1631 if (include_dir_compiled != NULL &&
1632 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1633 continue;
1634 }
1635 else
1636 {
1637 if (exclude_compiled != NULL &&
1638 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1639 continue;
1640
1641 if (include_compiled != NULL &&
1642 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1643 continue;
1644 }
1645
1646 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1647 if (frc > 1) rc = frc;
1648 else if (frc == 0 && rc == 1) rc = 0;
1649 }
1650
1651 closedirectory(dir);
1652 return rc;
1653 }
1654 }
1655
1656 /* If the file is not a directory and not a regular file, skip it if that's
1657 been requested. */
1658
1659 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1660
1661 /* Control reaches here if we have a regular file, or if we have a directory
1662 and recursion or skipping was not requested, or if we have anything else and
1663 skipping was not requested. The scan proceeds. If this is the first and only
1664 argument at top level, we don't show the file name, unless we are only showing
1665 the file name, or the filename was forced (-H). */
1666
1667 pathlen = (int)(strlen(pathname));
1668
1669 /* Open using zlib if it is supported and the file name ends with .gz. */
1670
1671 #ifdef SUPPORT_LIBZ
1672 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1673 {
1674 ingz = gzopen(pathname, "rb");
1675 if (ingz == NULL)
1676 {
1677 if (!silent)
1678 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1679 strerror(errno));
1680 return 2;
1681 }
1682 handle = (void *)ingz;
1683 frtype = FR_LIBZ;
1684 }
1685 else
1686 #endif
1687
1688 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1689
1690 #ifdef SUPPORT_LIBBZ2
1691 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1692 {
1693 inbz2 = BZ2_bzopen(pathname, "rb");
1694 handle = (void *)inbz2;
1695 frtype = FR_LIBBZ2;
1696 }
1697 else
1698 #endif
1699
1700 /* Otherwise use plain fopen(). The label is so that we can come back here if
1701 an attempt to read a .bz2 file indicates that it really is a plain file. */
1702
1703 #ifdef SUPPORT_LIBBZ2
1704 PLAIN_FILE:
1705 #endif
1706 {
1707 in = fopen(pathname, "rb");
1708 handle = (void *)in;
1709 frtype = FR_PLAIN;
1710 }
1711
1712 /* All the opening methods return errno when they fail. */
1713
1714 if (handle == NULL)
1715 {
1716 if (!silent)
1717 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718 strerror(errno));
1719 return 2;
1720 }
1721
1722 /* Now grep the file */
1723
1724 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1725 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1726
1727 /* Close in an appropriate manner. */
1728
1729 #ifdef SUPPORT_LIBZ
1730 if (frtype == FR_LIBZ)
1731 gzclose(ingz);
1732 else
1733 #endif
1734
1735 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1736 read failed. If the error indicates that the file isn't in fact bzipped, try
1737 again as a normal file. */
1738
1739 #ifdef SUPPORT_LIBBZ2
1740 if (frtype == FR_LIBBZ2)
1741 {
1742 if (rc == 3)
1743 {
1744 int errnum;
1745 const char *err = BZ2_bzerror(inbz2, &errnum);
1746 if (errnum == BZ_DATA_ERROR_MAGIC)
1747 {
1748 BZ2_bzclose(inbz2);
1749 goto PLAIN_FILE;
1750 }
1751 else if (!silent)
1752 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1753 pathname, err);
1754 rc = 2; /* The normal "something went wrong" code */
1755 }
1756 BZ2_bzclose(inbz2);
1757 }
1758 else
1759 #endif
1760
1761 /* Normal file close */
1762
1763 fclose(in);
1764
1765 /* Pass back the yield from pcregrep(). */
1766
1767 return rc;
1768 }
1769
1770
1771
1772
1773 /*************************************************
1774 * Usage function *
1775 *************************************************/
1776
1777 static int
1778 usage(int rc)
1779 {
1780 option_item *op;
1781 fprintf(stderr, "Usage: pcregrep [-");
1782 for (op = optionlist; op->one_char != 0; op++)
1783 {
1784 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1785 }
1786 fprintf(stderr, "] [long options] [pattern] [files]\n");
1787 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1788 "options.\n");
1789 return rc;
1790 }
1791
1792
1793
1794
1795 /*************************************************
1796 * Help function *
1797 *************************************************/
1798
1799 static void
1800 help(void)
1801 {
1802 option_item *op;
1803
1804 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1805 printf("Search for PATTERN in each FILE or standard input.\n");
1806 printf("PATTERN must be present if neither -e nor -f is used.\n");
1807 printf("\"-\" can be used as a file name to mean STDIN.\n");
1808
1809 #ifdef SUPPORT_LIBZ
1810 printf("Files whose names end in .gz are read using zlib.\n");
1811 #endif
1812
1813 #ifdef SUPPORT_LIBBZ2
1814 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1815 #endif
1816
1817 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1818 printf("Other files and the standard input are read as plain files.\n\n");
1819 #else
1820 printf("All files are read as plain files, without any interpretation.\n\n");
1821 #endif
1822
1823 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1824 printf("Options:\n");
1825
1826 for (op = optionlist; op->one_char != 0; op++)
1827 {
1828 int n;
1829 char s[4];
1830
1831 /* Two options were accidentally implemented and documented with underscores
1832 instead of hyphens in their names, something that was not noticed for quite a
1833 few releases. When fixing this, I left the underscored versions in the list
1834 in case people were using them. However, we don't want to display them in the
1835 help data. There are no other options that contain underscores, and we do not
1836 expect ever to implement such options. Therefore, just omit any option that
1837 contains an underscore. */
1838
1839 if (strchr(op->long_name, '_') != NULL) continue;
1840
1841 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1842 n = 31 - printf(" %s --%s", s, op->long_name);
1843 if (n < 1) n = 1;
1844 printf("%.*s%s\n", n, " ", op->help_text);
1845 }
1846
1847 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1848 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1849 printf("When reading patterns from a file instead of using a command line option,\n");
1850 printf("trailing white space is removed and blank lines are ignored.\n");
1851 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1852 MAX_PATTERN_COUNT, PATBUFSIZE);
1853
1854 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1855 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1856 }
1857
1858
1859
1860
1861 /*************************************************
1862 * Handle a single-letter, no data option *
1863 *************************************************/
1864
1865 static int
1866 handle_option(int letter, int options)
1867 {
1868 switch(letter)
1869 {
1870 case N_FOFFSETS: file_offsets = TRUE; break;
1871 case N_HELP: help(); pcregrep_exit(0);
1872 case N_LBUFFER: line_buffered = TRUE; break;
1873 case N_LOFFSETS: line_offsets = number = TRUE; break;
1874 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1875 case 'c': count_only = TRUE; break;
1876 case 'F': process_options |= PO_FIXED_STRINGS; break;
1877 case 'H': filenames = FN_FORCE; break;
1878 case 'h': filenames = FN_NONE; break;
1879 case 'i': options |= PCRE_CASELESS; break;
1880 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1881 case 'L': filenames = FN_NOMATCH_ONLY; break;
1882 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1883 case 'n': number = TRUE; break;
1884 case 'o': only_matching = 0; break;
1885 case 'q': quiet = TRUE; break;
1886 case 'r': dee_action = dee_RECURSE; break;
1887 case 's': silent = TRUE; break;
1888 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1889 case 'v': invert = TRUE; break;
1890 case 'w': process_options |= PO_WORD_MATCH; break;
1891 case 'x': process_options |= PO_LINE_MATCH; break;
1892
1893 case 'V':
1894 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1895 pcregrep_exit(0);
1896 break;
1897
1898 default:
1899 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1900 pcregrep_exit(usage(2));
1901 }
1902
1903 return options;
1904 }
1905
1906
1907
1908
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the ending that their final
digit alone would suggest.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];     /* Room for any int, the ending, and the zero */
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

if (lasttwo < 11 || lasttwo > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

sprintf(p, "%d", n);
while (*p != 0) p++;
strcpy(p, ending);
return buffer;
}
1931
1932
1933
1934 /*************************************************
1935 * Compile a single pattern *
1936 *************************************************/
1937
1938 /* When the -F option has been used, this is called for each substring.
1939 Otherwise it's called for each supplied pattern.
1940
1941 Arguments:
1942 pattern the pattern string
1943 options the PCRE options
1944 filename the file name, or NULL for a command-line pattern
1945 count 0 if this is the only command line pattern, or
1946 number of the command line pattern, or
1947 linenumber for a pattern from a file
1948
1949 Returns: TRUE on success, FALSE after an error
1950 */
1951
1952 static BOOL
1953 compile_single_pattern(char *pattern, int options, char *filename, int count)
1954 {
1955 char buffer[PATBUFSIZE];
1956 const char *error;
1957 int errptr;
1958
1959 if (pattern_count >= MAX_PATTERN_COUNT)
1960 {
1961 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1962 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1963 return FALSE;
1964 }
1965
1966 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1967 suffix[process_options]);
1968 pattern_list[pattern_count] =
1969 pcre_compile(buffer, options, &error, &errptr, pcretables);
1970 if (pattern_list[pattern_count] != NULL)
1971 {
1972 pattern_count++;
1973 return TRUE;
1974 }
1975
1976 /* Handle compile errors */
1977
1978 errptr -= (int)strlen(prefix[process_options]);
1979 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1980
1981 if (filename == NULL)
1982 {
1983 if (count == 0)
1984 fprintf(stderr, "pcregrep: Error in command-line regex "
1985 "at offset %d: %s\n", errptr, error);
1986 else
1987 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1988 "at offset %d: %s\n", ordin(count), errptr, error);
1989 }
1990 else
1991 {
1992 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1993 "at offset %d: %s\n", count, filename, errptr, error);
1994 }
1995
1996 return FALSE;
1997 }
1998
1999
2000
2001 /*************************************************
2002 * Compile one supplied pattern *
2003 *************************************************/
2004
2005 /* When the -F option has been used, each string may be a list of strings,
2006 separated by line breaks. They will be matched literally.
2007
2008 Arguments:
2009 pattern the pattern string
2010 options the PCRE options
2011 filename the file name, or NULL for a command-line pattern
2012 count 0 if this is the only command line pattern, or
2013 number of the command line pattern, or
2014 linenumber for a pattern from a file
2015
2016 Returns: TRUE on success, FALSE after an error
2017 */
2018
2019 static BOOL
2020 compile_pattern(char *pattern, int options, char *filename, int count)
2021 {
2022 if ((process_options & PO_FIXED_STRINGS) != 0)
2023 {
2024 char *eop = pattern + strlen(pattern);
2025 char buffer[PATBUFSIZE];
2026 for(;;)
2027 {
2028 int ellength;
2029 char *p = end_of_line(pattern, eop, &ellength);
2030 if (ellength == 0)
2031 return compile_single_pattern(pattern, options, filename, count);
2032 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2033 pattern = p;
2034 if (!compile_single_pattern(buffer, options, filename, count))
2035 return FALSE;
2036 }
2037 }
2038 else return compile_single_pattern(pattern, options, filename, count);
2039 }
2040
2041
2042
2043 /*************************************************
2044 * Main program *
2045 *************************************************/
2046
2047 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2048
2049 int
2050 main(int argc, char **argv)
2051 {
2052 int i, j;
2053 int rc = 1;
2054 int pcre_options = 0;
2055 int cmd_pattern_count = 0;
2056 int hint_count = 0;
2057 int errptr;
2058 BOOL only_one_at_top;
2059 char *patterns[MAX_PATTERN_COUNT];
2060 const char *locale_from = "--locale";
2061 const char *error;
2062
2063 #ifdef SUPPORT_PCREGREP_JIT
2064 pcre_jit_stack *jit_stack = NULL;
2065 #endif
2066
2067 /* Set the default line ending value from the default in the PCRE library;
2068 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2069 Note that the return values from pcre_config(), though derived from the ASCII
2070 codes, are the same in EBCDIC environments, so we must use the actual values
2071 rather than escapes such as as '\r'. */
2072
2073 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2074 switch(i)
2075 {
2076 default: newline = (char *)"lf"; break;
2077 case 13: newline = (char *)"cr"; break;
2078 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2079 case -1: newline = (char *)"any"; break;
2080 case -2: newline = (char *)"anycrlf"; break;
2081 }
2082
2083 /* Process the options */
2084
2085 for (i = 1; i < argc; i++)
2086 {
2087 option_item *op = NULL;
2088 char *option_data = (char *)""; /* default to keep compiler happy */
2089 BOOL longop;
2090 BOOL longopwasequals = FALSE;
2091
2092 if (argv[i][0] != '-') break;
2093
2094 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2095 but only if we have previously had -e or -f to define the patterns. */
2096
2097 if (argv[i][1] == 0)
2098 {
2099 if (pattern_filename != NULL || pattern_count > 0) break;
2100 else pcregrep_exit(usage(2));
2101 }
2102
2103 /* Handle a long name option, or -- to terminate the options */
2104
2105 if (argv[i][1] == '-')
2106 {
2107 char *arg = argv[i] + 2;
2108 char *argequals = strchr(arg, '=');
2109
2110 if (*arg == 0) /* -- terminates options */
2111 {
2112 i++;
2113 break; /* out of the options-handling loop */
2114 }
2115
2116 longop = TRUE;
2117
2118 /* Some long options have data that follows after =, for example file=name.
2119 Some options have variations in the long name spelling: specifically, we
2120 allow "regexp" because GNU grep allows it, though I personally go along
2121 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2122 These options are entered in the table as "regex(p)". Options can be in
2123 both these categories. */
2124
2125 for (op = optionlist; op->one_char != 0; op++)
2126 {
2127 char *opbra = strchr(op->long_name, '(');
2128 char *equals = strchr(op->long_name, '=');
2129
2130 /* Handle options with only one spelling of the name */
2131
2132 if (opbra == NULL) /* Does not contain '(' */
2133 {
2134 if (equals == NULL) /* Not thing=data case */
2135 {
2136 if (strcmp(arg, op->long_name) == 0) break;
2137 }
2138 else /* Special case xxx=data */
2139 {
2140 int oplen = (int)(equals - op->long_name);
2141 int arglen = (argequals == NULL)?
2142 (int)strlen(arg) : (int)(argequals - arg);
2143 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2144 {
2145 option_data = arg + arglen;
2146 if (*option_data == '=')
2147 {
2148 option_data++;
2149 longopwasequals = TRUE;
2150 }
2151 break;
2152 }
2153 }
2154 }
2155
2156 /* Handle options with an alternate spelling of the name */
2157
2158 else
2159 {
2160 char buff1[24];
2161 char buff2[24];
2162
2163 int baselen = (int)(opbra - op->long_name);
2164 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2165 int arglen = (argequals == NULL || equals == NULL)?
2166 (int)strlen(arg) : (int)(argequals - arg);
2167
2168 sprintf(buff1, "%.*s", baselen, op->long_name);
2169 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2170
2171 if (strncmp(arg, buff1, arglen) == 0 ||
2172 strncmp(arg, buff2, arglen) == 0)
2173 {
2174 if (equals != NULL && argequals != NULL)
2175 {
2176 option_data = argequals;
2177 if (*option_data == '=')
2178 {
2179 option_data++;
2180 longopwasequals = TRUE;
2181 }
2182 }
2183 break;
2184 }
2185 }
2186 }
2187
2188 if (op->one_char == 0)
2189 {
2190 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2191 pcregrep_exit(usage(2));
2192 }
2193 }
2194
2195 /* Jeffrey Friedl's debugging harness uses these additional options which
2196 are not in the right form for putting in the option table because they use
2197 only one hyphen, yet are more than one character long. By putting them
2198 separately here, they will not get displayed as part of the help() output,
2199 but I don't think Jeffrey will care about that. */
2200
2201 #ifdef JFRIEDL_DEBUG
2202 else if (strcmp(argv[i], "-pre") == 0) {
2203 jfriedl_prefix = argv[++i];
2204 continue;
2205 } else if (strcmp(argv[i], "-post") == 0) {
2206 jfriedl_postfix = argv[++i];
2207 continue;
2208 } else if (strcmp(argv[i], "-XT") == 0) {
2209 sscanf(argv[++i], "%d", &jfriedl_XT);
2210 continue;
2211 } else if (strcmp(argv[i], "-XR") == 0) {
2212 sscanf(argv[++i], "%d", &jfriedl_XR);
2213 continue;
2214 }
2215 #endif
2216
2217
2218 /* One-char options; many that have no data may be in a single argument; we
2219 continue till we hit the last one or one that needs data. */
2220
2221 else
2222 {
2223 char *s = argv[i] + 1;
2224 longop = FALSE;
2225 while (*s != 0)
2226 {
2227 for (op = optionlist; op->one_char != 0; op++)
2228 {
2229 if (*s == op->one_char) break;
2230 }
2231 if (op->one_char == 0)
2232 {
2233 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2234 *s, argv[i]);
2235 pcregrep_exit(usage(2));
2236 }
2237
2238 /* Check for a single-character option that has data: OP_OP_NUMBER
2239 is used for one that either has a numerical number or defaults, i.e. the
2240 data is optional. If a digit follows, there is data; if not, carry on
2241 with other single-character options in the same string. */
2242
2243 option_data = s+1;
2244 if (op->type == OP_OP_NUMBER)
2245 {
2246 if (isdigit((unsigned char)s[1])) break;
2247 }
2248 else /* Check for end or a dataless option */
2249 {
2250 if (op->type != OP_NODATA || s[1] == 0) break;
2251 }
2252
2253 /* Handle a single-character option with no data, then loop for the
2254 next character in the string. */
2255
2256 pcre_options = handle_option(*s++, pcre_options);
2257 }
2258 }
2259
2260 /* At this point we should have op pointing to a matched option. If the type
2261 is NO_DATA, it means that there is no data, and the option might set
2262 something in the PCRE options. */
2263
2264 if (op->type == OP_NODATA)
2265 {
2266 pcre_options = handle_option(op->one_char, pcre_options);
2267 continue;
2268 }
2269
2270 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2271 either has a value or defaults to something. It cannot have data in a
2272 separate item. At the moment, the only such options are "colo(u)r",
2273 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2274
2275 if (*option_data == 0 &&
2276 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2277 {
2278 switch (op->one_char)
2279 {
2280 case N_COLOUR:
2281 colour_option = (char *)"auto";
2282 break;
2283
2284 case 'o':
2285 only_matching = 0;
2286 break;
2287
2288 #ifdef JFRIEDL_DEBUG
2289 case 'S':
2290 S_arg = 0;
2291 break;
2292 #endif
2293 }
2294 continue;
2295 }
2296
2297 /* Otherwise, find the data string for the option. */
2298
2299 if (*option_data == 0)
2300 {
2301 if (i >= argc - 1 || longopwasequals)
2302 {
2303 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2304 pcregrep_exit(usage(2));
2305 }
2306 option_data = argv[++i];
2307 }
2308
2309 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2310 multiple times to create a list of patterns. */
2311
2312 if (op->type == OP_PATLIST)
2313 {
2314 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2315 {
2316 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2317 MAX_PATTERN_COUNT);
2318 return 2;
2319 }
2320 patterns[cmd_pattern_count++] = option_data;
2321 }
2322
2323 /* Otherwise, deal with single string or numeric data values. */
2324
2325 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2326 op->type != OP_OP_NUMBER)
2327 {
2328 *((char **)op->dataptr) = option_data;
2329 }
2330
2331 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2332 only for unpicking arguments, so just keep it simple. */
2333
2334 else
2335 {
2336 unsigned long int n = 0;
2337 char *endptr = option_data;
2338 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2339 while (isdigit((unsigned char)(*endptr)))
2340 n = n * 10 + (int)(*endptr++ - '0');
2341 if (toupper(*endptr) == 'K')
2342 {
2343 n *= 1024;
2344 endptr++;
2345 }
2346 else if (toupper(*endptr) == 'M')
2347 {
2348 n *= 1024*1024;
2349 endptr++;
2350 }
2351 if (*endptr != 0)
2352 {
2353 if (longop)
2354 {
2355 char *equals = strchr(op->long_name, '=');
2356 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2357 (int)(equals - op->long_name);
2358 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2359 option_data, nlen, op->long_name);
2360 }
2361 else
2362 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2363 option_data, op->one_char);
2364 pcregrep_exit(usage(2));
2365 }
2366 if (op->type == OP_LONGNUMBER)
2367 *((unsigned long int *)op->dataptr) = n;
2368 else
2369 *((int *)op->dataptr) = n;
2370 }
2371 }
2372
2373 /* Options have been decoded. If -C was used, its value is used as a default
2374 for -A and -B. */
2375
2376 if (both_context > 0)
2377 {
2378 if (after_context == 0) after_context = both_context;
2379 if (before_context == 0) before_context = both_context;
2380 }
2381
2382 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2383 However, the latter two set only_matching. */
2384
2385 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2386 (file_offsets && line_offsets))
2387 {
2388 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2389 "and/or --line-offsets\n");
2390 pcregrep_exit(usage(2));
2391 }
2392
2393 if (file_offsets || line_offsets) only_matching = 0;
2394
2395 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2396 LC_ALL environment variable is set, and if so, use it. */
2397
2398 if (locale == NULL)
2399 {
2400 locale = getenv("LC_ALL");
2401 locale_from = "LCC_ALL";
2402 }
2403
2404 if (locale == NULL)
2405 {
2406 locale = getenv("LC_CTYPE");
2407 locale_from = "LC_CTYPE";
2408 }
2409
2410 /* If a locale has been provided, set it, and generate the tables the PCRE
2411 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2412
2413 if (locale != NULL)
2414 {
2415 if (setlocale(LC_CTYPE, locale) == NULL)
2416 {
2417 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2418 locale, locale_from);
2419 return 2;
2420 }
2421 pcretables = pcre_maketables();
2422 }
2423
2424 /* Sort out colouring */
2425
2426 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2427 {
2428 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2429 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2430 else
2431 {
2432 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2433 colour_option);
2434 return 2;
2435 }
2436 if (do_colour)
2437 {
2438 char *cs = getenv("PCREGREP_COLOUR");
2439 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2440 if (cs != NULL) colour_string = cs;
2441 }
2442 }
2443
2444 /* Interpret the newline type; the default settings are Unix-like. */
2445
2446 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2447 {
2448 pcre_options |= PCRE_NEWLINE_CR;
2449 endlinetype = EL_CR;
2450 }
2451 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2452 {
2453 pcre_options |= PCRE_NEWLINE_LF;
2454 endlinetype = EL_LF;
2455 }
2456 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2457 {
2458 pcre_options |= PCRE_NEWLINE_CRLF;
2459 endlinetype = EL_CRLF;
2460 }
2461 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2462 {
2463 pcre_options |= PCRE_NEWLINE_ANY;
2464 endlinetype = EL_ANY;
2465 }
2466 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2467 {
2468 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2469 endlinetype = EL_ANYCRLF;
2470 }
2471 else
2472 {
2473 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2474 return 2;
2475 }
2476
2477 /* Interpret the text values for -d and -D */
2478
2479 if (dee_option != NULL)
2480 {
2481 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2482 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2483 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2484 else
2485 {
2486 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2487 return 2;
2488 }
2489 }
2490
2491 if (DEE_option != NULL)
2492 {
2493 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2494 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2495 else
2496 {
2497 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2498 return 2;
2499 }
2500 }
2501
2502 /* Check the values for Jeffrey Friedl's debugging options. */
2503
2504 #ifdef JFRIEDL_DEBUG
2505 if (S_arg > 9)
2506 {
2507 fprintf(stderr, "pcregrep: bad value for -S option\n");
2508 return 2;
2509 }
2510 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2511 {
2512 if (jfriedl_XT == 0) jfriedl_XT = 1;
2513 if (jfriedl_XR == 0) jfriedl_XR = 1;
2514 }
2515 #endif
2516
2517 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2518
2519 bufsize = 3*bufthird;
2520 main_buffer = (char *)malloc(bufsize);
2521 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2522 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2523
2524 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2525 {
2526 fprintf(stderr, "pcregrep: malloc failed\n");
2527 goto EXIT2;
2528 }
2529
2530 /* If no patterns were provided by -e, and there is no file provided by -f,
2531 the first argument is the one and only pattern, and it must exist. */
2532
2533 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2534 {
2535 if (i >= argc) return usage(2);
2536 patterns[cmd_pattern_count++] = argv[i++];
2537 }
2538
2539 /* Compile the patterns that were provided on the command line, either by
2540 multiple uses of -e or as a single unkeyed pattern. */
2541
2542 for (j = 0; j < cmd_pattern_count; j++)
2543 {
2544 if (!compile_pattern(patterns[j], pcre_options, NULL,
2545 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2546 goto EXIT2;
2547 }
2548
2549 /* Compile the regular expressions that are provided in a file. */
2550
2551 if (pattern_filename != NULL)
2552 {
2553 int linenumber = 0;
2554 FILE *f;
2555 char *filename;
2556 char buffer[PATBUFSIZE];
2557
2558 if (strcmp(pattern_filename, "-") == 0)
2559 {
2560 f = stdin;
2561 filename = stdin_name;
2562 }
2563 else
2564 {
2565 f = fopen(pattern_filename, "r");
2566 if (f == NULL)
2567 {
2568 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2569 strerror(errno));
2570 goto EXIT2;
2571 }
2572 filename = pattern_filename;
2573 }
2574
2575 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2576 {
2577 char *s = buffer + (int)strlen(buffer);
2578 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2579 *s = 0;
2580 linenumber++;
2581 if (buffer[0] == 0) continue; /* Skip blank lines */
2582 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2583 goto EXIT2;
2584 }
2585
2586 if (f != stdin) fclose(f);
2587 }
2588
2589 /* Study the regular expressions, as we will be running them many times. Unless
2590 JIT has been explicitly disabled, arrange a stack for it to use. */
2591
2592 #ifdef SUPPORT_PCREGREP_JIT
2593 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2594 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2595 #endif
2596
2597 for (j = 0; j < pattern_count; j++)
2598 {
2599 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2600 if (error != NULL)
2601 {
2602 char s[16];
2603 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2604 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2605 goto EXIT2;
2606 }
2607 hint_count++;
2608 #ifdef SUPPORT_PCREGREP_JIT
2609 if (jit_stack != NULL && hints_list[j] != NULL)
2610 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2611 #endif
2612 }
2613
2614 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2615 pcre_extra block for each pattern. */
2616
2617 if (match_limit > 0 || match_limit_recursion > 0)
2618 {
2619 for (j = 0; j < pattern_count; j++)
2620 {
2621 if (hints_list[j] == NULL)
2622 {
2623 hints_list[j] = malloc(sizeof(pcre_extra));
2624 if (hints_list[j] == NULL)
2625 {
2626 fprintf(stderr, "pcregrep: malloc failed\n");
2627 pcregrep_exit(2);
2628 }
2629 }
2630 if (match_limit > 0)
2631 {
2632 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2633 hints_list[j]->match_limit = match_limit;
2634 }
2635 if (match_limit_recursion > 0)
2636 {
2637 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2638 hints_list[j]->match_limit_recursion = match_limit_recursion;
2639 }
2640 }
2641 }
2642
2643 /* If there are include or exclude patterns, compile them. */
2644
2645 if (exclude_pattern != NULL)
2646 {
2647 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2648 pcretables);
2649 if (exclude_compiled == NULL)
2650 {
2651 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2652 errptr, error);
2653 goto EXIT2;
2654 }
2655 }
2656
2657 if (include_pattern != NULL)
2658 {
2659 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2660 pcretables);
2661 if (include_compiled == NULL)
2662 {
2663 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2664 errptr, error);
2665 goto EXIT2;
2666 }
2667 }
2668
2669 if (exclude_dir_pattern != NULL)
2670 {
2671 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2672 pcretables);
2673 if (exclude_dir_compiled == NULL)
2674 {
2675 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2676 errptr, error);
2677 goto EXIT2;
2678 }
2679 }
2680
2681 if (include_dir_pattern != NULL)
2682 {
2683 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2684 pcretables);
2685 if (include_dir_compiled == NULL)
2686 {
2687 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2688 errptr, error);
2689 goto EXIT2;
2690 }
2691 }
2692
2693 /* If there are no further arguments, do the business on stdin and exit. */
2694
2695 if (i >= argc)
2696 {
2697 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2698 (filenames > FN_DEFAULT)? stdin_name : NULL);
2699 goto EXIT;
2700 }
2701
2702 /* Otherwise, work through the remaining arguments as files or directories.
2703 Pass in the fact that there is only one argument at top level - this suppresses
2704 the file name if the argument is not a directory and filenames are not
2705 otherwise forced. */
2706
2707 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2708
2709 for (; i < argc; i++)
2710 {
2711 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2712 only_one_at_top);
2713 if (frc > 1) rc = frc;
2714 else if (frc == 0 && rc == 1) rc = 0;
2715 }
2716
2717 EXIT:
2718 #ifdef SUPPORT_PCREGREP_JIT
2719 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2720 #endif
2721 if (main_buffer != NULL) free(main_buffer);
2722 if (pattern_list != NULL)
2723 {
2724 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2725 free(pattern_list);
2726 }
2727 if (hints_list != NULL)
2728 {
2729 for (i = 0; i < hint_count; i++)
2730 {
2731 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2732 }
2733 free(hints_list);
2734 }
2735 pcregrep_exit(rc);
2736
2737 EXIT2:
2738 rc = 2;
2739 goto EXIT;
2740 }
2741
2742 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12