/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 632 - (show annotations) (download)
Fri Jul 22 17:47:49 2011 UTC (3 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 77500 byte(s)
Fix pcregrep repeated match in same line bug.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2011 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean support: a plain int typedef with 0/1 constants. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Fixed sizes. OFFSET_SIZE is the number of ints in the offsets vector that is
handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* MBUFTHIRD is one third of the main input buffer: the larger of BUFSIZ and
8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The numerical order matters: a file name is wanted for every value
greater than FN_DEFAULT. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_MATCH_ONLY   = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* File reading styles */

enum {
  FR_PLAIN  = 0,
  FR_LIBZ   = 1,
  FR_LIBBZ2 = 2
};

/* Actions for the -d and -D options */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is tested by an "if"
with an empty body. Oddly, this does not also seem to apply to fprintf().

The "if" is enclosed in do { ... } while (0) so that "FWRITE(...);" is a single
statement. Without that wrapper the macro cannot be used as the unbraced body
of an "if" that has an "else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1; /* data for the 'S' ("jeffS") debugging option */
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype; /* an EL_ value; selects the case used by end_of_line() and previous_line() */
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL; /* data for --color / --colour */
136 static char *dee_option = NULL; /* data for -d (--directories) */
137 static char *DEE_option = NULL; /* data for -D (--devices) */
138 static char *newline = NULL; /* data for -N (--newline) */
139 static char *pattern_filename = NULL; /* data for -f (--file) */
140 static char *stdin_name = (char *)"(standard input)"; /* replaced by --label */
141 static char *locale = NULL; /* data for --locale */
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0; /* number of entries match_patterns() tries */
146 static pcre **pattern_list = NULL; /* first argument of pcre_exec() for each pattern */
147 static pcre_extra **hints_list = NULL; /* second argument of pcre_exec() for each pattern */
148
149 static char *include_pattern = NULL; /* data for --include */
150 static char *exclude_pattern = NULL; /* data for --exclude */
151 static char *include_dir_pattern = NULL; /* data for --include-dir */
152 static char *exclude_dir_pattern = NULL; /* data for --exclude-dir */
153
154 static pcre *include_compiled = NULL; /* presumably the compiled form of include_pattern - confirm */
155 static pcre *exclude_compiled = NULL; /* presumably the compiled form of exclude_pattern - confirm */
156 static pcre *include_dir_compiled = NULL; /* presumably the compiled form of include_dir_pattern - confirm */
157 static pcre *exclude_dir_compiled = NULL; /* presumably the compiled form of exclude_dir_pattern - confirm */
158
159 static int after_context = 0; /* data for -A; limits the lines printed by do_after_lines() */
160 static int before_context = 0; /* data for -B */
161 static int both_context = 0; /* data for -C */
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0; /* incremented for each pcre_exec() error reported by match_patterns() */
165 static int filenames = FN_DEFAULT; /* an FN_ value */
166 static int only_matching = -1; /* data for -o (--only-matching) */
167 static int process_options = 0; /* PO_ flag bits */
168
169 static unsigned long int match_limit = 0; /* data for --match-limit */
170 static unsigned long int match_limit_recursion = 0; /* data for --recursion-limit */
171
172 static BOOL count_only = FALSE;
173 static BOOL do_colour = FALSE;
174 static BOOL file_offsets = FALSE;
175 static BOOL hyphenpending = FALSE; /* set TRUE by do_after_lines() */
176 static BOOL invert = FALSE; /* returned by match_patterns() after a non-fatal error */
177 static BOOL line_buffered = FALSE;
178 static BOOL line_offsets = FALSE;
179 static BOOL multiline = FALSE;
180 static BOOL number = FALSE; /* when TRUE, do_after_lines() prefixes each line with its number */
181 static BOOL omit_zero_count = FALSE;
182 static BOOL resource_error = FALSE; /* set by match_patterns() on a match/recursion limit error; read by pcregrep_exit() */
183 static BOOL quiet = FALSE;
184 static BOOL silent = FALSE;
185 static BOOL utf8 = FALSE; /* when TRUE, end_of_line() and previous_line() decode UTF-8 characters */
186
187 /* Structure for options and list of them */
188
189 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190 OP_OP_NUMBER, OP_PATLIST };
191
192 typedef struct option_item {
193 int type; /* one of the OP_ values above */
194 int one_char; /* the single-letter form, or a negative N_ code if there is none */
195 void *dataptr; /* address of the variable that receives the option data, or NULL */
196 const char *long_name; /* the long name, with "=xxx" appended when the option takes data */
197 const char *help_text; /* one-line description of the option */
198 } option_item;
199
200 /* Options without a single-letter equivalent get a negative value. This can be
201 used to identify them. */
202
203 #define N_COLOUR (-1)
204 #define N_EXCLUDE (-2)
205 #define N_EXCLUDE_DIR (-3)
206 #define N_HELP (-4)
207 #define N_INCLUDE (-5)
208 #define N_INCLUDE_DIR (-6)
209 #define N_LABEL (-7)
210 #define N_LOCALE (-8)
211 #define N_NULL (-9)
212 #define N_LOFFSETS (-10)
213 #define N_FOFFSETS (-11)
214 #define N_LBUFFER (-12)
215 #define N_M_LIMIT (-13)
216 #define N_M_LIMIT_REC (-14)
217
218 static option_item optionlist[] = { /* fields: type, one_char, dataptr, long_name, help_text */
219 { OP_NODATA, N_NULL, NULL, "", " terminate options" }, /* empty long name - presumably matches a bare "--"; confirm against the option parser */
220 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254
255 /* These two were accidentally implemented with underscores instead of
256 hyphens in the option names. As this was not discovered for several releases,
257 the incorrect versions are left in the table for compatibility. However, the
258 --help function misses out any option that has an underscore in its name. */
259
260 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262
263 #ifdef JFRIEDL_DEBUG
264 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265 #endif
266 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272 { OP_NODATA, 0, NULL, NULL, NULL } /* all-zero entry - presumably the end-of-table marker; confirm */
273 };
274
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, so
each table has eight entries, one per combination of the three bits. Note that
the combination of -w and -x has the same effect as -x on its own, so the
entries for those two combinations are the same. */

static const char *prefix[] = {
  "",            /* 0: none      */
  "\\b",         /* 1: -w        */
  "^(?:",        /* 2: -x        */
  "^(?:",        /* 3: -w -x     */
  "\\Q",         /* 4: -F        */
  "\\b\\Q",      /* 5: -F -w     */
  "^(?:\\Q",     /* 6: -F -x     */
  "^(?:\\Q"      /* 7: -F -w -x  */
};

static const char *suffix[] = {
  "",            /* 0: none      */
  "\\b",         /* 1: -w        */
  ")$",          /* 2: -x        */
  ")$",          /* 3: -w -x     */
  "\\E",         /* 4: -F        */
  "\\E\\b",      /* 5: -F -w     */
  "\\E)$",       /* 6: -F -x     */
  "\\E)$"        /* 7: -F -w -x  */
};

/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf".
utf8_table4 is indexed by the low six bits of a lead byte (0xc0 or above) and
gives the number of additional bytes in the character; utf8_table3 is indexed
by that number and gives the mask for the data bits of the lead byte. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,5,5,5,5
};
295
296
297
298 /*************************************************
299 * Exit from the program *
300 *************************************************/
301
302 /* If there has been a resource error, give a suitable message.
303
304 Argument: the return code
305 Returns: does not return
306 */
307
308 static void
309 pcregrep_exit(int rc)
310 {
311 if (resource_error)
312 {
313 fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316 }
317
318 exit(rc);
319 }
320
321
322 /*************************************************
323 * OS-specific functions *
324 *************************************************/
325
326 /* These functions are defined so that they can be made system specific,
327 although at present the only ones are for Unix, Win32, and for "no support". */
328
329
330 /************* Directory scanning in Unix ***********/
331
332 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 #include <sys/types.h>
334 #include <sys/stat.h>
335 #include <dirent.h>
336
337 typedef DIR directory_type;
338
/* Returns '/' when filename names a directory, and 0 otherwise. A path that
cannot be stat()ed is reported as "not a directory", in the expectation that
opening it as a file will then fail. */

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
347
348 static directory_type *
349 opendirectory(char *filename)
350 {
351 return opendir(filename);
352 }
353
354 static char *
355 readdirectory(directory_type *dir)
356 {
357 for (;;)
358 {
359 struct dirent *dent = readdir(dir);
360 if (dent == NULL) return NULL;
361 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362 return dent->d_name;
363 }
364 /* Control never reaches here */
365 }
366
367 static void
368 closedirectory(directory_type *dir)
369 {
370 closedir(dir);
371 }
372
373
374 /************* Test for regular file in Unix **********/
375
/* Returns 1 when filename names a regular file and 0 when it names something
else. A path that cannot be stat()ed also yields 1, in the expectation that
opening it as a file will then fail. */

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
384
385
386 /************* Test for a terminal in Unix **********/
387
388 static BOOL
389 is_stdout_tty(void)
390 {
391 return isatty(fileno(stdout));
392 }
393
394 static BOOL
395 is_file_tty(FILE *f)
396 {
397 return isatty(fileno(f));
398 }
399
400
401 /************* Directory scanning in Win32 ***********/
402
403 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 when it did not exist. David Byron added a patch that moved the #include of
406 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408 undefined when it is indeed undefined. */
409
410 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411
412 #ifndef STRICT
413 # define STRICT
414 #endif
415 #ifndef WIN32_LEAN_AND_MEAN
416 # define WIN32_LEAN_AND_MEAN
417 #endif
418
419 #include <windows.h>
420
421 #ifndef INVALID_FILE_ATTRIBUTES
422 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423 #endif
424
425 typedef struct directory_type
426 {
427 HANDLE handle; /* search handle returned by FindFirstFile() in opendirectory() */
428 BOOL first; /* TRUE until readdirectory() has examined the entry found by FindFirstFile() */
429 WIN32_FIND_DATA data; /* entry most recently filled in by FindFirstFile() or FindNextFile() */
430 } directory_type;
431
432 int
433 isdirectory(char *filename)
434 {
435 DWORD attr = GetFileAttributes(filename);
436 if (attr == INVALID_FILE_ATTRIBUTES)
437 return 0;
438 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439 }
440
441 directory_type *
442 opendirectory(char *filename)
443 {
444 size_t len;
445 char *pattern;
446 directory_type *dir;
447 DWORD err;
448 len = strlen(filename);
449 pattern = (char *) malloc(len + 3);
450 dir = (directory_type *) malloc(sizeof(*dir));
451 if ((pattern == NULL) || (dir == NULL))
452 {
453 fprintf(stderr, "pcregrep: malloc failed\n");
454 pcregrep_exit(2);
455 }
456 memcpy(pattern, filename, len);
457 memcpy(&(pattern[len]), "\\*", 3);
458 dir->handle = FindFirstFile(pattern, &(dir->data));
459 if (dir->handle != INVALID_HANDLE_VALUE)
460 {
461 free(pattern);
462 dir->first = TRUE;
463 return dir;
464 }
465 err = GetLastError();
466 free(pattern);
467 free(dir);
468 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469 return NULL;
470 }
471
472 char *
473 readdirectory(directory_type *dir)
474 {
475 for (;;)
476 {
477 if (!dir->first)
478 {
479 if (!FindNextFile(dir->handle, &(dir->data)))
480 return NULL;
481 }
482 else
483 {
484 dir->first = FALSE;
485 }
486 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487 return dir->data.cFileName;
488 }
489 #ifndef _MSC_VER
490 return NULL; /* Keep compiler happy; never executed */
491 #endif
492 }
493
494 void
495 closedirectory(directory_type *dir)
496 {
497 FindClose(dir->handle);
498 free(dir);
499 }
500
501
502 /************* Test for regular file in Win32 **********/
503
504 /* I don't know how to do this, or if it can be done; assume all paths are
505 regular if they are not directories. */
506
/* Anything that isdirectory() does not report as a directory counts as a
regular file. */

int isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
511
512
513 /************* Test for a terminal in Win32 **********/
514
515 /* I don't know how to do this; assume never */
516
517 static BOOL
518 is_stdout_tty(void)
519 {
520 return FALSE;
521 }
522
523 static BOOL
524 is_file_tty(FILE *f)
525 {
526 return FALSE;
527 }
528
529
530 /************* Directory scanning when we can't do it ***********/
531
532 /* The type is void, and apart from isdirectory(), the functions do nothing. */
533
534 #else
535
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Directories cannot be opened: always returns a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read: always returns a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
542
543
544 /************* Test for regular when we can't do it **********/
545
546 /* Assume all files are regular. */
547
int
isregfile(char *filename)
{
  (void)filename;
  return 1;    /* Every path is taken to be a regular file */
}
549
550
551 /************* Test for a terminal when we can't do it **********/
552
553 static BOOL
554 is_stdout_tty(void)
555 {
556 return FALSE;
557 }
558
559 static BOOL
560 is_file_tty(FILE *f)
561 {
562 return FALSE;
563 }
564
565 #endif
566
567
568
569 #ifndef HAVE_STRERROR
570 /*************************************************
571 * Provide strerror() for non-ANSI libraries *
572 *************************************************/
573
574 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
575 in their libraries, but can provide the same facility by this simple
576 alternative function. */
577
extern int sys_nerr;
extern char *sys_errlist[];

/* Looks the error number up in the system's sys_errlist[] table; numbers
outside 0 .. sys_nerr-1 get a fixed "unknown" text. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
587 #endif /* HAVE_STRERROR */
588
589
590
591 /*************************************************
592 * Read one line of input *
593 *************************************************/
594
595 /* Normally, input is read using fread() into a large buffer, so many lines may
596 be read at once. However, doing this for tty input means that no output appears
597 until a lot of input has been typed. Instead, tty input is handled line by
598 line. We cannot use fgets() for this, because it does not stop at a binary
599 zero, and therefore there is no way of telling how many characters it has read,
600 because there may be binary zeros embedded in the data.
601
602 Arguments:
603 buffer the buffer to read into
604 length the maximum number of characters to read
605 f the file
606
607 Returns: the number of characters read, zero at end of file
608 */
609
/* Reads bytes from f with fgetc() until a '\n' has been stored, "length" bytes
have been stored, or end of file is reached. A '\n' that ends the line is
included in the data and in the count. Binary zeros are stored like any other
byte.

The length limit is tested before each byte is read, so nothing is read or
stored when length is zero or negative; the buffer can never be overrun.

Arguments:
  buffer    the buffer to read into
  length    the maximum number of characters to read
  f         the file

Returns:    the number of characters stored; zero at end of file (or when
            length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int c;
  int yield = 0;

  while (yield < length && (c = fgetc(f)) != EOF)
    {
    buffer[yield++] = c;
    if (c == '\n') break;
    }

  return yield;
}
622
623
624
625 /*************************************************
626 * Find end of line *
627 *************************************************/
628
629 /* The length of the endline sequence that is found is set via lenptr. This may
630 be zero at the very end of the file if there is no line-ending sequence there.
631
632 Arguments:
633 p current position in line
634 endptr end of available data
635 lenptr where to put the length of the eol sequence
636
637 Returns: pointer after the last byte of the line, including the newline byte(s)
638 */
639
640 static char *
641 end_of_line(char *p, char *endptr, int *lenptr)
642 {
643 switch(endlinetype)
644 {
645 default: /* Just in case */
646 case EL_LF:
647 while (p < endptr && *p != '\n') p++;
648 if (p < endptr)
649 {
650 *lenptr = 1;
651 return p + 1;
652 }
653 *lenptr = 0;
654 return endptr;
655
656 case EL_CR:
657 while (p < endptr && *p != '\r') p++;
658 if (p < endptr)
659 {
660 *lenptr = 1;
661 return p + 1;
662 }
663 *lenptr = 0;
664 return endptr;
665
666 case EL_CRLF:
667 for (;;)
668 {
669 while (p < endptr && *p != '\r') p++;
670 if (++p >= endptr)
671 {
672 *lenptr = 0;
673 return endptr;
674 }
675 if (*p == '\n')
676 {
677 *lenptr = 2;
678 return p + 1;
679 }
680 }
681 break;
682
683 case EL_ANYCRLF:
684 while (p < endptr)
685 {
686 int extra = 0;
687 register int c = *((unsigned char *)p);
688
689 if (utf8 && c >= 0xc0)
690 {
691 int gcii, gcss;
692 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693 gcss = 6*extra;
694 c = (c & utf8_table3[extra]) << gcss;
695 for (gcii = 1; gcii <= extra; gcii++)
696 {
697 gcss -= 6;
698 c |= (p[gcii] & 0x3f) << gcss;
699 }
700 }
701
702 p += 1 + extra;
703
704 switch (c)
705 {
706 case 0x0a: /* LF */
707 *lenptr = 1;
708 return p;
709
710 case 0x0d: /* CR */
711 if (p < endptr && *p == 0x0a)
712 {
713 *lenptr = 2;
714 p++;
715 }
716 else *lenptr = 1;
717 return p;
718
719 default:
720 break;
721 }
722 } /* End of loop for ANYCRLF case */
723
724 *lenptr = 0; /* Must have hit the end */
725 return endptr;
726
727 case EL_ANY:
728 while (p < endptr)
729 {
730 int extra = 0;
731 register int c = *((unsigned char *)p);
732
733 if (utf8 && c >= 0xc0)
734 {
735 int gcii, gcss;
736 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737 gcss = 6*extra;
738 c = (c & utf8_table3[extra]) << gcss;
739 for (gcii = 1; gcii <= extra; gcii++)
740 {
741 gcss -= 6;
742 c |= (p[gcii] & 0x3f) << gcss;
743 }
744 }
745
746 p += 1 + extra;
747
748 switch (c)
749 {
750 case 0x0a: /* LF */
751 case 0x0b: /* VT */
752 case 0x0c: /* FF */
753 *lenptr = 1;
754 return p;
755
756 case 0x0d: /* CR */
757 if (p < endptr && *p == 0x0a)
758 {
759 *lenptr = 2;
760 p++;
761 }
762 else *lenptr = 1;
763 return p;
764
765 case 0x85: /* NEL */
766 *lenptr = utf8? 2 : 1;
767 return p;
768
769 case 0x2028: /* LS */
770 case 0x2029: /* PS */
771 *lenptr = 3;
772 return p;
773
774 default:
775 break;
776 }
777 } /* End of loop for ANY case */
778
779 *lenptr = 0; /* Must have hit the end */
780 return endptr;
781 } /* End of overall switch */
782 }
783
784
785
786 /*************************************************
787 * Find start of previous line *
788 *************************************************/
789
790 /* This is called when looking back for before lines to print.
791
792 Arguments:
793 p start of the subsequent line
794 startptr start of available data
795
796 Returns: pointer to the start of the previous line
797 */
798
799 static char *
800 previous_line(char *p, char *startptr)
801 {
802 switch(endlinetype)
803 {
804 default: /* Just in case */
805 case EL_LF:
806 p--;
807 while (p > startptr && p[-1] != '\n') p--;
808 return p;
809
810 case EL_CR:
811 p--;
812 while (p > startptr && p[-1] != '\n') p--;
813 return p;
814
815 case EL_CRLF:
816 for (;;)
817 {
818 p -= 2;
819 while (p > startptr && p[-1] != '\n') p--;
820 if (p <= startptr + 1 || p[-2] == '\r') return p;
821 }
822 return p; /* But control should never get here */
823
824 case EL_ANY:
825 case EL_ANYCRLF:
826 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827 if (utf8) while ((*p & 0xc0) == 0x80) p--;
828
829 while (p > startptr)
830 {
831 register int c;
832 char *pp = p - 1;
833
834 if (utf8)
835 {
836 int extra = 0;
837 while ((*pp & 0xc0) == 0x80) pp--;
838 c = *((unsigned char *)pp);
839 if (c >= 0xc0)
840 {
841 int gcii, gcss;
842 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843 gcss = 6*extra;
844 c = (c & utf8_table3[extra]) << gcss;
845 for (gcii = 1; gcii <= extra; gcii++)
846 {
847 gcss -= 6;
848 c |= (pp[gcii] & 0x3f) << gcss;
849 }
850 }
851 }
852 else c = *((unsigned char *)pp);
853
854 if (endlinetype == EL_ANYCRLF) switch (c)
855 {
856 case 0x0a: /* LF */
857 case 0x0d: /* CR */
858 return p;
859
860 default:
861 break;
862 }
863
864 else switch (c)
865 {
866 case 0x0a: /* LF */
867 case 0x0b: /* VT */
868 case 0x0c: /* FF */
869 case 0x0d: /* CR */
870 case 0x85: /* NEL */
871 case 0x2028: /* LS */
872 case 0x2029: /* PS */
873 return p;
874
875 default:
876 break;
877 }
878
879 p = pp; /* Back one character */
880 } /* End of loop for ANY case */
881
882 return startptr; /* Hit start of data */
883 } /* End of overall switch */
884 }
885
886
887
888
889
890 /*************************************************
891 * Print the previous "after" lines *
892 *************************************************/
893
894 /* This is called if we are about to lose said lines because of buffer filling,
895 and at the end of the file. The data in the line is written using fwrite() so
896 that a binary zero does not terminate it.
897
898 Arguments:
899 lastmatchnumber the number of the last matching line, plus one
900 lastmatchrestart where we restarted after the last match
901 endptr end of available data
902 printname filename for printing
903
904 Returns: nothing
905 */
906
907 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908 char *endptr, char *printname)
909 {
910 if (after_context > 0 && lastmatchnumber > 0)
911 {
912 int count = 0;
913 while (lastmatchrestart < endptr && count++ < after_context)
914 {
915 int ellength;
916 char *pp = lastmatchrestart;
917 if (printname != NULL) fprintf(stdout, "%s-", printname);
918 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 pp = end_of_line(pp, endptr, &ellength);
920 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 lastmatchrestart = pp;
922 }
923 hyphenpending = TRUE;
924 }
925 }
926
927
928
929 /*************************************************
930 * Apply patterns to subject till one matches *
931 *************************************************/
932
933 /* This function is called to run through all patterns, looking for a match. It
934 is used multiple times for the same subject when colouring is enabled, in order
935 to find all possible matches.
936
937 Arguments:
938 matchptr the start of the subject
939 length the length of the subject to match
940 startoffset where to start matching
941 offsets the offsets vector to fill in
942 mrc address of where to put the result of pcre_exec()
943
944 Returns: TRUE if there was a match
945 FALSE if there was no match
946 invert if there was a non-fatal error
947 */
948
949 static BOOL
950 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
951 int *mrc)
952 {
953 int i;
954 size_t slen = length;
955 const char *msg = "this text:\n\n";
956 if (slen > 200)
957 {
958 slen = 200;
959 msg = "text that starts:\n\n";
960 }
961 for (i = 0; i < pattern_count; i++)
962 {
963 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
964 startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
965 if (*mrc >= 0) return TRUE;
966 if (*mrc == PCRE_ERROR_NOMATCH) continue;
967 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
968 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
969 fprintf(stderr, "%s", msg);
970 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
971 fprintf(stderr, "\n\n");
972 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
973 resource_error = TRUE;
974 if (error_count++ > 20)
975 {
976 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
977 pcregrep_exit(2);
978 }
979 return invert; /* No more matching; don't show the line again */
980 }
981
982 return FALSE; /* No match, no errors */
983 }
984
985
986
987 /*************************************************
988 * Grep an individual file *
989 *************************************************/
990
991 /* This is called from grep_or_recurse() below. It uses a buffer that is three
992 times the value of MBUFTHIRD. The matching point is never allowed to stray into
993 the top third of the buffer, thus keeping more of the file available for
994 context printing or for multiline scanning. For large files, the pointer will
995 be in the middle third most of the time, so the bottom third is available for
996 "before" context printing.
997
998 Arguments:
999 handle the fopened FILE stream for a normal file
1000 the gzFile pointer when reading is via libz
1001 the BZFILE pointer when reading is via libbz2
1002 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1003 printname the file name if it is to be printed for each match
1004 or NULL if the file name is not to be printed
1005 it cannot be NULL if filenames[_nomatch]_only is set
1006
1007 Returns: 0 if there was at least one match
1008 1 otherwise (no matches)
1009 2 if there is a read error on a .bz2 file
1010 */
1011
1012 static int
1013 pcregrep(void *handle, int frtype, char *printname)
1014 {
/* Overview: the input is read through a window of 3*MBUFTHIRD bytes. Each
line in turn is passed to match_patterns(), and what happens to a line that
"counts" (match != invert) depends on the global option flags: it is counted,
the file name alone is printed, the matched pieces or their offsets are
printed, or the whole line is printed with optional context lines. */
1015 int rc = 1; /* Yield; set to 0 after the first line that counts */
1016 int linenumber = 1; /* Number of the line that starts at ptr */
1017 int lastmatchnumber = 0; /* Number of the line after the last printed one; 0 = none */
1018 int count = 0; /* Lines that counted, used when count_only is set */
1019 int filepos = 0; /* Offset in the file of the line at ptr */
1020 int offsets[OFFSET_SIZE];
1021 char *lastmatchrestart = NULL; /* Start of the line after the last printed one */
1022 char buffer[3*MBUFTHIRD];
1023 char *ptr = buffer; /* Start of the current line */
1024 char *endptr; /* One past the last byte of data in buffer */
1025 size_t bufflength;
1026 BOOL endhyphenpending = FALSE;
1027 BOOL input_line_buffered = line_buffered;
1028 FILE *in = NULL; /* Ensure initialized */
1029
1030 #ifdef SUPPORT_LIBZ
1031 gzFile ingz = NULL;
1032 #endif
1033
1034 #ifdef SUPPORT_LIBBZ2
1035 BZFILE *inbz2 = NULL;
1036 #endif
1037
1038
1039 /* Do the first read into the start of the buffer and set up the pointer to end
1040 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1041 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1042 fail. */
1043
1044 #ifdef SUPPORT_LIBZ
1045 if (frtype == FR_LIBZ)
1046 {
1047 ingz = (gzFile)handle;
1048 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
/* NOTE(review): the result goes straight into a size_t and is not tested for
being negative, unlike the bz2 branch below. Presumably gzread() can also
report an error that way - confirm against the zlib documentation. */
1049 }
1050 else
1051 #endif
1052
1053 #ifdef SUPPORT_LIBBZ2
1054 if (frtype == FR_LIBBZ2)
1055 {
1056 inbz2 = (BZFILE *)handle;
1057 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1058 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1059 } /* without the cast it is unsigned. */
1060 else
1061 #endif
1062
1063 {
1064 in = (FILE *)handle;
1065 if (is_file_tty(in)) input_line_buffered = TRUE;
1066 bufflength = input_line_buffered?
1067 read_one_line(buffer, 3*MBUFTHIRD, in) :
1068 fread(buffer, 1, 3*MBUFTHIRD, in);
1069 }
1070
1071 endptr = buffer + bufflength;
1072
1073 /* Loop while the current pointer is not at the end of the file. For large
1074 files, endptr will be at the end of the buffer when we are in the middle of the
1075 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1076 way, the buffer is shifted left and re-filled. */
1077
1078 while (ptr < endptr)
1079 {
1080 int endlinelength;
1081 int mrc = 0;
1082 int startoffset = 0;
1083 BOOL match;
1084 char *matchptr = ptr;
1085 char *t = ptr;
1086 size_t length, linelength;
1087
1088 /* At this point, ptr is at the start of a line. We need to find the length
1089 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1090 length of the remainder of the data in the buffer. Otherwise, it is the length
1091 of the next line, excluding the terminating newline. After matching, we always
1092 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1093 option is used for compiling, so that any match is constrained to be in the
1094 first line. */
1095
1096 t = end_of_line(t, endptr, &endlinelength);
1097 linelength = t - ptr - endlinelength;
1098 length = multiline? (size_t)(endptr - ptr) : linelength;
1099
1100 /* Extra processing for Jeffrey Friedl's debugging. */
/* When either debugging variable is set, this section times jfriedl_XR runs
of the first pattern against the current line (repeated jfriedl_XT times
between a prefix and a postfix if jfriedl_XT is set), prints MATCH or FAIL
with the elapsed time, and returns 0 without looking at any further lines. */
1101
1102 #ifdef JFRIEDL_DEBUG
1103 if (jfriedl_XT || jfriedl_XR)
1104 {
1105 #include <sys/time.h>
1106 #include <time.h>
1107 struct timeval start_time, end_time;
1108 struct timezone dummy;
1109 int i;
1110
1111 if (jfriedl_XT)
1112 {
1113 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1114 const char *orig = ptr;
1115 ptr = malloc(newlen + 1);
1116 if (!ptr) {
1117 printf("out of memory");
1118 pcregrep_exit(2);
1119 }
1120 endptr = ptr;
1121 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1122 for (i = 0; i < jfriedl_XT; i++) {
1123 strncpy(endptr, orig, length);
1124 endptr += length;
1125 }
1126 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1127 length = newlen;
1128 }
1129
1130 if (gettimeofday(&start_time, &dummy) != 0)
1131 perror("bad gettimeofday");
1132
1133
1134 for (i = 0; i < jfriedl_XR; i++)
1135 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1136 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1137
1138 if (gettimeofday(&end_time, &dummy) != 0)
1139 perror("bad gettimeofday");
1140
1141 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1142 -
1143 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1144
1145 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1146 return 0;
1147 }
1148 #endif
1149
1150 /* We come back here after a match when the -o option (only_matching) is set,
1151 in order to find any further matches in the same line. */
1152
1153 ONLY_MATCHING_RESTART:
1154
1155 /* Run through all the patterns until one matches or there is an error other
1156 than NOMATCH. This code is in a subroutine so that it can be re-used for
1157 finding subsequent matches when colouring matched lines. */
1158
1159 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1160
1161 /* If it's a match or a not-match (as required), do what's wanted. */
1162
1163 if (match != invert)
1164 {
1165 BOOL hyphenprinted = FALSE;
1166
1167 /* We've failed if we want a file that doesn't have any matches. */
1168
1169 if (filenames == FN_NOMATCH_ONLY) return 1;
1170
1171 /* Just count if just counting is wanted. */
1172
1173 if (count_only) count++;
1174
1175 /* If all we want is a file name, there is no need to scan any more lines
1176 in the file. */
1177
1178 else if (filenames == FN_MATCH_ONLY)
1179 {
1180 fprintf(stdout, "%s\n", printname);
1181 return 0;
1182 }
1183
1184 /* Likewise, if all we want is a yes/no answer. */
1185
1186 else if (quiet) return 0;
1187
1188 /* The --only-matching option prints just the substring that matched, or a
1189 captured portion of it, as long as this string is not empty, and the
1190 --file-offsets and --line-offsets options output offsets for the matching
1191 substring (they both force --only-matching = 0). None of these options
1192 prints any context. Afterwards, adjust the start and length, and then jump
1193 back to look for further matches in the same line. If we are in invert
1194 mode, however, nothing is printed and we do not restart - this could still
1195 be useful because the return code is set. */
1196
1197 else if (only_matching >= 0)
1198 {
1199 if (!invert)
1200 {
1201 if (printname != NULL) fprintf(stdout, "%s:", printname);
1202 if (number) fprintf(stdout, "%d:", linenumber);
1203 if (line_offsets)
1204 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1205 offsets[1] - offsets[0]);
1206 else if (file_offsets)
1207 fprintf(stdout, "%d,%d\n",
1208 (int)(filepos + matchptr + offsets[0] - ptr),
1209 offsets[1] - offsets[0]);
1210 else if (only_matching < mrc)
1211 {
1212 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1213 if (plen > 0)
1214 {
1215 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1216 FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1217 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1218 fprintf(stdout, "\n");
1219 }
1220 }
1221 else if (printname != NULL || number) fprintf(stdout, "\n");
1222 /*
1223 matchptr += offsets[1];
1224 length -= offsets[1];
1225 */
1226 match = FALSE;
1227 if (line_buffered) fflush(stdout);
1228 rc = 0; /* Had some success */
1229 startoffset = offsets[1]; /* Search again from the end of this match */
1230 goto ONLY_MATCHING_RESTART;
1231 }
1232 }
1233
1234 /* This is the default case when none of the above options is set. We print
1235 the matching line(s), possibly preceded and/or followed by other lines of
1236 context. */
1237
1238 else
1239 {
1240 /* See if there is a requirement to print some "after" lines from a
1241 previous match. We never print any overlaps. */
1242
1243 if (after_context > 0 && lastmatchnumber > 0)
1244 {
1245 int ellength;
1246 int linecount = 0;
1247 char *p = lastmatchrestart;
1248
1249 while (p < ptr && linecount < after_context)
1250 {
1251 p = end_of_line(p, ptr, &ellength);
1252 linecount++;
1253 }
1254
1255 /* It is important to advance lastmatchrestart during this printing so
1256 that it interacts correctly with any "before" printing below. Print
1257 each line's data using fwrite() in case there are binary zeroes. */
1258
1259 while (lastmatchrestart < p)
1260 {
1261 char *pp = lastmatchrestart;
1262 if (printname != NULL) fprintf(stdout, "%s-", printname);
1263 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1264 pp = end_of_line(pp, endptr, &ellength);
1265 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1266 lastmatchrestart = pp;
1267 }
1268 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1269 }
1270
1271 /* If there were non-contiguous lines printed above, insert hyphens. */
1272
1273 if (hyphenpending)
1274 {
1275 fprintf(stdout, "--\n");
1276 hyphenpending = FALSE;
1277 hyphenprinted = TRUE;
1278 }
1279
1280 /* See if there is a requirement to print some "before" lines for this
1281 match. Again, don't print overlaps. */
1282
1283 if (before_context > 0)
1284 {
1285 int linecount = 0;
1286 char *p = ptr;
1287
1288 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1289 linecount < before_context)
1290 {
1291 linecount++;
1292 p = previous_line(p, buffer);
1293 }
1294
1295 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1296 fprintf(stdout, "--\n");
1297
1298 while (p < ptr)
1299 {
1300 int ellength;
1301 char *pp = p;
1302 if (printname != NULL) fprintf(stdout, "%s-", printname);
1303 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1304 pp = end_of_line(pp, endptr, &ellength);
1305 FWRITE(p, 1, pp - p, stdout);
1306 p = pp;
1307 }
1308 }
1309
1310 /* Now print the matching line(s); ensure we set hyphenpending at the end
1311 of the file if any context lines are being output. */
1312
1313 if (after_context > 0 || before_context > 0)
1314 endhyphenpending = TRUE;
1315
1316 if (printname != NULL) fprintf(stdout, "%s:", printname);
1317 if (number) fprintf(stdout, "%d:", linenumber);
1318
1319 /* In multiline mode, we want to print to the end of the line in which
1320 the end of the matched string is found, so we adjust linelength and the
1321 line number appropriately, but only when there actually was a match
1322 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1323 the match will always be before the first newline sequence. */
1324
/* NOTE(review): the operator below is the bitwise '&', not '&&'. The two give
the same answer only if multiline is always exactly 0 or 1 - confirm. */
1325 if (multiline & !invert)
1326 {
1327 char *endmatch = ptr + offsets[1];
1328 t = ptr;
1329 while (t < endmatch)
1330 {
1331 t = end_of_line(t, endptr, &endlinelength);
1332 if (t < endmatch) linenumber++; else break;
1333 }
1334 linelength = t - ptr - endlinelength;
1335 }
1336
1337 /*** NOTE: Use only fwrite() to output the data line, so that binary
1338 zeroes are treated as just another data character. */
1339
1340 /* This extra option, for Jeffrey Friedl's debugging requirements,
1341 replaces the matched string, or a specific captured string if it exists,
1342 with X. When this happens, colouring is ignored. */
1343
1344 #ifdef JFRIEDL_DEBUG
1345 if (S_arg >= 0 && S_arg < mrc)
1346 {
1347 int first = S_arg * 2;
1348 int last = first + 1;
1349 FWRITE(ptr, 1, offsets[first], stdout);
1350 fprintf(stdout, "X");
1351 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1352 }
1353 else
1354 #endif
1355
1356 /* We have to split the line(s) up if colouring, and search for further
1357 matches, but not of course if the line is a non-match. */
1358
1359 if (do_colour && !invert)
1360 {
1361 int plength;
1362 int last_offset = 0;
1363 FWRITE(ptr, 1, offsets[0], stdout);
1364 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1365 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1366 fprintf(stdout, "%c[00m", 0x1b);
1367 for (;;)
1368 {
1369 /*
1370 last_offset += offsets[1];
1371 matchptr += offsets[1];
1372 length -= offsets[1];
1373 */
1374
/* Each pass looks for another match starting where the previous one ended.
The text between the two matches is written plain and the new match is
written between the colour escape sequences. */
1375 startoffset = offsets[1];
1376 last_offset = startoffset;
1377 if (last_offset >= linelength + endlinelength ||
1378 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1379 break;
1380 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1381 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1382 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1383 fprintf(stdout, "%c[00m", 0x1b);
1384 }
1385
1386 /* In multiline mode, we may have already printed the complete line
1387 and its line-ending characters (if they matched the pattern), so there
1388 may be no more to print. */
1389
1390 plength = (linelength + endlinelength) - last_offset;
1391 if (plength > 0)
1392 FWRITE(ptr + last_offset, 1, plength, stdout);
1393 }
1394
1395 /* Not colouring; no need to search for further matches */
1396
1397 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1398 }
1399
1400 /* End of doing what has to be done for a match. If --line-buffered was
1401 given, flush the output. */
1402
1403 if (line_buffered) fflush(stdout);
1404 rc = 0; /* Had some success */
1405
1406 /* Remember where the last match happened for after_context. We remember
1407 where we are about to restart, and that line's number. */
1408
1409 lastmatchrestart = ptr + linelength + endlinelength;
1410 lastmatchnumber = linenumber + 1;
1411 }
1412
1413 /* For a match in multiline inverted mode (which of course did not cause
1414 anything to be printed), we have to move on to the end of the match before
1415 proceeding. */
1416
1417 if (multiline && invert && match)
1418 {
1419 int ellength;
1420 char *endmatch = ptr + offsets[1];
1421 t = ptr;
1422 while (t < endmatch)
1423 {
1424 t = end_of_line(t, endptr, &ellength);
1425 if (t <= endmatch) linenumber++; else break;
1426 }
1427 endmatch = end_of_line(endmatch, endptr, &ellength);
1428 linelength = endmatch - ptr - ellength;
1429 }
1430
1431 /* Advance to after the newline and increment the line number. The file
1432 offset to the current line is maintained in filepos. */
1433
1434 ptr += linelength + endlinelength;
1435 filepos += (int)(linelength + endlinelength);
1436 linenumber++;
1437
1438 /* If input is line buffered, and the buffer is not yet full, read another
1439 line and add it into the buffer. */
1440
1441 if (input_line_buffered && bufflength < sizeof(buffer))
1442 {
1443 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1444 bufflength += add;
1445 endptr += add;
1446 }
1447
1448 /* If we haven't yet reached the end of the file (the buffer is full), and
1449 the current point is in the top 1/3 of the buffer, slide the buffer down by
1450 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1451 about to be lost, print them. */
1452
1453 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1454 {
1455 if (after_context > 0 &&
1456 lastmatchnumber > 0 &&
1457 lastmatchrestart < buffer + MBUFTHIRD)
1458 {
1459 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1460 lastmatchnumber = 0;
1461 }
1462
1463 /* Now do the shuffle */
1464
1465 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1466 ptr -= MBUFTHIRD;
1467
1468 #ifdef SUPPORT_LIBZ
1469 if (frtype == FR_LIBZ)
1470 bufflength = 2*MBUFTHIRD +
1471 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1472 else
1473 #endif
1474
1475 #ifdef SUPPORT_LIBBZ2
1476 if (frtype == FR_LIBBZ2)
1477 bufflength = 2*MBUFTHIRD +
1478 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1479 else
1480 #endif
1481
1482 bufflength = 2*MBUFTHIRD +
1483 (input_line_buffered?
1484 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1485 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1486 endptr = buffer + bufflength;
1487
1488 /* Adjust any last match point */
1489
1490 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1491 }
1492 } /* Loop through the whole file */
1493
1494 /* End of file; print final "after" lines if wanted; do_after_lines sets
1495 hyphenpending if it prints something. */
1496
1497 if (only_matching < 0 && !count_only)
1498 {
1499 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1500 hyphenpending |= endhyphenpending;
1501 }
1502
1503 /* Print the file name if we are looking for those without matches and there
1504 were none. If we found a match, we won't have got this far. */
1505
1506 if (filenames == FN_NOMATCH_ONLY)
1507 {
1508 fprintf(stdout, "%s\n", printname);
1509 return 0;
1510 }
1511
1512 /* Print the match count if wanted */
1513
1514 if (count_only)
1515 {
1516 if (count > 0 || !omit_zero_count)
1517 {
1518 if (printname != NULL && filenames != FN_NONE)
1519 fprintf(stdout, "%s:", printname);
1520 fprintf(stdout, "%d\n", count);
1521 }
1522 }
1523
1524 return rc;
1525 }
1526
1527
1528
1529 /*************************************************
1530 * Grep a file or recurse into a directory *
1531 *************************************************/
1532
1533 /* Given a path name, if it's a directory, scan all the files if we are
1534 recursing; if it's a file, grep it.
1535
1536 Arguments:
1537 pathname the path to investigate
1538 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1539 only_one_at_top TRUE if the path is the only one at toplevel
1540
1541 Returns: 0 if there was at least one match
1542 1 if there were no matches
1543 2 there was some kind of error
1544
1545 However, file opening failures are suppressed if "silent" is set.
1546 */
1547
1548 static int
1549 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1550 {
/* Overview: "-" is scanned as stdin. A directory is skipped, walked entry by
entry with a recursive call for each entry that passes the include/exclude
patterns, or opened like a file, according to dee_action. Anything else is
opened (through zlib or bzlib when compiled in and the name ends in .gz or
.bz2), passed to pcregrep(), and closed again. Within this function
dir_recurse is only passed on to the recursive call. */
1551 int rc = 1; /* Yield; 1 means no match found so far */
1552 int sep; /* Result of isdirectory(); used as the path separator below */
1553 int frtype;
1554 int pathlen;
1555 void *handle;
1556 FILE *in = NULL; /* Ensure initialized */
1557
1558 #ifdef SUPPORT_LIBZ
1559 gzFile ingz = NULL;
1560 #endif
1561
1562 #ifdef SUPPORT_LIBBZ2
1563 BZFILE *inbz2 = NULL;
1564 #endif
1565
1566 /* If the file name is "-" we scan stdin */
/* The name passed for printing is stdin_name when the file-name setting is
above FN_DEFAULT, or is FN_DEFAULT and this is not the only top-level
argument; otherwise it is NULL and no name is shown. */
1567
1568 if (strcmp(pathname, "-") == 0)
1569 {
1570 return pcregrep(stdin, FR_PLAIN,
1571 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1572 stdin_name : NULL);
1573 }
1574
1575 /* If the file is a directory, skip if skipping or if we are recursing, scan
1576 each file and directory within it, subject to any include or exclude patterns
1577 that were set. The scanning code is localized so it can be made
1578 system-specific. */
1579
1580 if ((sep = isdirectory(pathname)) != 0)
1581 {
1582 if (dee_action == dee_SKIP) return 1;
1583 if (dee_action == dee_RECURSE)
1584 {
1585 char buffer[1024];
1586 char *nextfile;
1587 directory_type *dir = opendirectory(pathname);
1588
1589 if (dir == NULL)
1590 {
1591 if (!silent)
1592 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1593 strerror(errno));
1594 return 2;
1595 }
1596
1597 while ((nextfile = readdirectory(dir)) != NULL)
1598 {
1599 int frc, nflen;
/* The format below uses at most 512 bytes of pathname and 128 bytes of the
entry name, so the result always fits in buffer; longer components are cut
short without any warning. */
1600 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1601 nflen = (int)(strlen(nextfile));
1602
/* The include/exclude patterns are applied to the entry name alone, not to
the whole path. Directories and other entries have separate pattern pairs. */
1603 if (isdirectory(buffer))
1604 {
1605 if (exclude_dir_compiled != NULL &&
1606 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1607 continue;
1608
1609 if (include_dir_compiled != NULL &&
1610 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1611 continue;
1612 }
1613 else
1614 {
1615 if (exclude_compiled != NULL &&
1616 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617 continue;
1618
1619 if (include_compiled != NULL &&
1620 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621 continue;
1622 }
1623
/* Merge the yield for this entry into rc: a value above 1 (an error) replaces
rc, and a match (0) replaces "no match" (1). */
1624 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1625 if (frc > 1) rc = frc;
1626 else if (frc == 0 && rc == 1) rc = 0;
1627 }
1628
1629 closedirectory(dir);
1630 return rc;
1631 }
1632 }
1633
1634 /* If the file is not a directory and not a regular file, skip it if that's
1635 been requested. */
1636
1637 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1638
1639 /* Control reaches here if we have a regular file, or if we have a directory
1640 and recursion or skipping was not requested, or if we have anything else and
1641 skipping was not requested. The scan proceeds. If this is the first and only
1642 argument at top level, we don't show the file name, unless we are only showing
1643 the file name, or the filename was forced (-H). */
1644
1645 pathlen = (int)(strlen(pathname));
1646
1647 /* Open using zlib if it is supported and the file name ends with .gz. */
1648
1649 #ifdef SUPPORT_LIBZ
1650 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1651 {
1652 ingz = gzopen(pathname, "rb");
1653 if (ingz == NULL)
1654 {
1655 if (!silent)
1656 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1657 strerror(errno));
1658 return 2;
1659 }
1660 handle = (void *)ingz;
1661 frtype = FR_LIBZ;
1662 }
1663 else
1664 #endif
1665
1666 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1667
1668 #ifdef SUPPORT_LIBBZ2
1669 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1670 {
1671 inbz2 = BZ2_bzopen(pathname, "rb");
1672 handle = (void *)inbz2;
1673 frtype = FR_LIBBZ2;
1674 }
1675 else
1676 #endif
1677
1678 /* Otherwise use plain fopen(). The label is so that we can come back here if
1679 an attempt to read a .bz2 file indicates that it really is a plain file. */
1680
1681 #ifdef SUPPORT_LIBBZ2
1682 PLAIN_FILE:
1683 #endif
1684 {
1685 in = fopen(pathname, "rb");
1686 handle = (void *)in;
1687 frtype = FR_PLAIN;
1688 }
1689
1690 /* All the opening methods return errno when they fail. */
/* This test covers the bzlib and plain opens; a failed zlib open has already
returned above. */
1691
1692 if (handle == NULL)
1693 {
1694 if (!silent)
1695 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1696 strerror(errno));
1697 return 2;
1698 }
1699
1700 /* Now grep the file */
1701
1702 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1703 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1704
1705 /* Close in an appropriate manner. */
1706
1707 #ifdef SUPPORT_LIBZ
1708 if (frtype == FR_LIBZ)
1709 gzclose(ingz);
1710 else
1711 #endif
1712
1713 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1714 read failed. If the error indicates that the file isn't in fact bzipped, try
1715 again as a normal file. */
/* On that retry the bzlib handle is closed first, and the yield of the second
pcregrep() call replaces the 2 from the failed attempt. */
1716
1717 #ifdef SUPPORT_LIBBZ2
1718 if (frtype == FR_LIBBZ2)
1719 {
1720 if (rc == 2)
1721 {
1722 int errnum;
1723 const char *err = BZ2_bzerror(inbz2, &errnum);
1724 if (errnum == BZ_DATA_ERROR_MAGIC)
1725 {
1726 BZ2_bzclose(inbz2);
1727 goto PLAIN_FILE;
1728 }
1729 else if (!silent)
1730 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1731 pathname, err);
1732 }
1733 BZ2_bzclose(inbz2);
1734 }
1735 else
1736 #endif
1737
1738 /* Normal file close */
1739
1740 fclose(in);
1741
1742 /* Pass back the yield from pcregrep(). */
1743
1744 return rc;
1745 }
1746
1747
1748
1749
1750 /*************************************************
1751 * Usage function *
1752 *************************************************/
1753
1754 static int
1755 usage(int rc)
1756 {
1757 option_item *op;
1758 fprintf(stderr, "Usage: pcregrep [-");
1759 for (op = optionlist; op->one_char != 0; op++)
1760 {
1761 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1762 }
1763 fprintf(stderr, "] [long options] [pattern] [files]\n");
1764 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1765 "options.\n");
1766 return rc;
1767 }
1768
1769
1770
1771
1772 /*************************************************
1773 * Help function *
1774 *************************************************/
1775
1776 static void
1777 help(void)
1778 {
1779 option_item *op;
1780
1781 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1782 printf("Search for PATTERN in each FILE or standard input.\n");
1783 printf("PATTERN must be present if neither -e nor -f is used.\n");
1784 printf("\"-\" can be used as a file name to mean STDIN.\n");
1785
1786 #ifdef SUPPORT_LIBZ
1787 printf("Files whose names end in .gz are read using zlib.\n");
1788 #endif
1789
1790 #ifdef SUPPORT_LIBBZ2
1791 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1792 #endif
1793
1794 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1795 printf("Other files and the standard input are read as plain files.\n\n");
1796 #else
1797 printf("All files are read as plain files, without any interpretation.\n\n");
1798 #endif
1799
1800 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1801 printf("Options:\n");
1802
1803 for (op = optionlist; op->one_char != 0; op++)
1804 {
1805 int n;
1806 char s[4];
1807
1808 /* Two options were accidentally implemented and documented with underscores
1809 instead of hyphens in their names, something that was not noticed for quite a
1810 few releases. When fixing this, I left the underscored versions in the list
1811 in case people were using them. However, we don't want to display them in the
1812 help data. There are no other options that contain underscores, and we do not
1813 expect ever to implement such options. Therefore, just omit any option that
1814 contains an underscore. */
1815
1816 if (strchr(op->long_name, '_') != NULL) continue;
1817
1818 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1819 n = 31 - printf(" %s --%s", s, op->long_name);
1820 if (n < 1) n = 1;
1821 printf("%.*s%s\n", n, " ", op->help_text);
1822 }
1823
1824 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1825 printf("trailing white space is removed and blank lines are ignored.\n");
1826 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1827
1828 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1829 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1830 }
1831
1832
1833
1834
1835 /*************************************************
1836 * Handle a single-letter, no data option *
1837 *************************************************/
1838
1839 static int
1840 handle_option(int letter, int options)
1841 {
1842 switch(letter)
1843 {
1844 case N_FOFFSETS: file_offsets = TRUE; break;
1845 case N_HELP: help(); pcregrep_exit(0);
1846 case N_LOFFSETS: line_offsets = number = TRUE; break;
1847 case N_LBUFFER: line_buffered = TRUE; break;
1848 case 'c': count_only = TRUE; break;
1849 case 'F': process_options |= PO_FIXED_STRINGS; break;
1850 case 'H': filenames = FN_FORCE; break;
1851 case 'h': filenames = FN_NONE; break;
1852 case 'i': options |= PCRE_CASELESS; break;
1853 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1854 case 'L': filenames = FN_NOMATCH_ONLY; break;
1855 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1856 case 'n': number = TRUE; break;
1857 case 'o': only_matching = 0; break;
1858 case 'q': quiet = TRUE; break;
1859 case 'r': dee_action = dee_RECURSE; break;
1860 case 's': silent = TRUE; break;
1861 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1862 case 'v': invert = TRUE; break;
1863 case 'w': process_options |= PO_WORD_MATCH; break;
1864 case 'x': process_options |= PO_LINE_MATCH; break;
1865
1866 case 'V':
1867 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1868 pcregrep_exit(0);
1869 break;
1870
1871 default:
1872 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1873 pcregrep_exit(usage(2));
1874 }
1875
1876 return options;
1877 }
1878
1879
1880
1881
1882 /*************************************************
1883 * Construct printed ordinal *
1884 *************************************************/
1885
/* This turns a number into an English ordinal such as "1st", "2nd", "3rd" or
"4th". A number whose last two digits are 11, 12 or 13 takes "th" ("11th",
"112th") even though its last digit is 1, 2 or 3. A negative number always
takes "th".

Argument:   n    the number to convert

Returns:    a pointer to a static buffer holding the text; the next call
            overwrites it, so the caller must use it or copy it first
*/

static char *
ordin(int n)
{
/* Three characters for each byte is more than enough for the decimal digits
of any int; the extra four leave room for a minus sign, the two-letter ending,
and the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
char *p = buffer + sprintf(buffer, "%d", n);

/* The last digit chooses the ending, except in the "teens". */

if (n%100 < 11 || n%100 > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

strcpy(p, ending);
return buffer;
}
1904
1905
1906
1907 /*************************************************
1908 * Compile a single pattern *
1909 *************************************************/
1910
1911 /* When the -F option has been used, this is called for each substring.
1912 Otherwise it's called for each supplied pattern.
1913
1914 Arguments:
1915 pattern the pattern string
1916 options the PCRE options
1917 filename the file name, or NULL for a command-line pattern
1918 count 0 if this is the only command line pattern, or
1919 number of the command line pattern, or
1920 linenumber for a pattern from a file
1921
1922 Returns: TRUE on success, FALSE after an error
1923 */
1924
1925 static BOOL
1926 compile_single_pattern(char *pattern, int options, char *filename, int count)
1927 {
1928 char buffer[MBUFTHIRD + 16];
1929 const char *error;
1930 int errptr;
1931
1932 if (pattern_count >= MAX_PATTERN_COUNT)
1933 {
1934 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1935 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1936 return FALSE;
1937 }
1938
1939 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1940 suffix[process_options]);
1941 pattern_list[pattern_count] =
1942 pcre_compile(buffer, options, &error, &errptr, pcretables);
1943 if (pattern_list[pattern_count] != NULL)
1944 {
1945 pattern_count++;
1946 return TRUE;
1947 }
1948
1949 /* Handle compile errors */
1950
1951 errptr -= (int)strlen(prefix[process_options]);
1952 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1953
1954 if (filename == NULL)
1955 {
1956 if (count == 0)
1957 fprintf(stderr, "pcregrep: Error in command-line regex "
1958 "at offset %d: %s\n", errptr, error);
1959 else
1960 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1961 "at offset %d: %s\n", ordin(count), errptr, error);
1962 }
1963 else
1964 {
1965 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1966 "at offset %d: %s\n", count, filename, errptr, error);
1967 }
1968
1969 return FALSE;
1970 }
1971
1972
1973
1974 /*************************************************
1975 * Compile one supplied pattern *
1976 *************************************************/
1977
1978 /* When the -F option has been used, each string may be a list of strings,
1979 separated by line breaks. They will be matched literally.
1980
1981 Arguments:
1982 pattern the pattern string
1983 options the PCRE options
1984 filename the file name, or NULL for a command-line pattern
1985 count 0 if this is the only command line pattern, or
1986 number of the command line pattern, or
1987 linenumber for a pattern from a file
1988
1989 Returns: TRUE on success, FALSE after an error
1990 */
1991
1992 static BOOL
1993 compile_pattern(char *pattern, int options, char *filename, int count)
1994 {
1995 if ((process_options & PO_FIXED_STRINGS) != 0)
1996 {
1997 char *eop = pattern + strlen(pattern);
1998 char buffer[MBUFTHIRD];
1999 for(;;)
2000 {
2001 int ellength;
2002 char *p = end_of_line(pattern, eop, &ellength);
2003 if (ellength == 0)
2004 return compile_single_pattern(pattern, options, filename, count);
2005 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2006 pattern = p;
2007 if (!compile_single_pattern(buffer, options, filename, count))
2008 return FALSE;
2009 }
2010 }
2011 else return compile_single_pattern(pattern, options, filename, count);
2012 }
2013
2014
2015
2016 /*************************************************
2017 * Main program *
2018 *************************************************/
2019
2020 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2021
2022 int
2023 main(int argc, char **argv)
2024 {
2025 int i, j;
2026 int rc = 1;
2027 int pcre_options = 0;
2028 int cmd_pattern_count = 0;
2029 int hint_count = 0;
2030 int errptr;
2031 BOOL only_one_at_top;
2032 char *patterns[MAX_PATTERN_COUNT];
2033 const char *locale_from = "--locale";
2034 const char *error;
2035
2036 /* Set the default line ending value from the default in the PCRE library;
2037 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2038 Note that the return values from pcre_config(), though derived from the ASCII
2039 codes, are the same in EBCDIC environments, so we must use the actual values
2040 rather than escapes such as '\r'. */
2041
2042 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2043 switch(i)
2044 {
2045 default: newline = (char *)"lf"; break;
2046 case 13: newline = (char *)"cr"; break;
2047 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2048 case -1: newline = (char *)"any"; break;
2049 case -2: newline = (char *)"anycrlf"; break;
2050 }
2051
2052 /* Process the options */
2053
2054 for (i = 1; i < argc; i++)
2055 {
2056 option_item *op = NULL;
2057 char *option_data = (char *)""; /* default to keep compiler happy */
2058 BOOL longop;
2059 BOOL longopwasequals = FALSE;
2060
2061 if (argv[i][0] != '-') break;
2062
2063 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2064 but only if we have previously had -e or -f to define the patterns. */
2065
2066 if (argv[i][1] == 0)
2067 {
2068 if (pattern_filename != NULL || pattern_count > 0) break;
2069 else pcregrep_exit(usage(2));
2070 }
2071
2072 /* Handle a long name option, or -- to terminate the options */
2073
2074 if (argv[i][1] == '-')
2075 {
2076 char *arg = argv[i] + 2;
2077 char *argequals = strchr(arg, '=');
2078
2079 if (*arg == 0) /* -- terminates options */
2080 {
2081 i++;
2082 break; /* out of the options-handling loop */
2083 }
2084
2085 longop = TRUE;
2086
2087 /* Some long options have data that follows after =, for example file=name.
2088 Some options have variations in the long name spelling: specifically, we
2089 allow "regexp" because GNU grep allows it, though I personally go along
2090 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2091 These options are entered in the table as "regex(p)". Options can be in
2092 both these categories. */
2093
2094 for (op = optionlist; op->one_char != 0; op++)
2095 {
2096 char *opbra = strchr(op->long_name, '(');
2097 char *equals = strchr(op->long_name, '=');
2098
2099 /* Handle options with only one spelling of the name */
2100
2101 if (opbra == NULL) /* Does not contain '(' */
2102 {
2103 if (equals == NULL) /* Not thing=data case */
2104 {
2105 if (strcmp(arg, op->long_name) == 0) break;
2106 }
2107 else /* Special case xxx=data */
2108 {
2109 int oplen = (int)(equals - op->long_name);
2110 int arglen = (argequals == NULL)?
2111 (int)strlen(arg) : (int)(argequals - arg);
2112 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2113 {
2114 option_data = arg + arglen;
2115 if (*option_data == '=')
2116 {
2117 option_data++;
2118 longopwasequals = TRUE;
2119 }
2120 break;
2121 }
2122 }
2123 }
2124
2125 /* Handle options with an alternate spelling of the name */
2126
2127 else
2128 {
2129 char buff1[24];
2130 char buff2[24];
2131
2132 int baselen = (int)(opbra - op->long_name);
2133 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2134 int arglen = (argequals == NULL || equals == NULL)?
2135 (int)strlen(arg) : (int)(argequals - arg);
2136
2137 sprintf(buff1, "%.*s", baselen, op->long_name);
2138 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2139
2140 if (strncmp(arg, buff1, arglen) == 0 ||
2141 strncmp(arg, buff2, arglen) == 0)
2142 {
2143 if (equals != NULL && argequals != NULL)
2144 {
2145 option_data = argequals;
2146 if (*option_data == '=')
2147 {
2148 option_data++;
2149 longopwasequals = TRUE;
2150 }
2151 }
2152 break;
2153 }
2154 }
2155 }
2156
2157 if (op->one_char == 0)
2158 {
2159 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2160 pcregrep_exit(usage(2));
2161 }
2162 }
2163
2164 /* Jeffrey Friedl's debugging harness uses these additional options which
2165 are not in the right form for putting in the option table because they use
2166 only one hyphen, yet are more than one character long. By putting them
2167 separately here, they will not get displayed as part of the help() output,
2168 but I don't think Jeffrey will care about that. */
2169
2170 #ifdef JFRIEDL_DEBUG
2171 else if (strcmp(argv[i], "-pre") == 0) {
2172 jfriedl_prefix = argv[++i];
2173 continue;
2174 } else if (strcmp(argv[i], "-post") == 0) {
2175 jfriedl_postfix = argv[++i];
2176 continue;
2177 } else if (strcmp(argv[i], "-XT") == 0) {
2178 sscanf(argv[++i], "%d", &jfriedl_XT);
2179 continue;
2180 } else if (strcmp(argv[i], "-XR") == 0) {
2181 sscanf(argv[++i], "%d", &jfriedl_XR);
2182 continue;
2183 }
2184 #endif
2185
2186
2187 /* One-char options; many that have no data may be in a single argument; we
2188 continue till we hit the last one or one that needs data. */
2189
2190 else
2191 {
2192 char *s = argv[i] + 1;
2193 longop = FALSE;
2194 while (*s != 0)
2195 {
2196 for (op = optionlist; op->one_char != 0; op++)
2197 {
2198 if (*s == op->one_char) break;
2199 }
2200 if (op->one_char == 0)
2201 {
2202 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2203 *s, argv[i]);
2204 pcregrep_exit(usage(2));
2205 }
2206
2207 /* Check for a single-character option that has data: OP_OP_NUMBER
2208 is used for one that either has a numerical number or defaults, i.e. the
2209 data is optional. If a digit follows, there is data; if not, carry on
2210 with other single-character options in the same string. */
2211
2212 option_data = s+1;
2213 if (op->type == OP_OP_NUMBER)
2214 {
2215 if (isdigit((unsigned char)s[1])) break;
2216 }
2217 else /* Check for end or a dataless option */
2218 {
2219 if (op->type != OP_NODATA || s[1] == 0) break;
2220 }
2221
2222 /* Handle a single-character option with no data, then loop for the
2223 next character in the string. */
2224
2225 pcre_options = handle_option(*s++, pcre_options);
2226 }
2227 }
2228
2229 /* At this point we should have op pointing to a matched option. If the type
2230 is OP_NODATA, it means that there is no data, and the option might set
2231 something in the PCRE options. */
2232
2233 if (op->type == OP_NODATA)
2234 {
2235 pcre_options = handle_option(op->one_char, pcre_options);
2236 continue;
2237 }
2238
2239 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2240 either has a value or defaults to something. It cannot have data in a
2241 separate item. At the moment, the only such options are "colo(u)r",
2242 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2243
2244 if (*option_data == 0 &&
2245 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2246 {
2247 switch (op->one_char)
2248 {
2249 case N_COLOUR:
2250 colour_option = (char *)"auto";
2251 break;
2252
2253 case 'o':
2254 only_matching = 0;
2255 break;
2256
2257 #ifdef JFRIEDL_DEBUG
2258 case 'S':
2259 S_arg = 0;
2260 break;
2261 #endif
2262 }
2263 continue;
2264 }
2265
2266 /* Otherwise, find the data string for the option. */
2267
2268 if (*option_data == 0)
2269 {
2270 if (i >= argc - 1 || longopwasequals)
2271 {
2272 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2273 pcregrep_exit(usage(2));
2274 }
2275 option_data = argv[++i];
2276 }
2277
2278 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2279 multiple times to create a list of patterns. */
2280
2281 if (op->type == OP_PATLIST)
2282 {
2283 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2284 {
2285 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2286 MAX_PATTERN_COUNT);
2287 return 2;
2288 }
2289 patterns[cmd_pattern_count++] = option_data;
2290 }
2291
2292 /* Otherwise, deal with single string or numeric data values. */
2293
2294 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2295 op->type != OP_OP_NUMBER)
2296 {
2297 *((char **)op->dataptr) = option_data;
2298 }
2299
2300 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2301 only for unpicking arguments, so just keep it simple. */
2302
2303 else
2304 {
2305 unsigned long int n = 0;
2306 char *endptr = option_data;
2307 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2308 while (isdigit((unsigned char)(*endptr)))
2309 n = n * 10 + (int)(*endptr++ - '0');
2310 if (*endptr != 0)
2311 {
2312 if (longop)
2313 {
2314 char *equals = strchr(op->long_name, '=');
2315 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2316 (int)(equals - op->long_name);
2317 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2318 option_data, nlen, op->long_name);
2319 }
2320 else
2321 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2322 option_data, op->one_char);
2323 pcregrep_exit(usage(2));
2324 }
2325 if (op->type == OP_LONGNUMBER)
2326 *((unsigned long int *)op->dataptr) = n;
2327 else
2328 *((int *)op->dataptr) = n;
2329 }
2330 }
2331
2332 /* Options have been decoded. If -C was used, its value is used as a default
2333 for -A and -B. */
2334
2335 if (both_context > 0)
2336 {
2337 if (after_context == 0) after_context = both_context;
2338 if (before_context == 0) before_context = both_context;
2339 }
2340
2341 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2342 However, the latter two set only_matching. */
2343
2344 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2345 (file_offsets && line_offsets))
2346 {
2347 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2348 "and/or --line-offsets\n");
2349 pcregrep_exit(usage(2));
2350 }
2351
2352 if (file_offsets || line_offsets) only_matching = 0;
2353
2354 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2355 LC_ALL environment variable is set, and if so, use it. */
2356
2357 if (locale == NULL)
2358 {
2359 locale = getenv("LC_ALL");
2360 locale_from = "LCC_ALL";
2361 }
2362
2363 if (locale == NULL)
2364 {
2365 locale = getenv("LC_CTYPE");
2366 locale_from = "LC_CTYPE";
2367 }
2368
2369 /* If a locale has been provided, set it, and generate the tables the PCRE
2370 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2371
2372 if (locale != NULL)
2373 {
2374 if (setlocale(LC_CTYPE, locale) == NULL)
2375 {
2376 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2377 locale, locale_from);
2378 return 2;
2379 }
2380 pcretables = pcre_maketables();
2381 }
2382
2383 /* Sort out colouring */
2384
2385 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2386 {
2387 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2388 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2389 else
2390 {
2391 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2392 colour_option);
2393 return 2;
2394 }
2395 if (do_colour)
2396 {
2397 char *cs = getenv("PCREGREP_COLOUR");
2398 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2399 if (cs != NULL) colour_string = cs;
2400 }
2401 }
2402
2403 /* Interpret the newline type; the default settings are Unix-like. */
2404
2405 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2406 {
2407 pcre_options |= PCRE_NEWLINE_CR;
2408 endlinetype = EL_CR;
2409 }
2410 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2411 {
2412 pcre_options |= PCRE_NEWLINE_LF;
2413 endlinetype = EL_LF;
2414 }
2415 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2416 {
2417 pcre_options |= PCRE_NEWLINE_CRLF;
2418 endlinetype = EL_CRLF;
2419 }
2420 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2421 {
2422 pcre_options |= PCRE_NEWLINE_ANY;
2423 endlinetype = EL_ANY;
2424 }
2425 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2426 {
2427 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2428 endlinetype = EL_ANYCRLF;
2429 }
2430 else
2431 {
2432 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2433 return 2;
2434 }
2435
2436 /* Interpret the text values for -d and -D */
2437
2438 if (dee_option != NULL)
2439 {
2440 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2441 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2442 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2443 else
2444 {
2445 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2446 return 2;
2447 }
2448 }
2449
2450 if (DEE_option != NULL)
2451 {
2452 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2453 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2454 else
2455 {
2456 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2457 return 2;
2458 }
2459 }
2460
2461 /* Check the values for Jeffrey Friedl's debugging options. */
2462
2463 #ifdef JFRIEDL_DEBUG
2464 if (S_arg > 9)
2465 {
2466 fprintf(stderr, "pcregrep: bad value for -S option\n");
2467 return 2;
2468 }
2469 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2470 {
2471 if (jfriedl_XT == 0) jfriedl_XT = 1;
2472 if (jfriedl_XR == 0) jfriedl_XR = 1;
2473 }
2474 #endif
2475
2476 /* Get memory to store the pattern and hints lists. */
2477
2478 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2479 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2480
2481 if (pattern_list == NULL || hints_list == NULL)
2482 {
2483 fprintf(stderr, "pcregrep: malloc failed\n");
2484 goto EXIT2;
2485 }
2486
2487 /* If no patterns were provided by -e, and there is no file provided by -f,
2488 the first argument is the one and only pattern, and it must exist. */
2489
2490 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2491 {
2492 if (i >= argc) return usage(2);
2493 patterns[cmd_pattern_count++] = argv[i++];
2494 }
2495
2496 /* Compile the patterns that were provided on the command line, either by
2497 multiple uses of -e or as a single unkeyed pattern. */
2498
2499 for (j = 0; j < cmd_pattern_count; j++)
2500 {
2501 if (!compile_pattern(patterns[j], pcre_options, NULL,
2502 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2503 goto EXIT2;
2504 }
2505
2506 /* Compile the regular expressions that are provided in a file. */
2507
2508 if (pattern_filename != NULL)
2509 {
2510 int linenumber = 0;
2511 FILE *f;
2512 char *filename;
2513 char buffer[MBUFTHIRD];
2514
2515 if (strcmp(pattern_filename, "-") == 0)
2516 {
2517 f = stdin;
2518 filename = stdin_name;
2519 }
2520 else
2521 {
2522 f = fopen(pattern_filename, "r");
2523 if (f == NULL)
2524 {
2525 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2526 strerror(errno));
2527 goto EXIT2;
2528 }
2529 filename = pattern_filename;
2530 }
2531
2532 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2533 {
2534 char *s = buffer + (int)strlen(buffer);
2535 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2536 *s = 0;
2537 linenumber++;
2538 if (buffer[0] == 0) continue; /* Skip blank lines */
2539 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2540 goto EXIT2;
2541 }
2542
2543 if (f != stdin) fclose(f);
2544 }
2545
2546 /* Study the regular expressions, as we will be running them many times */
2547
2548 for (j = 0; j < pattern_count; j++)
2549 {
2550 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2551 if (error != NULL)
2552 {
2553 char s[16];
2554 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2555 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2556 goto EXIT2;
2557 }
2558 hint_count++;
2559 }
2560
2561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2562 pcre_extra block for each pattern. */
2563
2564 if (match_limit > 0 || match_limit_recursion > 0)
2565 {
2566 for (j = 0; j < pattern_count; j++)
2567 {
2568 if (hints_list[j] == NULL)
2569 {
2570 hints_list[j] = malloc(sizeof(pcre_extra));
2571 if (hints_list[j] == NULL)
2572 {
2573 fprintf(stderr, "pcregrep: malloc failed\n");
2574 pcregrep_exit(2);
2575 }
2576 }
2577 if (match_limit > 0)
2578 {
2579 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2580 hints_list[j]->match_limit = match_limit;
2581 }
2582 if (match_limit_recursion > 0)
2583 {
2584 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2585 hints_list[j]->match_limit_recursion = match_limit_recursion;
2586 }
2587 }
2588 }
2589
2590 /* If there are include or exclude patterns, compile them. */
2591
2592 if (exclude_pattern != NULL)
2593 {
2594 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2595 pcretables);
2596 if (exclude_compiled == NULL)
2597 {
2598 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2599 errptr, error);
2600 goto EXIT2;
2601 }
2602 }
2603
2604 if (include_pattern != NULL)
2605 {
2606 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2607 pcretables);
2608 if (include_compiled == NULL)
2609 {
2610 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2611 errptr, error);
2612 goto EXIT2;
2613 }
2614 }
2615
2616 if (exclude_dir_pattern != NULL)
2617 {
2618 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2619 pcretables);
2620 if (exclude_dir_compiled == NULL)
2621 {
2622 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2623 errptr, error);
2624 goto EXIT2;
2625 }
2626 }
2627
2628 if (include_dir_pattern != NULL)
2629 {
2630 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2631 pcretables);
2632 if (include_dir_compiled == NULL)
2633 {
2634 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2635 errptr, error);
2636 goto EXIT2;
2637 }
2638 }
2639
2640 /* If there are no further arguments, do the business on stdin and exit. */
2641
2642 if (i >= argc)
2643 {
2644 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2645 goto EXIT;
2646 }
2647
2648 /* Otherwise, work through the remaining arguments as files or directories.
2649 Pass in the fact that there is only one argument at top level - this suppresses
2650 the file name if the argument is not a directory and filenames are not
2651 otherwise forced. */
2652
2653 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2654
2655 for (; i < argc; i++)
2656 {
2657 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2658 only_one_at_top);
2659 if (frc > 1) rc = frc;
2660 else if (frc == 0 && rc == 1) rc = 0;
2661 }
2662
2663 EXIT:
2664 if (pattern_list != NULL)
2665 {
2666 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2667 free(pattern_list);
2668 }
2669 if (hints_list != NULL)
2670 {
2671 for (i = 0; i < hint_count; i++)
2672 {
2673 if (hints_list[i] != NULL) free(hints_list[i]);
2674 }
2675 free(hints_list);
2676 }
2677 pcregrep_exit(rc);
2678
2679 EXIT2:
2680 rc = 2;
2681 goto EXIT;
2682 }
2683
2684 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12