/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 558 - (show annotations) (download)
Tue Oct 26 15:26:45 2010 UTC (4 years ago) by ph10
File MIME type: text/plain
File size: 72624 byte(s)
Fix missing code for missing strtoul() and strerror(). 

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2010 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #ifdef SUPPORT_LIBZ
59 #include <zlib.h>
60 #endif
61
62 #ifdef SUPPORT_LIBBZ2
63 #include <bzlib.h>
64 #endif
65
66 #include "pcre.h"
67
/* Boolean support. */

typedef int BOOL;

#define TRUE   1
#define FALSE  0

/* Fixed limits. OFFSET_SIZE is the length of the offsets vector handed to
pcre_exec(). */

#define MAX_PATTERN_COUNT  100
#define OFFSET_SIZE        99

/* One third of the main input buffer: never smaller than 8192 bytes, and at
least as large as the stdio BUFSIZ. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d (directories) and -D (devices) options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that FWRITE(...); is exactly one
statement. A bare "if (...) {}" followed by a semicolon breaks when the macro
is used as the body of an if that has an else clause. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114
115
116
117 /*************************************************
118 * Global variables *
119 *************************************************/
120
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
122 regular code. */
123
124 #ifdef JFRIEDL_DEBUG
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
130 #endif
131
132 static int endlinetype;
133
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
142
143 static const unsigned char *pcretables = NULL;
144
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
148
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
153
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
158
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int process_options = 0;
167
168 static BOOL count_only = FALSE;
169 static BOOL do_colour = FALSE;
170 static BOOL file_offsets = FALSE;
171 static BOOL hyphenpending = FALSE;
172 static BOOL invert = FALSE;
173 static BOOL line_buffered = FALSE;
174 static BOOL line_offsets = FALSE;
175 static BOOL multiline = FALSE;
176 static BOOL number = FALSE;
177 static BOOL omit_zero_count = FALSE;
178 static BOOL only_matching = FALSE;
179 static BOOL quiet = FALSE;
180 static BOOL silent = FALSE;
181 static BOOL utf8 = FALSE;
182
/* The kinds of data an option can take, followed by the structure that
describes one entry in the option table. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int         type;        /* one of the OP_xxx values above */
  int         one_char;    /* single-letter form, or a negative N_xxx code */
  void       *dataptr;     /* variable that receives the value, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* one-line description */
} option_item;
195
/* Codes for options that have no single-letter form. They are all negative,
which is how such options can be told apart from the lettered ones. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
211
212 static option_item optionlist[] = {
213 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
214 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
215 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
216 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
217 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
218 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
219 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
220 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
221 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
222 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
223 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
224 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
225 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
226 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
227 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
228 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
229 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
230 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
231 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
232 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
233 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
234 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
235 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
236 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
237 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
239 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
240 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
241 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
242 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
243 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
244 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
246 #ifdef JFRIEDL_DEBUG
247 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
248 #endif
249 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
250 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
251 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
252 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
253 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
254 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
255 { OP_NODATA, 0, NULL, NULL, NULL }
256 };
257
/* Text wrapped around each pattern according to the -w, -x, and -F options.
Those options set the 1, 2, and 4 bits in process_options, respectively, and
the resulting value is the index into these tables. Because -w together with
-x has the same effect as -x alone, entries 2 and 3 are identical, as are
entries 6 and 7. */

static const char *prefix[] = {
  "",            /* 0: none       */
  "\\b",         /* 1: -w         */
  "^(?:",        /* 2: -x         */
  "^(?:",        /* 3: -w -x      */
  "\\Q",         /* 4: -F         */
  "\\b\\Q",      /* 5: -F -w      */
  "^(?:\\Q",     /* 6: -F -x      */
  "^(?:\\Q"      /* 7: -F -w -x   */
};

static const char *suffix[] = {
  "",            /* 0: none       */
  "\\b",         /* 1: -w         */
  ")$",          /* 2: -x         */
  ")$",          /* 3: -w -x      */
  "\\E",         /* 4: -F         */
  "\\E\\b",      /* 5: -F -w      */
  "\\E)$",       /* 6: -F -x      */
  "\\E)$"        /* 7: -F -w -x   */
};
268
/* UTF-8 tables - used only when the newline setting is "any". */

/* Indexed by the number of additional bytes in a character: the mask that
extracts the data bits from the first byte. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Indexed by the low six bits of a first byte that is 0xc0 or greater: the
number of additional bytes in the character. */

const char utf8_table4[] = {
  /* 0x00 - 0x0f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10 - 0x1f */  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 - 0x2f */  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0x30 - 0x3f */  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
278
279
280
281 /*************************************************
282 * OS-specific functions *
283 *************************************************/
284
285 /* These functions are defined so that they can be made system specific,
286 although at present the only ones are for Unix, Win32, and for "no support". */
287
288
289 /************* Directory scanning in Unix ***********/
290
291 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292 #include <sys/types.h>
293 #include <sys/stat.h>
294 #include <dirent.h>
295
296 typedef DIR directory_type;
297
298 static int
299 isdirectory(char *filename)
300 {
301 struct stat statbuf;
302 if (stat(filename, &statbuf) < 0)
303 return 0; /* In the expectation that opening as a file will fail */
304 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
305 }
306
307 static directory_type *
308 opendirectory(char *filename)
309 {
310 return opendir(filename);
311 }
312
313 static char *
314 readdirectory(directory_type *dir)
315 {
316 for (;;)
317 {
318 struct dirent *dent = readdir(dir);
319 if (dent == NULL) return NULL;
320 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
321 return dent->d_name;
322 }
323 /* Control never reaches here */
324 }
325
326 static void
327 closedirectory(directory_type *dir)
328 {
329 closedir(dir);
330 }
331
332
333 /************* Test for regular file in Unix **********/
334
335 static int
336 isregfile(char *filename)
337 {
338 struct stat statbuf;
339 if (stat(filename, &statbuf) < 0)
340 return 1; /* In the expectation that opening as a file will fail */
341 return (statbuf.st_mode & S_IFMT) == S_IFREG;
342 }
343
344
345 /************* Test for a terminal in Unix **********/
346
347 static BOOL
348 is_stdout_tty(void)
349 {
350 return isatty(fileno(stdout));
351 }
352
353 static BOOL
354 is_file_tty(FILE *f)
355 {
356 return isatty(fileno(f));
357 }
358
359
360 /************* Directory scanning in Win32 ***********/
361
362 /* I (Philip Hazel) have no means of testing this code. It was contributed by
363 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364 when it did not exist. David Byron added a patch that moved the #include of
365 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
366 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
367 undefined when it is indeed undefined. */
368
369 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
370
371 #ifndef STRICT
372 # define STRICT
373 #endif
374 #ifndef WIN32_LEAN_AND_MEAN
375 # define WIN32_LEAN_AND_MEAN
376 #endif
377
378 #include <windows.h>
379
380 #ifndef INVALID_FILE_ATTRIBUTES
381 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
382 #endif
383
384 typedef struct directory_type
385 {
386 HANDLE handle;
387 BOOL first;
388 WIN32_FIND_DATA data;
389 } directory_type;
390
391 int
392 isdirectory(char *filename)
393 {
394 DWORD attr = GetFileAttributes(filename);
395 if (attr == INVALID_FILE_ATTRIBUTES)
396 return 0;
397 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
398 }
399
400 directory_type *
401 opendirectory(char *filename)
402 {
403 size_t len;
404 char *pattern;
405 directory_type *dir;
406 DWORD err;
407 len = strlen(filename);
408 pattern = (char *) malloc(len + 3);
409 dir = (directory_type *) malloc(sizeof(*dir));
410 if ((pattern == NULL) || (dir == NULL))
411 {
412 fprintf(stderr, "pcregrep: malloc failed\n");
413 exit(2);
414 }
415 memcpy(pattern, filename, len);
416 memcpy(&(pattern[len]), "\\*", 3);
417 dir->handle = FindFirstFile(pattern, &(dir->data));
418 if (dir->handle != INVALID_HANDLE_VALUE)
419 {
420 free(pattern);
421 dir->first = TRUE;
422 return dir;
423 }
424 err = GetLastError();
425 free(pattern);
426 free(dir);
427 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
428 return NULL;
429 }
430
431 char *
432 readdirectory(directory_type *dir)
433 {
434 for (;;)
435 {
436 if (!dir->first)
437 {
438 if (!FindNextFile(dir->handle, &(dir->data)))
439 return NULL;
440 }
441 else
442 {
443 dir->first = FALSE;
444 }
445 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
446 return dir->data.cFileName;
447 }
448 #ifndef _MSC_VER
449 return NULL; /* Keep compiler happy; never executed */
450 #endif
451 }
452
453 void
454 closedirectory(directory_type *dir)
455 {
456 FindClose(dir->handle);
457 free(dir);
458 }
459
460
461 /************* Test for regular file in Win32 **********/
462
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories. The result is 1 for "regular" and 0 for
a directory. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
470
471
472 /************* Test for a terminal in Win32 **********/
473
474 /* I don't know how to do this; assume never */
475
476 static BOOL
477 is_stdout_tty(void)
478 {
479 return FALSE;
480 }
481
482 static BOOL
483 is_file_tty(FILE *f)
484 {
485 return FALSE;
486 }
487
488
489 /************* Directory scanning when we can't do it ***********/
490
491 /* The type is void, and apart from isdirectory(), the functions do nothing. */
492
493 #else
494
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Directories cannot be opened, so the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
501
502
503 /************* Test for regular when we can't do it **********/
504
/* Assume all files are regular: the answer is 1 whatever the name. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
508
509
510 /************* Test for a terminal when we can't do it **********/
511
512 static BOOL
513 is_stdout_tty(void)
514 {
515 return FALSE;
516 }
517
518 static BOOL
519 is_file_tty(FILE *f)
520 {
521 return FALSE;
522 }
523
524 #endif
525
526
527
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but they do have the sys_errlist[] table of messages and
its size sys_nerr, from which this substitute is built. A number outside the
table gets a fixed "unknown" text. */

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
547
548
549
550 /*************************************************
551 * Read one line of input *
552 *************************************************/
553
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The room left in the buffer
is checked before each character is fetched, so a length that is zero or
negative stores nothing and consumes no input.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
581
582
583
584 /*************************************************
585 * Find end of line *
586 *************************************************/
587
588 /* The length of the endline sequence that is found is set via lenptr. This may
589 be zero at the very end of the file if there is no line-ending sequence there.
590
591 Arguments:
592 p current position in line
593 endptr end of available data
594 lenptr where to put the length of the eol sequence
595
596 Returns: pointer to the last byte of the line
597 */
598
599 static char *
600 end_of_line(char *p, char *endptr, int *lenptr)
601 {
602 switch(endlinetype)
603 {
604 default: /* Just in case */
605 case EL_LF:
606 while (p < endptr && *p != '\n') p++;
607 if (p < endptr)
608 {
609 *lenptr = 1;
610 return p + 1;
611 }
612 *lenptr = 0;
613 return endptr;
614
615 case EL_CR:
616 while (p < endptr && *p != '\r') p++;
617 if (p < endptr)
618 {
619 *lenptr = 1;
620 return p + 1;
621 }
622 *lenptr = 0;
623 return endptr;
624
625 case EL_CRLF:
626 for (;;)
627 {
628 while (p < endptr && *p != '\r') p++;
629 if (++p >= endptr)
630 {
631 *lenptr = 0;
632 return endptr;
633 }
634 if (*p == '\n')
635 {
636 *lenptr = 2;
637 return p + 1;
638 }
639 }
640 break;
641
642 case EL_ANYCRLF:
643 while (p < endptr)
644 {
645 int extra = 0;
646 register int c = *((unsigned char *)p);
647
648 if (utf8 && c >= 0xc0)
649 {
650 int gcii, gcss;
651 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
652 gcss = 6*extra;
653 c = (c & utf8_table3[extra]) << gcss;
654 for (gcii = 1; gcii <= extra; gcii++)
655 {
656 gcss -= 6;
657 c |= (p[gcii] & 0x3f) << gcss;
658 }
659 }
660
661 p += 1 + extra;
662
663 switch (c)
664 {
665 case 0x0a: /* LF */
666 *lenptr = 1;
667 return p;
668
669 case 0x0d: /* CR */
670 if (p < endptr && *p == 0x0a)
671 {
672 *lenptr = 2;
673 p++;
674 }
675 else *lenptr = 1;
676 return p;
677
678 default:
679 break;
680 }
681 } /* End of loop for ANYCRLF case */
682
683 *lenptr = 0; /* Must have hit the end */
684 return endptr;
685
686 case EL_ANY:
687 while (p < endptr)
688 {
689 int extra = 0;
690 register int c = *((unsigned char *)p);
691
692 if (utf8 && c >= 0xc0)
693 {
694 int gcii, gcss;
695 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
696 gcss = 6*extra;
697 c = (c & utf8_table3[extra]) << gcss;
698 for (gcii = 1; gcii <= extra; gcii++)
699 {
700 gcss -= 6;
701 c |= (p[gcii] & 0x3f) << gcss;
702 }
703 }
704
705 p += 1 + extra;
706
707 switch (c)
708 {
709 case 0x0a: /* LF */
710 case 0x0b: /* VT */
711 case 0x0c: /* FF */
712 *lenptr = 1;
713 return p;
714
715 case 0x0d: /* CR */
716 if (p < endptr && *p == 0x0a)
717 {
718 *lenptr = 2;
719 p++;
720 }
721 else *lenptr = 1;
722 return p;
723
724 case 0x85: /* NEL */
725 *lenptr = utf8? 2 : 1;
726 return p;
727
728 case 0x2028: /* LS */
729 case 0x2029: /* PS */
730 *lenptr = 3;
731 return p;
732
733 default:
734 break;
735 }
736 } /* End of loop for ANY case */
737
738 *lenptr = 0; /* Must have hit the end */
739 return endptr;
740 } /* End of overall switch */
741 }
742
743
744
745 /*************************************************
746 * Find start of previous line *
747 *************************************************/
748
749 /* This is called when looking back for before lines to print.
750
751 Arguments:
752 p start of the subsequent line
753 startptr start of available data
754
755 Returns: pointer to the start of the previous line
756 */
757
758 static char *
759 previous_line(char *p, char *startptr)
760 {
761 switch(endlinetype)
762 {
763 default: /* Just in case */
764 case EL_LF:
765 p--;
766 while (p > startptr && p[-1] != '\n') p--;
767 return p;
768
769 case EL_CR:
770 p--;
771 while (p > startptr && p[-1] != '\n') p--;
772 return p;
773
774 case EL_CRLF:
775 for (;;)
776 {
777 p -= 2;
778 while (p > startptr && p[-1] != '\n') p--;
779 if (p <= startptr + 1 || p[-2] == '\r') return p;
780 }
781 return p; /* But control should never get here */
782
783 case EL_ANY:
784 case EL_ANYCRLF:
785 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
786 if (utf8) while ((*p & 0xc0) == 0x80) p--;
787
788 while (p > startptr)
789 {
790 register int c;
791 char *pp = p - 1;
792
793 if (utf8)
794 {
795 int extra = 0;
796 while ((*pp & 0xc0) == 0x80) pp--;
797 c = *((unsigned char *)pp);
798 if (c >= 0xc0)
799 {
800 int gcii, gcss;
801 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
802 gcss = 6*extra;
803 c = (c & utf8_table3[extra]) << gcss;
804 for (gcii = 1; gcii <= extra; gcii++)
805 {
806 gcss -= 6;
807 c |= (pp[gcii] & 0x3f) << gcss;
808 }
809 }
810 }
811 else c = *((unsigned char *)pp);
812
813 if (endlinetype == EL_ANYCRLF) switch (c)
814 {
815 case 0x0a: /* LF */
816 case 0x0d: /* CR */
817 return p;
818
819 default:
820 break;
821 }
822
823 else switch (c)
824 {
825 case 0x0a: /* LF */
826 case 0x0b: /* VT */
827 case 0x0c: /* FF */
828 case 0x0d: /* CR */
829 case 0x85: /* NEL */
830 case 0x2028: /* LS */
831 case 0x2029: /* PS */
832 return p;
833
834 default:
835 break;
836 }
837
838 p = pp; /* Back one character */
839 } /* End of loop for ANY case */
840
841 return startptr; /* Hit start of data */
842 } /* End of overall switch */
843 }
844
845
846
847
848
849 /*************************************************
850 * Print the previous "after" lines *
851 *************************************************/
852
853 /* This is called if we are about to lose said lines because of buffer filling,
854 and at the end of the file. The data in the line is written using fwrite() so
855 that a binary zero does not terminate it.
856
857 Arguments:
858 lastmatchnumber the number of the last matching line, plus one
859 lastmatchrestart where we restarted after the last match
860 endptr end of available data
861 printname filename for printing
862
863 Returns: nothing
864 */
865
866 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
867 char *endptr, char *printname)
868 {
869 if (after_context > 0 && lastmatchnumber > 0)
870 {
871 int count = 0;
872 while (lastmatchrestart < endptr && count++ < after_context)
873 {
874 int ellength;
875 char *pp = lastmatchrestart;
876 if (printname != NULL) fprintf(stdout, "%s-", printname);
877 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
878 pp = end_of_line(pp, endptr, &ellength);
879 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
880 lastmatchrestart = pp;
881 }
882 hyphenpending = TRUE;
883 }
884 }
885
886
887
888 /*************************************************
889 * Apply patterns to subject till one matches *
890 *************************************************/
891
892 /* This function is called to run through all patterns, looking for a match. It
893 is used multiple times for the same subject when colouring is enabled, in order
894 to find all possible matches.
895
896 Arguments:
897 matchptr the start of the subject
898 length the length of the subject to match
899 offsets the offets vector to fill in
900 mrc address of where to put the result of pcre_exec()
901
902 Returns: TRUE if there was a match
903 FALSE if there was no match
904 invert if there was a non-fatal error
905 */
906
907 static BOOL
908 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
909 {
910 int i;
911 for (i = 0; i < pattern_count; i++)
912 {
913 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
914 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
915 if (*mrc >= 0) return TRUE;
916 if (*mrc == PCRE_ERROR_NOMATCH) continue;
917 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
918 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
919 fprintf(stderr, "this text:\n");
920 FWRITE(matchptr, 1, length, stderr); /* In case binary zero included */
921 fprintf(stderr, "\n");
922 if (error_count == 0 &&
923 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
924 {
925 fprintf(stderr, "pcregrep: error %d means that a resource limit "
926 "was exceeded\n", *mrc);
927 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
928 }
929 if (error_count++ > 20)
930 {
931 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
932 exit(2);
933 }
934 return invert; /* No more matching; don't show the line again */
935 }
936
937 return FALSE; /* No match, no errors */
938 }
939
940
941
942 /*************************************************
943 * Grep an individual file *
944 *************************************************/
945
946 /* This is called from grep_or_recurse() below. It uses a buffer that is three
947 times the value of MBUFTHIRD. The matching point is never allowed to stray into
948 the top third of the buffer, thus keeping more of the file available for
949 context printing or for multiline scanning. For large files, the pointer will
950 be in the middle third most of the time, so the bottom third is available for
951 "before" context printing.
952
953 Arguments:
954 handle the fopened FILE stream for a normal file
955 the gzFile pointer when reading is via libz
956 the BZFILE pointer when reading is via libbz2
957 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
958 printname the file name if it is to be printed for each match
959 or NULL if the file name is not to be printed
960 it cannot be NULL if filenames[_nomatch]_only is set
961
962 Returns: 0 if there was at least one match
963 1 otherwise (no matches)
964 2 if there is a read error on a .bz2 file
965 */
966
967 static int
968 pcregrep(void *handle, int frtype, char *printname)
969 {
970 int rc = 1;
971 int linenumber = 1;
972 int lastmatchnumber = 0;
973 int count = 0;
974 int filepos = 0;
975 int offsets[OFFSET_SIZE];
976 char *lastmatchrestart = NULL;
977 char buffer[3*MBUFTHIRD];
978 char *ptr = buffer;
979 char *endptr;
980 size_t bufflength;
981 BOOL endhyphenpending = FALSE;
982 BOOL input_line_buffered = line_buffered;
983 FILE *in = NULL; /* Ensure initialized */
984
985 #ifdef SUPPORT_LIBZ
986 gzFile ingz = NULL;
987 #endif
988
989 #ifdef SUPPORT_LIBBZ2
990 BZFILE *inbz2 = NULL;
991 #endif
992
993
994 /* Do the first read into the start of the buffer and set up the pointer to end
995 of what we have. In the case of libz, a non-zipped .gz file will be read as a
996 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
997 fail. */
998
999 #ifdef SUPPORT_LIBZ
1000 if (frtype == FR_LIBZ)
1001 {
1002 ingz = (gzFile)handle;
1003 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1004 }
1005 else
1006 #endif
1007
1008 #ifdef SUPPORT_LIBBZ2
1009 if (frtype == FR_LIBBZ2)
1010 {
1011 inbz2 = (BZFILE *)handle;
1012 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1013 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1014 } /* without the cast it is unsigned. */
1015 else
1016 #endif
1017
1018 {
1019 in = (FILE *)handle;
1020 if (is_file_tty(in)) input_line_buffered = TRUE;
1021 bufflength = input_line_buffered?
1022 read_one_line(buffer, 3*MBUFTHIRD, in) :
1023 fread(buffer, 1, 3*MBUFTHIRD, in);
1024 }
1025
1026 endptr = buffer + bufflength;
1027
1028 /* Loop while the current pointer is not at the end of the file. For large
1029 files, endptr will be at the end of the buffer when we are in the middle of the
1030 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1031 way, the buffer is shifted left and re-filled. */
1032
1033 while (ptr < endptr)
1034 {
1035 int endlinelength;
1036 int mrc = 0;
1037 BOOL match;
1038 char *matchptr = ptr;
1039 char *t = ptr;
1040 size_t length, linelength;
1041
1042 /* At this point, ptr is at the start of a line. We need to find the length
1043 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1044 length remainder of the data in the buffer. Otherwise, it is the length of
1045 the next line, excluding the terminating newline. After matching, we always
1046 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1047 option is used for compiling, so that any match is constrained to be in the
1048 first line. */
1049
1050 t = end_of_line(t, endptr, &endlinelength);
1051 linelength = t - ptr - endlinelength;
1052 length = multiline? (size_t)(endptr - ptr) : linelength;
1053
1054 /* Extra processing for Jeffrey Friedl's debugging. */
1055
1056 #ifdef JFRIEDL_DEBUG
1057 if (jfriedl_XT || jfriedl_XR)
1058 {
1059 #include <sys/time.h>
1060 #include <time.h>
1061 struct timeval start_time, end_time;
1062 struct timezone dummy;
1063 int i;
1064
1065 if (jfriedl_XT)
1066 {
1067 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1068 const char *orig = ptr;
1069 ptr = malloc(newlen + 1);
1070 if (!ptr) {
1071 printf("out of memory");
1072 exit(2);
1073 }
1074 endptr = ptr;
1075 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1076 for (i = 0; i < jfriedl_XT; i++) {
1077 strncpy(endptr, orig, length);
1078 endptr += length;
1079 }
1080 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1081 length = newlen;
1082 }
1083
1084 if (gettimeofday(&start_time, &dummy) != 0)
1085 perror("bad gettimeofday");
1086
1087
1088 for (i = 0; i < jfriedl_XR; i++)
1089 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1090 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1091
1092 if (gettimeofday(&end_time, &dummy) != 0)
1093 perror("bad gettimeofday");
1094
1095 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1096 -
1097 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1098
1099 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1100 return 0;
1101 }
1102 #endif
1103
1104 /* We come back here after a match when the -o option (only_matching) is set,
1105 in order to find any further matches in the same line. */
1106
1107 ONLY_MATCHING_RESTART:
1108
1109 /* Run through all the patterns until one matches or there is an error other
1110 than NOMATCH. This code is in a subroutine so that it can be re-used for
1111 finding subsequent matches when colouring matched lines. */
1112
1113 match = match_patterns(matchptr, length, offsets, &mrc);
1114
1115 /* If it's a match or a not-match (as required), do what's wanted. */
1116
1117 if (match != invert)
1118 {
1119 BOOL hyphenprinted = FALSE;
1120
1121 /* We've failed if we want a file that doesn't have any matches. */
1122
1123 if (filenames == FN_NOMATCH_ONLY) return 1;
1124
1125 /* Just count if just counting is wanted. */
1126
1127 if (count_only) count++;
1128
1129 /* If all we want is a file name, there is no need to scan any more lines
1130 in the file. */
1131
1132 else if (filenames == FN_MATCH_ONLY)
1133 {
1134 fprintf(stdout, "%s\n", printname);
1135 return 0;
1136 }
1137
1138 /* Likewise, if all we want is a yes/no answer. */
1139
1140 else if (quiet) return 0;
1141
1142 /* The --only-matching option prints just the substring that matched, and
1143 the --file-offsets and --line-offsets options output offsets for the
1144 matching substring (they both force --only-matching). None of these options
1145 prints any context. Afterwards, adjust the start and length, and then jump
1146 back to look for further matches in the same line. If we are in invert
1147 mode, however, nothing is printed - this could be still useful because the
1148 return code is set. */
1149
1150 else if (only_matching)
1151 {
1152 if (!invert)
1153 {
1154 if (printname != NULL) fprintf(stdout, "%s:", printname);
1155 if (number) fprintf(stdout, "%d:", linenumber);
1156 if (line_offsets)
1157 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1158 offsets[1] - offsets[0]);
1159 else if (file_offsets)
1160 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1161 offsets[1] - offsets[0]);
1162 else
1163 {
1164 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1165 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1166 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1167 }
1168 fprintf(stdout, "\n");
1169 matchptr += offsets[1];
1170 length -= offsets[1];
1171 match = FALSE;
1172 goto ONLY_MATCHING_RESTART;
1173 }
1174 }
1175
1176 /* This is the default case when none of the above options is set. We print
1177 the matching line(s), possibly preceded and/or followed by other lines of
1178 context. */
1179
1180 else
1181 {
1182 /* See if there is a requirement to print some "after" lines from a
1183 previous match. We never print any overlaps. */
1184
1185 if (after_context > 0 && lastmatchnumber > 0)
1186 {
1187 int ellength;
1188 int linecount = 0;
1189 char *p = lastmatchrestart;
1190
1191 while (p < ptr && linecount < after_context)
1192 {
1193 p = end_of_line(p, ptr, &ellength);
1194 linecount++;
1195 }
1196
1197 /* It is important to advance lastmatchrestart during this printing so
1198 that it interacts correctly with any "before" printing below. Print
1199 each line's data using fwrite() in case there are binary zeroes. */
1200
1201 while (lastmatchrestart < p)
1202 {
1203 char *pp = lastmatchrestart;
1204 if (printname != NULL) fprintf(stdout, "%s-", printname);
1205 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1206 pp = end_of_line(pp, endptr, &ellength);
1207 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1208 lastmatchrestart = pp;
1209 }
1210 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1211 }
1212
1213 /* If there were non-contiguous lines printed above, insert hyphens. */
1214
1215 if (hyphenpending)
1216 {
1217 fprintf(stdout, "--\n");
1218 hyphenpending = FALSE;
1219 hyphenprinted = TRUE;
1220 }
1221
1222 /* See if there is a requirement to print some "before" lines for this
1223 match. Again, don't print overlaps. */
1224
1225 if (before_context > 0)
1226 {
1227 int linecount = 0;
1228 char *p = ptr;
1229
1230 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1231 linecount < before_context)
1232 {
1233 linecount++;
1234 p = previous_line(p, buffer);
1235 }
1236
1237 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1238 fprintf(stdout, "--\n");
1239
1240 while (p < ptr)
1241 {
1242 int ellength;
1243 char *pp = p;
1244 if (printname != NULL) fprintf(stdout, "%s-", printname);
1245 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1246 pp = end_of_line(pp, endptr, &ellength);
1247 FWRITE(p, 1, pp - p, stdout);
1248 p = pp;
1249 }
1250 }
1251
1252 /* Now print the matching line(s); ensure we set hyphenpending at the end
1253 of the file if any context lines are being output. */
1254
1255 if (after_context > 0 || before_context > 0)
1256 endhyphenpending = TRUE;
1257
1258 if (printname != NULL) fprintf(stdout, "%s:", printname);
1259 if (number) fprintf(stdout, "%d:", linenumber);
1260
1261 /* In multiline mode, we want to print to the end of the line in which
1262 the end of the matched string is found, so we adjust linelength and the
1263 line number appropriately, but only when there actually was a match
1264 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1265 the match will always be before the first newline sequence. */
1266
1267 if (multiline)
1268 {
1269 int ellength;
1270 char *endmatch = ptr;
1271 if (!invert)
1272 {
1273 endmatch += offsets[1];
1274 t = ptr;
1275 while (t < endmatch)
1276 {
1277 t = end_of_line(t, endptr, &ellength);
1278 if (t <= endmatch) linenumber++; else break;
1279 }
1280 }
1281 endmatch = end_of_line(endmatch, endptr, &ellength);
1282 linelength = endmatch - ptr - ellength;
1283 }
1284
1285 /*** NOTE: Use only fwrite() to output the data line, so that binary
1286 zeroes are treated as just another data character. */
1287
1288 /* This extra option, for Jeffrey Friedl's debugging requirements,
1289 replaces the matched string, or a specific captured string if it exists,
1290 with X. When this happens, colouring is ignored. */
1291
1292 #ifdef JFRIEDL_DEBUG
1293 if (S_arg >= 0 && S_arg < mrc)
1294 {
1295 int first = S_arg * 2;
1296 int last = first + 1;
1297 FWRITE(ptr, 1, offsets[first], stdout);
1298 fprintf(stdout, "X");
1299 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1300 }
1301 else
1302 #endif
1303
1304 /* We have to split the line(s) up if colouring, and search for further
1305 matches. */
1306
1307 if (do_colour)
1308 {
1309 int last_offset = 0;
1310 FWRITE(ptr, 1, offsets[0], stdout);
1311 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1312 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1313 fprintf(stdout, "%c[00m", 0x1b);
1314 for (;;)
1315 {
1316 last_offset += offsets[1];
1317 matchptr += offsets[1];
1318 length -= offsets[1];
1319 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1320 FWRITE(matchptr, 1, offsets[0], stdout);
1321 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1322 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1323 fprintf(stdout, "%c[00m", 0x1b);
1324 }
1325 FWRITE(ptr + last_offset, 1,
1326 (linelength + endlinelength) - last_offset, stdout);
1327 }
1328
1329 /* Not colouring; no need to search for further matches */
1330
1331 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1332 }
1333
1334 /* End of doing what has to be done for a match. If --line-buffered was
1335 given, flush the output. */
1336
1337 if (line_buffered) fflush(stdout);
1338 rc = 0; /* Had some success */
1339
1340 /* Remember where the last match happened for after_context. We remember
1341 where we are about to restart, and that line's number. */
1342
1343 lastmatchrestart = ptr + linelength + endlinelength;
1344 lastmatchnumber = linenumber + 1;
1345 }
1346
1347 /* For a match in multiline inverted mode (which of course did not cause
1348 anything to be printed), we have to move on to the end of the match before
1349 proceeding. */
1350
1351 if (multiline && invert && match)
1352 {
1353 int ellength;
1354 char *endmatch = ptr + offsets[1];
1355 t = ptr;
1356 while (t < endmatch)
1357 {
1358 t = end_of_line(t, endptr, &ellength);
1359 if (t <= endmatch) linenumber++; else break;
1360 }
1361 endmatch = end_of_line(endmatch, endptr, &ellength);
1362 linelength = endmatch - ptr - ellength;
1363 }
1364
1365 /* Advance to after the newline and increment the line number. The file
1366 offset to the current line is maintained in filepos. */
1367
1368 ptr += linelength + endlinelength;
1369 filepos += (int)(linelength + endlinelength);
1370 linenumber++;
1371
1372 /* If input is line buffered, and the buffer is not yet full, read another
1373 line and add it into the buffer. */
1374
1375 if (input_line_buffered && bufflength < sizeof(buffer))
1376 {
1377 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1378 bufflength += add;
1379 endptr += add;
1380 }
1381
1382 /* If we haven't yet reached the end of the file (the buffer is full), and
1383 the current point is in the top 1/3 of the buffer, slide the buffer down by
1384 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1385 about to be lost, print them. */
1386
1387 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1388 {
1389 if (after_context > 0 &&
1390 lastmatchnumber > 0 &&
1391 lastmatchrestart < buffer + MBUFTHIRD)
1392 {
1393 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1394 lastmatchnumber = 0;
1395 }
1396
1397 /* Now do the shuffle */
1398
1399 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1400 ptr -= MBUFTHIRD;
1401
1402 #ifdef SUPPORT_LIBZ
1403 if (frtype == FR_LIBZ)
1404 bufflength = 2*MBUFTHIRD +
1405 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1406 else
1407 #endif
1408
1409 #ifdef SUPPORT_LIBBZ2
1410 if (frtype == FR_LIBBZ2)
1411 bufflength = 2*MBUFTHIRD +
1412 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1413 else
1414 #endif
1415
1416 bufflength = 2*MBUFTHIRD +
1417 (input_line_buffered?
1418 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1419 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1420 endptr = buffer + bufflength;
1421
1422 /* Adjust any last match point */
1423
1424 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1425 }
1426 } /* Loop through the whole file */
1427
1428 /* End of file; print final "after" lines if wanted; do_after_lines sets
1429 hyphenpending if it prints something. */
1430
1431 if (!only_matching && !count_only)
1432 {
1433 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1434 hyphenpending |= endhyphenpending;
1435 }
1436
1437 /* Print the file name if we are looking for those without matches and there
1438 were none. If we found a match, we won't have got this far. */
1439
1440 if (filenames == FN_NOMATCH_ONLY)
1441 {
1442 fprintf(stdout, "%s\n", printname);
1443 return 0;
1444 }
1445
1446 /* Print the match count if wanted */
1447
1448 if (count_only)
1449 {
1450 if (count > 0 || !omit_zero_count)
1451 {
1452 if (printname != NULL && filenames != FN_NONE)
1453 fprintf(stdout, "%s:", printname);
1454 fprintf(stdout, "%d\n", count);
1455 }
1456 }
1457
1458 return rc;
1459 }
1460
1461
1462
1463 /*************************************************
1464 * Grep a file or recurse into a directory *
1465 *************************************************/
1466
1467 /* Given a path name, if it's a directory, scan all the files if we are
1468 recursing; if it's a file, grep it.
1469
1470 Arguments:
1471 pathname the path to investigate
1472 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1473 only_one_at_top TRUE if the path is the only one at toplevel
1474
1475 Returns: 0 if there was at least one match
1476 1 if there were no matches
1477 2 there was some kind of error
1478
1479 However, file opening failures are suppressed if "silent" is set.
1480 */
1481
1482 static int
1483 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1484 {
1485 int rc = 1;
1486 int sep;
1487 int frtype;
1488 int pathlen;
1489 void *handle;
1490 FILE *in = NULL; /* Ensure initialized */
1491
1492 #ifdef SUPPORT_LIBZ
1493 gzFile ingz = NULL;
1494 #endif
1495
1496 #ifdef SUPPORT_LIBBZ2
1497 BZFILE *inbz2 = NULL;
1498 #endif
1499
1500 /* If the file name is "-" we scan stdin */
1501
1502 if (strcmp(pathname, "-") == 0)
1503 {
1504 return pcregrep(stdin, FR_PLAIN,
1505 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1506 stdin_name : NULL);
1507 }
1508
1509 /* If the file is a directory, skip if skipping or if we are recursing, scan
1510 each file and directory within it, subject to any include or exclude patterns
1511 that were set. The scanning code is localized so it can be made
1512 system-specific. */
1513
1514 if ((sep = isdirectory(pathname)) != 0)
1515 {
1516 if (dee_action == dee_SKIP) return 1;
1517 if (dee_action == dee_RECURSE)
1518 {
1519 char buffer[1024];
1520 char *nextfile;
1521 directory_type *dir = opendirectory(pathname);
1522
1523 if (dir == NULL)
1524 {
1525 if (!silent)
1526 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1527 strerror(errno));
1528 return 2;
1529 }
1530
1531 while ((nextfile = readdirectory(dir)) != NULL)
1532 {
1533 int frc, nflen;
1534 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1535 nflen = (int)(strlen(nextfile));
1536
1537 if (isdirectory(buffer))
1538 {
1539 if (exclude_dir_compiled != NULL &&
1540 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1541 continue;
1542
1543 if (include_dir_compiled != NULL &&
1544 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1545 continue;
1546 }
1547 else
1548 {
1549 if (exclude_compiled != NULL &&
1550 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1551 continue;
1552
1553 if (include_compiled != NULL &&
1554 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1555 continue;
1556 }
1557
1558 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1559 if (frc > 1) rc = frc;
1560 else if (frc == 0 && rc == 1) rc = 0;
1561 }
1562
1563 closedirectory(dir);
1564 return rc;
1565 }
1566 }
1567
1568 /* If the file is not a directory and not a regular file, skip it if that's
1569 been requested. */
1570
1571 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1572
1573 /* Control reaches here if we have a regular file, or if we have a directory
1574 and recursion or skipping was not requested, or if we have anything else and
1575 skipping was not requested. The scan proceeds. If this is the first and only
1576 argument at top level, we don't show the file name, unless we are only showing
1577 the file name, or the filename was forced (-H). */
1578
1579 pathlen = (int)(strlen(pathname));
1580
1581 /* Open using zlib if it is supported and the file name ends with .gz. */
1582
1583 #ifdef SUPPORT_LIBZ
1584 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1585 {
1586 ingz = gzopen(pathname, "rb");
1587 if (ingz == NULL)
1588 {
1589 if (!silent)
1590 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1591 strerror(errno));
1592 return 2;
1593 }
1594 handle = (void *)ingz;
1595 frtype = FR_LIBZ;
1596 }
1597 else
1598 #endif
1599
1600 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1601
1602 #ifdef SUPPORT_LIBBZ2
1603 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1604 {
1605 inbz2 = BZ2_bzopen(pathname, "rb");
1606 handle = (void *)inbz2;
1607 frtype = FR_LIBBZ2;
1608 }
1609 else
1610 #endif
1611
1612 /* Otherwise use plain fopen(). The label is so that we can come back here if
1613 an attempt to read a .bz2 file indicates that it really is a plain file. */
1614
1615 #ifdef SUPPORT_LIBBZ2
1616 PLAIN_FILE:
1617 #endif
1618 {
1619 in = fopen(pathname, "rb");
1620 handle = (void *)in;
1621 frtype = FR_PLAIN;
1622 }
1623
1624 /* All the opening methods return errno when they fail. */
1625
1626 if (handle == NULL)
1627 {
1628 if (!silent)
1629 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1630 strerror(errno));
1631 return 2;
1632 }
1633
1634 /* Now grep the file */
1635
1636 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1637 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1638
1639 /* Close in an appropriate manner. */
1640
1641 #ifdef SUPPORT_LIBZ
1642 if (frtype == FR_LIBZ)
1643 gzclose(ingz);
1644 else
1645 #endif
1646
1647 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1648 read failed. If the error indicates that the file isn't in fact bzipped, try
1649 again as a normal file. */
1650
1651 #ifdef SUPPORT_LIBBZ2
1652 if (frtype == FR_LIBBZ2)
1653 {
1654 if (rc == 2)
1655 {
1656 int errnum;
1657 const char *err = BZ2_bzerror(inbz2, &errnum);
1658 if (errnum == BZ_DATA_ERROR_MAGIC)
1659 {
1660 BZ2_bzclose(inbz2);
1661 goto PLAIN_FILE;
1662 }
1663 else if (!silent)
1664 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1665 pathname, err);
1666 }
1667 BZ2_bzclose(inbz2);
1668 }
1669 else
1670 #endif
1671
1672 /* Normal file close */
1673
1674 fclose(in);
1675
1676 /* Pass back the yield from pcregrep(). */
1677
1678 return rc;
1679 }
1680
1681
1682
1683
1684 /*************************************************
1685 * Usage function *
1686 *************************************************/
1687
1688 static int
1689 usage(int rc)
1690 {
1691 option_item *op;
1692 fprintf(stderr, "Usage: pcregrep [-");
1693 for (op = optionlist; op->one_char != 0; op++)
1694 {
1695 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1696 }
1697 fprintf(stderr, "] [long options] [pattern] [files]\n");
1698 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1699 "options.\n");
1700 return rc;
1701 }
1702
1703
1704
1705
1706 /*************************************************
1707 * Help function *
1708 *************************************************/
1709
1710 static void
1711 help(void)
1712 {
1713 option_item *op;
1714
1715 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1716 printf("Search for PATTERN in each FILE or standard input.\n");
1717 printf("PATTERN must be present if neither -e nor -f is used.\n");
1718 printf("\"-\" can be used as a file name to mean STDIN.\n");
1719
1720 #ifdef SUPPORT_LIBZ
1721 printf("Files whose names end in .gz are read using zlib.\n");
1722 #endif
1723
1724 #ifdef SUPPORT_LIBBZ2
1725 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1726 #endif
1727
1728 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1729 printf("Other files and the standard input are read as plain files.\n\n");
1730 #else
1731 printf("All files are read as plain files, without any interpretation.\n\n");
1732 #endif
1733
1734 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1735 printf("Options:\n");
1736
1737 for (op = optionlist; op->one_char != 0; op++)
1738 {
1739 int n;
1740 char s[4];
1741 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1742 n = 30 - printf(" %s --%s", s, op->long_name);
1743 if (n < 1) n = 1;
1744 printf("%.*s%s\n", n, " ", op->help_text);
1745 }
1746
1747 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1748 printf("trailing white space is removed and blank lines are ignored.\n");
1749 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1750
1751 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1752 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1753 }
1754
1755
1756
1757
1758 /*************************************************
1759 * Handle a single-letter, no data option *
1760 *************************************************/
1761
1762 static int
1763 handle_option(int letter, int options)
1764 {
1765 switch(letter)
1766 {
1767 case N_FOFFSETS: file_offsets = TRUE; break;
1768 case N_HELP: help(); exit(0);
1769 case N_LOFFSETS: line_offsets = number = TRUE; break;
1770 case N_LBUFFER: line_buffered = TRUE; break;
1771 case 'c': count_only = TRUE; break;
1772 case 'F': process_options |= PO_FIXED_STRINGS; break;
1773 case 'H': filenames = FN_FORCE; break;
1774 case 'h': filenames = FN_NONE; break;
1775 case 'i': options |= PCRE_CASELESS; break;
1776 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1777 case 'L': filenames = FN_NOMATCH_ONLY; break;
1778 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1779 case 'n': number = TRUE; break;
1780 case 'o': only_matching = TRUE; break;
1781 case 'q': quiet = TRUE; break;
1782 case 'r': dee_action = dee_RECURSE; break;
1783 case 's': silent = TRUE; break;
1784 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1785 case 'v': invert = TRUE; break;
1786 case 'w': process_options |= PO_WORD_MATCH; break;
1787 case 'x': process_options |= PO_LINE_MATCH; break;
1788
1789 case 'V':
1790 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1791 exit(0);
1792 break;
1793
1794 default:
1795 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1796 exit(usage(2));
1797 }
1798
1799 return options;
1800 }
1801
1802
1803
1804
1805 /*************************************************
1806 * Construct printed ordinal *
1807 *************************************************/
1808
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), not the suffix that their final
digit would otherwise select.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[32];       /* Ample for the digits of any int + suffix */
const char *ending = "th";
int last_two = n % 100;
char *p = buffer;

/* Only a final 1, 2, or 3 outside the range 11-13 has a special suffix. For a
negative number the remainders are not positive, so "th" is used. */

if (last_two < 11 || last_two > 13) switch (n%10)
  {
  case 1: ending = "st"; break;
  case 2: ending = "nd"; break;
  case 3: ending = "rd"; break;
  default: break;
  }

sprintf(p, "%d", n);
while (*p != 0) p++;
strcpy(p, ending);
return buffer;
}
1827
1828
1829
1830 /*************************************************
1831 * Compile a single pattern *
1832 *************************************************/
1833
1834 /* When the -F option has been used, this is called for each substring.
1835 Otherwise it's called for each supplied pattern.
1836
1837 Arguments:
1838 pattern the pattern string
1839 options the PCRE options
1840 filename the file name, or NULL for a command-line pattern
1841 count 0 if this is the only command line pattern, or
1842 number of the command line pattern, or
1843 linenumber for a pattern from a file
1844
1845 Returns: TRUE on success, FALSE after an error
1846 */
1847
1848 static BOOL
1849 compile_single_pattern(char *pattern, int options, char *filename, int count)
1850 {
1851 char buffer[MBUFTHIRD + 16];
1852 const char *error;
1853 int errptr;
1854
1855 if (pattern_count >= MAX_PATTERN_COUNT)
1856 {
1857 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1858 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1859 return FALSE;
1860 }
1861
1862 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1863 suffix[process_options]);
1864 pattern_list[pattern_count] =
1865 pcre_compile(buffer, options, &error, &errptr, pcretables);
1866 if (pattern_list[pattern_count] != NULL)
1867 {
1868 pattern_count++;
1869 return TRUE;
1870 }
1871
1872 /* Handle compile errors */
1873
1874 errptr -= (int)strlen(prefix[process_options]);
1875 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1876
1877 if (filename == NULL)
1878 {
1879 if (count == 0)
1880 fprintf(stderr, "pcregrep: Error in command-line regex "
1881 "at offset %d: %s\n", errptr, error);
1882 else
1883 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1884 "at offset %d: %s\n", ordin(count), errptr, error);
1885 }
1886 else
1887 {
1888 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1889 "at offset %d: %s\n", count, filename, errptr, error);
1890 }
1891
1892 return FALSE;
1893 }
1894
1895
1896
1897 /*************************************************
1898 * Compile one supplied pattern *
1899 *************************************************/
1900
1901 /* When the -F option has been used, each string may be a list of strings,
1902 separated by line breaks. They will be matched literally.
1903
1904 Arguments:
1905 pattern the pattern string
1906 options the PCRE options
1907 filename the file name, or NULL for a command-line pattern
1908 count 0 if this is the only command line pattern, or
1909 number of the command line pattern, or
1910 linenumber for a pattern from a file
1911
1912 Returns: TRUE on success, FALSE after an error
1913 */
1914
1915 static BOOL
1916 compile_pattern(char *pattern, int options, char *filename, int count)
1917 {
1918 if ((process_options & PO_FIXED_STRINGS) != 0)
1919 {
1920 char *eop = pattern + strlen(pattern);
1921 char buffer[MBUFTHIRD];
1922 for(;;)
1923 {
1924 int ellength;
1925 char *p = end_of_line(pattern, eop, &ellength);
1926 if (ellength == 0)
1927 return compile_single_pattern(pattern, options, filename, count);
1928 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1929 pattern = p;
1930 if (!compile_single_pattern(buffer, options, filename, count))
1931 return FALSE;
1932 }
1933 }
1934 else return compile_single_pattern(pattern, options, filename, count);
1935 }
1936
1937
1938
1939 /*************************************************
1940 * Main program *
1941 *************************************************/
1942
1943 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1944
1945 int
1946 main(int argc, char **argv)
1947 {
1948 int i, j;
1949 int rc = 1;
1950 int pcre_options = 0;
1951 int cmd_pattern_count = 0;
1952 int hint_count = 0;
1953 int errptr;
1954 BOOL only_one_at_top;
1955 char *patterns[MAX_PATTERN_COUNT];
1956 const char *locale_from = "--locale";
1957 const char *error;
1958
1959 /* Set the default line ending value from the default in the PCRE library;
1960 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1961 Note that the return values from pcre_config(), though derived from the ASCII
1962 codes, are the same in EBCDIC environments, so we must use the actual values
1963 rather than escapes such as '\r'. */
1964
1965 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1966 switch(i)
1967 {
1968 default: newline = (char *)"lf"; break;
1969 case 13: newline = (char *)"cr"; break;
1970 case (13 << 8) | 10: newline = (char *)"crlf"; break;
1971 case -1: newline = (char *)"any"; break;
1972 case -2: newline = (char *)"anycrlf"; break;
1973 }
1974
1975 /* Process the options */
1976
1977 for (i = 1; i < argc; i++)
1978 {
1979 option_item *op = NULL;
1980 char *option_data = (char *)""; /* default to keep compiler happy */
1981 BOOL longop;
1982 BOOL longopwasequals = FALSE;
1983
1984 if (argv[i][0] != '-') break;
1985
1986 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1987 but only if we have previously had -e or -f to define the patterns. */
1988
1989 if (argv[i][1] == 0)
1990 {
1991 if (pattern_filename != NULL || pattern_count > 0) break;
1992 else exit(usage(2));
1993 }
1994
1995 /* Handle a long name option, or -- to terminate the options */
1996
1997 if (argv[i][1] == '-')
1998 {
1999 char *arg = argv[i] + 2;
2000 char *argequals = strchr(arg, '=');
2001
2002 if (*arg == 0) /* -- terminates options */
2003 {
2004 i++;
2005 break; /* out of the options-handling loop */
2006 }
2007
2008 longop = TRUE;
2009
2010 /* Some long options have data that follows after =, for example file=name.
2011 Some options have variations in the long name spelling: specifically, we
2012 allow "regexp" because GNU grep allows it, though I personally go along
2013 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2014 These options are entered in the table as "regex(p)". Options can be in
2015 both these categories. */
2016
2017 for (op = optionlist; op->one_char != 0; op++)
2018 {
2019 char *opbra = strchr(op->long_name, '(');
2020 char *equals = strchr(op->long_name, '=');
2021
2022 /* Handle options with only one spelling of the name */
2023
2024 if (opbra == NULL) /* Does not contain '(' */
2025 {
2026 if (equals == NULL) /* Not thing=data case */
2027 {
2028 if (strcmp(arg, op->long_name) == 0) break;
2029 }
2030 else /* Special case xxx=data */
2031 {
2032 int oplen = (int)(equals - op->long_name);
2033 int arglen = (argequals == NULL)?
2034 (int)strlen(arg) : (int)(argequals - arg);
2035 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2036 {
2037 option_data = arg + arglen;
2038 if (*option_data == '=')
2039 {
2040 option_data++;
2041 longopwasequals = TRUE;
2042 }
2043 break;
2044 }
2045 }
2046 }
2047
2048 /* Handle options with an alternate spelling of the name */
2049
2050 else
2051 {
2052 char buff1[24];
2053 char buff2[24];
2054
2055 int baselen = (int)(opbra - op->long_name);
2056 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2057 int arglen = (argequals == NULL || equals == NULL)?
2058 (int)strlen(arg) : (int)(argequals - arg);
2059
2060 sprintf(buff1, "%.*s", baselen, op->long_name);
2061 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2062
2063 if (strncmp(arg, buff1, arglen) == 0 ||
2064 strncmp(arg, buff2, arglen) == 0)
2065 {
2066 if (equals != NULL && argequals != NULL)
2067 {
2068 option_data = argequals;
2069 if (*option_data == '=')
2070 {
2071 option_data++;
2072 longopwasequals = TRUE;
2073 }
2074 }
2075 break;
2076 }
2077 }
2078 }
2079
2080 if (op->one_char == 0)
2081 {
2082 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2083 exit(usage(2));
2084 }
2085 }
2086
2087 /* Jeffrey Friedl's debugging harness uses these additional options which
2088 are not in the right form for putting in the option table because they use
2089 only one hyphen, yet are more than one character long. By putting them
2090 separately here, they will not get displayed as part of the help() output,
2091 but I don't think Jeffrey will care about that. */
2092
2093 #ifdef JFRIEDL_DEBUG
2094 else if (strcmp(argv[i], "-pre") == 0) {
2095 jfriedl_prefix = argv[++i];
2096 continue;
2097 } else if (strcmp(argv[i], "-post") == 0) {
2098 jfriedl_postfix = argv[++i];
2099 continue;
2100 } else if (strcmp(argv[i], "-XT") == 0) {
2101 sscanf(argv[++i], "%d", &jfriedl_XT);
2102 continue;
2103 } else if (strcmp(argv[i], "-XR") == 0) {
2104 sscanf(argv[++i], "%d", &jfriedl_XR);
2105 continue;
2106 }
2107 #endif
2108
2109
2110 /* One-char options; many that have no data may be in a single argument; we
2111 continue till we hit the last one or one that needs data. */
2112
2113 else
2114 {
2115 char *s = argv[i] + 1;
2116 longop = FALSE;
2117 while (*s != 0)
2118 {
2119 for (op = optionlist; op->one_char != 0; op++)
2120 { if (*s == op->one_char) break; }
2121 if (op->one_char == 0)
2122 {
2123 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2124 *s, argv[i]);
2125 exit(usage(2));
2126 }
2127 if (op->type != OP_NODATA || s[1] == 0)
2128 {
2129 option_data = s+1;
2130 break;
2131 }
2132 pcre_options = handle_option(*s++, pcre_options);
2133 }
2134 }
2135
2136 /* At this point we should have op pointing to a matched option. If the type
2137 is OP_NODATA, it means that there is no data, and the option might set
2138 something in the PCRE options. */
2139
2140 if (op->type == OP_NODATA)
2141 {
2142 pcre_options = handle_option(op->one_char, pcre_options);
2143 continue;
2144 }
2145
2146 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2147 either has a value or defaults to something. It cannot have data in a
2148 separate item. At the moment, the only such options are "colo(u)r" and
2149 Jeffrey Friedl's special -S debugging option. */
2150
2151 if (*option_data == 0 &&
2152 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2153 {
2154 switch (op->one_char)
2155 {
2156 case N_COLOUR:
2157 colour_option = (char *)"auto";
2158 break;
2159 #ifdef JFRIEDL_DEBUG
2160 case 'S':
2161 S_arg = 0;
2162 break;
2163 #endif
2164 }
2165 continue;
2166 }
2167
2168 /* Otherwise, find the data string for the option. */
2169
2170 if (*option_data == 0)
2171 {
2172 if (i >= argc - 1 || longopwasequals)
2173 {
2174 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2175 exit(usage(2));
2176 }
2177 option_data = argv[++i];
2178 }
2179
2180 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2181 multiple times to create a list of patterns. */
2182
2183 if (op->type == OP_PATLIST)
2184 {
2185 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2186 {
2187 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2188 MAX_PATTERN_COUNT);
2189 return 2;
2190 }
2191 patterns[cmd_pattern_count++] = option_data;
2192 }
2193
2194 /* Otherwise, deal with single string or numeric data values. */
2195
2196 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2197 {
2198 *((char **)op->dataptr) = option_data;
2199 }
2200
2201 /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2202 only for unpicking arguments, so just keep it simple. */
2203
2204 else
2205 {
2206 int n = 0;
2207 char *endptr = option_data;
2208 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2209 while (isdigit((unsigned char)(*endptr)))
2210 n = n * 10 + (int)(*endptr++ - '0');
2211 if (*endptr != 0)
2212 {
2213 if (longop)
2214 {
2215 char *equals = strchr(op->long_name, '=');
2216 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2217 (int)(equals - op->long_name);
2218 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2219 option_data, nlen, op->long_name);
2220 }
2221 else
2222 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2223 option_data, op->one_char);
2224 exit(usage(2));
2225 }
2226 *((int *)op->dataptr) = n;
2227 }
2228 }
2229
2230 /* Options have been decoded. If -C was used, its value is used as a default
2231 for -A and -B. */
2232
2233 if (both_context > 0)
2234 {
2235 if (after_context == 0) after_context = both_context;
2236 if (before_context == 0) before_context = both_context;
2237 }
2238
2239 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2240 However, the latter two set the only_matching flag. */
2241
2242 if ((only_matching && (file_offsets || line_offsets)) ||
2243 (file_offsets && line_offsets))
2244 {
2245 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2246 "and/or --line-offsets\n");
2247 exit(usage(2));
2248 }
2249
2250 if (file_offsets || line_offsets) only_matching = TRUE;
2251
2252 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2253 LC_ALL environment variable is set, and if so, use it. */
2254
2255 if (locale == NULL)
2256 {
2257 locale = getenv("LC_ALL");
2258 locale_from = "LCC_ALL";
2259 }
2260
2261 if (locale == NULL)
2262 {
2263 locale = getenv("LC_CTYPE");
2264 locale_from = "LC_CTYPE";
2265 }
2266
2267 /* If a locale has been provided, set it, and generate the tables the PCRE
2268 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2269
2270 if (locale != NULL)
2271 {
2272 if (setlocale(LC_CTYPE, locale) == NULL)
2273 {
2274 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2275 locale, locale_from);
2276 return 2;
2277 }
2278 pcretables = pcre_maketables();
2279 }
2280
2281 /* Sort out colouring */
2282
2283 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2284 {
2285 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2286 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2287 else
2288 {
2289 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2290 colour_option);
2291 return 2;
2292 }
2293 if (do_colour)
2294 {
2295 char *cs = getenv("PCREGREP_COLOUR");
2296 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2297 if (cs != NULL) colour_string = cs;
2298 }
2299 }
2300
2301 /* Interpret the newline type; the default settings are Unix-like. */
2302
2303 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2304 {
2305 pcre_options |= PCRE_NEWLINE_CR;
2306 endlinetype = EL_CR;
2307 }
2308 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2309 {
2310 pcre_options |= PCRE_NEWLINE_LF;
2311 endlinetype = EL_LF;
2312 }
2313 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2314 {
2315 pcre_options |= PCRE_NEWLINE_CRLF;
2316 endlinetype = EL_CRLF;
2317 }
2318 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2319 {
2320 pcre_options |= PCRE_NEWLINE_ANY;
2321 endlinetype = EL_ANY;
2322 }
2323 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2324 {
2325 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2326 endlinetype = EL_ANYCRLF;
2327 }
2328 else
2329 {
2330 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2331 return 2;
2332 }
2333
2334 /* Interpret the text values for -d and -D */
2335
2336 if (dee_option != NULL)
2337 {
2338 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2339 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2340 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2341 else
2342 {
2343 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2344 return 2;
2345 }
2346 }
2347
2348 if (DEE_option != NULL)
2349 {
2350 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2351 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2352 else
2353 {
2354 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2355 return 2;
2356 }
2357 }
2358
2359 /* Check the values for Jeffrey Friedl's debugging options. */
2360
2361 #ifdef JFRIEDL_DEBUG
2362 if (S_arg > 9)
2363 {
2364 fprintf(stderr, "pcregrep: bad value for -S option\n");
2365 return 2;
2366 }
2367 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2368 {
2369 if (jfriedl_XT == 0) jfriedl_XT = 1;
2370 if (jfriedl_XR == 0) jfriedl_XR = 1;
2371 }
2372 #endif
2373
2374 /* Get memory to store the pattern and hints lists. */
2375
2376 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2377 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2378
2379 if (pattern_list == NULL || hints_list == NULL)
2380 {
2381 fprintf(stderr, "pcregrep: malloc failed\n");
2382 goto EXIT2;
2383 }
2384
2385 /* If no patterns were provided by -e, and there is no file provided by -f,
2386 the first argument is the one and only pattern, and it must exist. */
2387
2388 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2389 {
2390 if (i >= argc) return usage(2);
2391 patterns[cmd_pattern_count++] = argv[i++];
2392 }
2393
2394 /* Compile the patterns that were provided on the command line, either by
2395 multiple uses of -e or as a single unkeyed pattern. */
2396
2397 for (j = 0; j < cmd_pattern_count; j++)
2398 {
2399 if (!compile_pattern(patterns[j], pcre_options, NULL,
2400 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2401 goto EXIT2;
2402 }
2403
2404 /* Compile the regular expressions that are provided in a file. */
2405
2406 if (pattern_filename != NULL)
2407 {
2408 int linenumber = 0;
2409 FILE *f;
2410 char *filename;
2411 char buffer[MBUFTHIRD];
2412
2413 if (strcmp(pattern_filename, "-") == 0)
2414 {
2415 f = stdin;
2416 filename = stdin_name;
2417 }
2418 else
2419 {
2420 f = fopen(pattern_filename, "r");
2421 if (f == NULL)
2422 {
2423 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2424 strerror(errno));
2425 goto EXIT2;
2426 }
2427 filename = pattern_filename;
2428 }
2429
2430 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2431 {
2432 char *s = buffer + (int)strlen(buffer);
2433 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2434 *s = 0;
2435 linenumber++;
2436 if (buffer[0] == 0) continue; /* Skip blank lines */
2437 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2438 goto EXIT2;
2439 }
2440
2441 if (f != stdin) fclose(f);
2442 }
2443
2444 /* Study the regular expressions, as we will be running them many times */
2445
2446 for (j = 0; j < pattern_count; j++)
2447 {
2448 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2449 if (error != NULL)
2450 {
2451 char s[16];
2452 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2453 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2454 goto EXIT2;
2455 }
2456 hint_count++;
2457 }
2458
2459 /* If there are include or exclude patterns, compile them. */
2460
2461 if (exclude_pattern != NULL)
2462 {
2463 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2464 pcretables);
2465 if (exclude_compiled == NULL)
2466 {
2467 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2468 errptr, error);
2469 goto EXIT2;
2470 }
2471 }
2472
2473 if (include_pattern != NULL)
2474 {
2475 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2476 pcretables);
2477 if (include_compiled == NULL)
2478 {
2479 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2480 errptr, error);
2481 goto EXIT2;
2482 }
2483 }
2484
2485 if (exclude_dir_pattern != NULL)
2486 {
2487 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2488 pcretables);
2489 if (exclude_dir_compiled == NULL)
2490 {
2491 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2492 errptr, error);
2493 goto EXIT2;
2494 }
2495 }
2496
2497 if (include_dir_pattern != NULL)
2498 {
2499 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2500 pcretables);
2501 if (include_dir_compiled == NULL)
2502 {
2503 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2504 errptr, error);
2505 goto EXIT2;
2506 }
2507 }
2508
2509 /* If there are no further arguments, do the business on stdin and exit. */
2510
2511 if (i >= argc)
2512 {
2513 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2514 goto EXIT;
2515 }
2516
2517 /* Otherwise, work through the remaining arguments as files or directories.
2518 Pass in the fact that there is only one argument at top level - this suppresses
2519 the file name if the argument is not a directory and filenames are not
2520 otherwise forced. */
2521
2522 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2523
2524 for (; i < argc; i++)
2525 {
2526 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2527 only_one_at_top);
2528 if (frc > 1) rc = frc;
2529 else if (frc == 0 && rc == 1) rc = 0;
2530 }
2531
2532 EXIT:
2533 if (pattern_list != NULL)
2534 {
2535 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2536 free(pattern_list);
2537 }
2538 if (hints_list != NULL)
2539 {
2540 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2541 free(hints_list);
2542 }
2543 return rc;
2544
2545 EXIT2:
2546 rc = 2;
2547 goto EXIT;
2548 }
2549
2550 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12