/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 117 - (show annotations) (download)
Fri Mar 9 15:59:06 2007 UTC (7 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 57116 byte(s)
Update copyright years to 2007.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 # include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #include <unistd.h>
54
55 #include "pcre.h"
56
57 #define FALSE 0
58 #define TRUE 1
59
60 typedef int BOOL; /* Boolean type used throughout; holds FALSE or TRUE */
61
62 #define MAX_PATTERN_COUNT 100
63
/* MBUFTHIRD is one third of the size of the file buffer in pcregrep(), which
is declared as 3*MBUFTHIRD bytes. It is BUFSIZ, but never less than 8192. */
64 #if BUFSIZ > 8192
65 #define MBUFTHIRD BUFSIZ
66 #else
67 #define MBUFTHIRD 8192
68 #endif
69
70 /* Values for the "filenames" variable, which specifies options for file name
71 output. The order is important; it is assumed that a file name is wanted for
72 all values greater than FN_DEFAULT. */
73
74 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
75
76 /* Actions for the -d and -D options */
77
/* NOTE(review): the lower-case dee_ values presumably belong to -d
(directories) and the upper-case DEE_ values to -D (devices), matching the
dee_action and DEE_action variables - confirm against the option decoding. */
78 enum { dee_READ, dee_SKIP, dee_RECURSE };
79 enum { DEE_READ, DEE_SKIP };
80
81 /* Actions for special processing options (flag bits) */
82
/* These bits are combined in process_options; the prefix[] and suffix[] tables
are laid out for indexing by the resulting value. */
83 #define PO_WORD_MATCH 0x0001
84 #define PO_LINE_MATCH 0x0002
85 #define PO_FIXED_STRINGS 0x0004
86
87 /* Line ending types */
88
/* One of these is held in endlinetype, which end_of_line() and previous_line()
switch on to decide how lines are delimited. */
89 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
90
91
92
93 /*************************************************
94 * Global variables *
95 *************************************************/
96
97 /* Jeffrey Friedl has some debugging requirements that are not part of the
98 regular code. */
99
100 #ifdef JFRIEDL_DEBUG
101 static int S_arg = -1; /* -S value: number of the captured substring that pcregrep() replaces with X; negative means off */
102 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
103 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
104 static const char *jfriedl_prefix = ""; /* copied in front of the replicated text */
105 static const char *jfriedl_postfix = ""; /* copied after the replicated text */
106 #endif
107
108 static int endlinetype; /* one of the EL_ values; selects the line-ending handling */
109
110 static char *colour_string = (char *)"1;31"; /* parameter text placed inside the ESC[...m sequence when colouring */
111 static char *colour_option = NULL; /* data for --color / --colour */
112 static char *dee_option = NULL; /* data for -d (--directories) */
113 static char *DEE_option = NULL; /* data for -D (--devices) */
114 static char *newline = NULL; /* data for -N (--newline) */
115 static char *pattern_filename = NULL; /* data for -f (--file) */
116 static char *stdin_name = (char *)"(standard input)"; /* changed by --label */
117 static char *locale = NULL; /* data for --locale */
118
119 static const unsigned char *pcretables = NULL;
120
/* pcregrep() tries pattern_list[i] with hints_list[i] for i < pattern_count. */
121 static int pattern_count = 0;
122 static pcre **pattern_list;
123 static pcre_extra **hints_list;
124
125 static char *include_pattern = NULL; /* data for --include */
126 static char *exclude_pattern = NULL; /* data for --exclude */
127
128 static pcre *include_compiled = NULL;
129 static pcre *exclude_compiled = NULL;
130
131 static int after_context = 0; /* data for -A */
132 static int before_context = 0; /* data for -B */
133 static int both_context = 0; /* data for -C */
134 static int dee_action = dee_READ;
135 static int DEE_action = DEE_READ;
136 static int error_count = 0; /* pcre_exec() errors so far; pcregrep() gives up after more than 20 */
137 static int filenames = FN_DEFAULT; /* one of the FN_ values */
138 static int process_options = 0; /* PO_ flag bits */
139
140 static BOOL count_only = FALSE;
141 static BOOL do_colour = FALSE;
142 static BOOL hyphenpending = FALSE; /* a "--" separator is due before the next group of output lines */
143 static BOOL invert = FALSE;
144 static BOOL multiline = FALSE;
145 static BOOL number = FALSE; /* prefix output lines with their line number */
146 static BOOL only_matching = FALSE;
147 static BOOL quiet = FALSE;
148 static BOOL silent = FALSE;
149 static BOOL utf8 = FALSE;
150
151 /* Structure for options and list of them */
152
/* Kinds of option; one of these is stored in the type field of option_item. */
153 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
154 OP_PATLIST };
155
156 typedef struct option_item {
157 int type; /* one of the OP_ values above */
158 int one_char; /* single-letter form, or a negative N_ code if there is none */
159 void *dataptr; /* variable associated with the option, or NULL */
160 const char *long_name; /* long form; may carry a "=value" description */
161 const char *help_text; /* one-line description */
162 } option_item;
163
164 /* Options without a single-letter equivalent get a negative value. This can be
165 used to identify them. */
166
167 #define N_COLOUR (-1)
168 #define N_EXCLUDE (-2)
169 #define N_HELP (-3)
170 #define N_INCLUDE (-4)
171 #define N_LABEL (-5)
172 #define N_LOCALE (-6)
173 #define N_NULL (-7)
174
175 static option_item optionlist[] = {
176 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
177 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
178 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
179 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
180 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
181 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
182 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
183 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
184 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
185 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
186 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
187 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
188 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
189 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
190 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
191 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
192 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
193 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
194 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
195 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
196 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
197 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
198 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
199 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
200 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
201 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
202 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
203 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
204 #ifdef JFRIEDL_DEBUG
205 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
206 #endif
207 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
208 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
209 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
210 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
211 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
212 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
213 { OP_NODATA, 0, NULL, NULL, NULL }
214 };
215
216 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
217 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
218 that the combination of -w and -x has the same effect as -x on its own, so we
219 can treat them as the same. */
220
/* Each table has eight entries, one for every combination of the three bits;
entries 2 and 3 are the same, as are entries 6 and 7, because -x overrides -w. */
221 static const char *prefix[] = {
222 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
223
224 static const char *suffix[] = {
225 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
226
227 /* UTF-8 tables - used only when the newline setting is "all". */
228
/* utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from the first byte. */
229 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
230
/* utf8_table4 is indexed by the bottom 6 bits of a first byte that is 0xc0 or
greater, and gives the number of additional bytes that follow it. */
231 const char utf8_table4[] = {
232 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
233 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
234 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
235 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
236
237
238
239 /*************************************************
240 * OS-specific functions *
241 *************************************************/
242
243 /* These functions are defined so that they can be made system specific,
244 although at present the only ones are for Unix, Win32, and for "no support". */
245
246
247 /************* Directory scanning in Unix ***********/
248
249 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
250 #include <sys/types.h>
251 #include <sys/stat.h>
252 #include <dirent.h>
253
254 typedef DIR directory_type;
255
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to examine
Returns:    '/' if the path can be stat'ed and is a directory; otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat info;

/* A path that cannot be stat'ed is reported as "not a directory", in the
expectation that the caller's attempt to open it as a file will then fail. */

if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
264
265 static directory_type *
266 opendirectory(char *filename)
267 {
268 return opendir(filename);
269 }
270
271 static char *
272 readdirectory(directory_type *dir)
273 {
274 for (;;)
275 {
276 struct dirent *dent = readdir(dir);
277 if (dent == NULL) return NULL;
278 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
279 return dent->d_name;
280 }
281 return NULL; /* Keep compiler happy; never executed */
282 }
283
284 static void
285 closedirectory(directory_type *dir)
286 {
287 closedir(dir);
288 }
289
290
291 /************* Test for regular file in Unix **********/
292
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to examine
Returns:    1 if the path is a regular file or cannot be stat'ed; otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat info;

/* A path that cannot be stat'ed is reported as regular, in the expectation
that the caller's attempt to open it as a file will then fail. */

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
301
302
303 /************* Test stdout for being a terminal in Unix **********/
304
305 static BOOL
306 is_stdout_tty(void)
307 {
308 return isatty(fileno(stdout));
309 }
310
311
312 /************* Directory scanning in Win32 ***********/
313
314 /* I (Philip Hazel) have no means of testing this code. It was contributed by
315 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
316 when it did not exist. */
317
318
319 #elif HAVE_WINDOWS_H
320
321 #ifndef STRICT
322 # define STRICT
323 #endif
324 #ifndef WIN32_LEAN_AND_MEAN
325 # define WIN32_LEAN_AND_MEAN
326 #endif
327 #ifndef INVALID_FILE_ATTRIBUTES
328 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
329 #endif
330
331 #include <windows.h>
332
333 typedef struct directory_type
334 {
335 HANDLE handle;
336 BOOL first;
337 WIN32_FIND_DATA data;
338 } directory_type;
339
340 int
341 isdirectory(char *filename)
342 {
343 DWORD attr = GetFileAttributes(filename);
344 if (attr == INVALID_FILE_ATTRIBUTES)
345 return 0;
346 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
347 }
348
349 directory_type *
350 opendirectory(char *filename)
351 {
352 size_t len;
353 char *pattern;
354 directory_type *dir;
355 DWORD err;
356 len = strlen(filename);
357 pattern = (char *) malloc(len + 3);
358 dir = (directory_type *) malloc(sizeof(*dir));
359 if ((pattern == NULL) || (dir == NULL))
360 {
361 fprintf(stderr, "pcregrep: malloc failed\n");
362 exit(2);
363 }
364 memcpy(pattern, filename, len);
365 memcpy(&(pattern[len]), "\\*", 3);
366 dir->handle = FindFirstFile(pattern, &(dir->data));
367 if (dir->handle != INVALID_HANDLE_VALUE)
368 {
369 free(pattern);
370 dir->first = TRUE;
371 return dir;
372 }
373 err = GetLastError();
374 free(pattern);
375 free(dir);
376 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
377 return NULL;
378 }
379
380 char *
381 readdirectory(directory_type *dir)
382 {
383 for (;;)
384 {
385 if (!dir->first)
386 {
387 if (!FindNextFile(dir->handle, &(dir->data)))
388 return NULL;
389 }
390 else
391 {
392 dir->first = FALSE;
393 }
394 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
395 return dir->data.cFileName;
396 }
397 #ifndef _MSC_VER
398 return NULL; /* Keep compiler happy; never executed */
399 #endif
400 }
401
402 void
403 closedirectory(directory_type *dir)
404 {
405 FindClose(dir->handle);
406 free(dir);
407 }
408
409
410 /************* Test for regular file in Win32 **********/
411
412 /* I don't know how to do this, or if it can be done; assume all paths are
413 regular if they are not directories. */
414
/* Test for a regular file (Win32 version). Anything that isdirectory() does
not report as a directory is treated as regular.

Argument:   filename   the path to examine
Returns:    1 if the path is not a directory; otherwise 0
*/

int isregfile(char *filename)
{
return !isdirectory(filename);   /* The terminating semicolon was missing */
}
419
420
421 /************* Test stdout for being a terminal in Win32 **********/
422
423 /* I don't know how to do this; assume never */
424
425 static BOOL
426 is_stdout_tty(void)
427 {
428 FALSE;
429 }
430
431
432 /************* Directory scanning when we can't do it ***********/
433
434 /* The type is void, and apart from isdirectory(), the functions do nothing. */
435
436 #else
437
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened. */

directory_type * opendirectory(char *filename)
{
(void)filename;
return NULL;
}

/* There are never any entries to read. */

char *readdirectory(directory_type *dir)
{
(void)dir;
return NULL;
}

/* There is nothing to close. */

void closedirectory(directory_type *dir)
{
(void)dir;
}
444
445
446 /************* Test for regular when we can't do it **********/
447
448 /* Assume all files are regular. */
449
/* With no means of testing, every path is reported as a regular file. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
451
452
453 /************* Test stdout for being a terminal when we can't do it **********/
454
455 static BOOL
456 is_stdout_tty(void)
457 {
458 return FALSE;
459 }
460
461
462 #endif
463
464
465
#if ! HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few old systems (SunOS4 is one) have no strerror() in their libraries,
but they do supply the sys_errlist[] table of messages and its size sys_nerr,
from which a replacement is easily made.

Argument:   n   an error number
Returns:    the message for n from sys_errlist[], or a fixed text if n is
              outside the range of the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
485
486
487
488 /*************************************************
489 * Find end of line *
490 *************************************************/
491
492 /* The length of the endline sequence that is found is set via lenptr. This may
493 be zero at the very end of the file if there is no line-ending sequence there.
494
495 Arguments:
496 p current position in line
497 endptr end of available data
498 lenptr where to put the length of the eol sequence
499
500 Returns: pointer to the last byte of the line
501 */
502
503 static char *
504 end_of_line(char *p, char *endptr, int *lenptr)
505 {
506 switch(endlinetype)
507 {
508 default: /* Just in case */
509 case EL_LF:
510 while (p < endptr && *p != '\n') p++;
511 if (p < endptr)
512 {
513 *lenptr = 1;
514 return p + 1;
515 }
516 *lenptr = 0;
517 return endptr;
518
519 case EL_CR:
520 while (p < endptr && *p != '\r') p++;
521 if (p < endptr)
522 {
523 *lenptr = 1;
524 return p + 1;
525 }
526 *lenptr = 0;
527 return endptr;
528
529 case EL_CRLF:
530 for (;;)
531 {
532 while (p < endptr && *p != '\r') p++;
533 if (++p >= endptr)
534 {
535 *lenptr = 0;
536 return endptr;
537 }
538 if (*p == '\n')
539 {
540 *lenptr = 2;
541 return p + 1;
542 }
543 }
544 break;
545
546 case EL_ANY:
547 while (p < endptr)
548 {
549 int extra = 0;
550 register int c = *((unsigned char *)p);
551
552 if (utf8 && c >= 0xc0)
553 {
554 int gcii, gcss;
555 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
556 gcss = 6*extra;
557 c = (c & utf8_table3[extra]) << gcss;
558 for (gcii = 1; gcii <= extra; gcii++)
559 {
560 gcss -= 6;
561 c |= (p[gcii] & 0x3f) << gcss;
562 }
563 }
564
565 p += 1 + extra;
566
567 switch (c)
568 {
569 case 0x0a: /* LF */
570 case 0x0b: /* VT */
571 case 0x0c: /* FF */
572 *lenptr = 1;
573 return p;
574
575 case 0x0d: /* CR */
576 if (p < endptr && *p == 0x0a)
577 {
578 *lenptr = 2;
579 p++;
580 }
581 else *lenptr = 1;
582 return p;
583
584 case 0x85: /* NEL */
585 *lenptr = utf8? 2 : 1;
586 return p;
587
588 case 0x2028: /* LS */
589 case 0x2029: /* PS */
590 *lenptr = 3;
591 return p;
592
593 default:
594 break;
595 }
596 } /* End of loop for ANY case */
597
598 *lenptr = 0; /* Must have hit the end */
599 return endptr;
600 } /* End of overall switch */
601 }
602
603
604
605 /*************************************************
606 * Find start of previous line *
607 *************************************************/
608
609 /* This is called when looking back for before lines to print.
610
611 Arguments:
612 p start of the subsequent line
613 startptr start of available data
614
615 Returns: pointer to the start of the previous line
616 */
617
618 static char *
619 previous_line(char *p, char *startptr)
620 {
621 switch(endlinetype)
622 {
623 default: /* Just in case */
624 case EL_LF:
625 p--;
626 while (p > startptr && p[-1] != '\n') p--;
627 return p;
628
629 case EL_CR:
630 p--;
631 while (p > startptr && p[-1] != '\n') p--;
632 return p;
633
634 case EL_CRLF:
635 for (;;)
636 {
637 p -= 2;
638 while (p > startptr && p[-1] != '\n') p--;
639 if (p <= startptr + 1 || p[-2] == '\r') return p;
640 }
641 return p; /* But control should never get here */
642
643 case EL_ANY:
644 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
645 if (utf8) while ((*p & 0xc0) == 0x80) p--;
646
647 while (p > startptr)
648 {
649 register int c;
650 char *pp = p - 1;
651
652 if (utf8)
653 {
654 int extra = 0;
655 while ((*pp & 0xc0) == 0x80) pp--;
656 c = *((unsigned char *)pp);
657 if (c >= 0xc0)
658 {
659 int gcii, gcss;
660 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
661 gcss = 6*extra;
662 c = (c & utf8_table3[extra]) << gcss;
663 for (gcii = 1; gcii <= extra; gcii++)
664 {
665 gcss -= 6;
666 c |= (pp[gcii] & 0x3f) << gcss;
667 }
668 }
669 }
670 else c = *((unsigned char *)pp);
671
672 switch (c)
673 {
674 case 0x0a: /* LF */
675 case 0x0b: /* VT */
676 case 0x0c: /* FF */
677 case 0x0d: /* CR */
678 case 0x85: /* NEL */
679 case 0x2028: /* LS */
680 case 0x2029: /* PS */
681 return p;
682
683 default:
684 break;
685 }
686
687 p = pp; /* Back one character */
688 } /* End of loop for ANY case */
689
690 return startptr; /* Hit start of data */
691 } /* End of overall switch */
692 }
693
694
695
696
697
698 /*************************************************
699 * Print the previous "after" lines *
700 *************************************************/
701
702 /* This is called if we are about to lose said lines because of buffer filling,
703 and at the end of the file. The data in the line is written using fwrite() so
704 that a binary zero does not terminate it.
705
706 Arguments:
707 lastmatchnumber the number of the last matching line, plus one
708 lastmatchrestart where we restarted after the last match
709 endptr end of available data
710 printname filename for printing
711
712 Returns: nothing
713 */
714
715 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
716 char *endptr, char *printname)
717 {
718 if (after_context > 0 && lastmatchnumber > 0)
719 {
720 int count = 0;
721 while (lastmatchrestart < endptr && count++ < after_context)
722 {
723 int ellength;
724 char *pp = lastmatchrestart;
725 if (printname != NULL) fprintf(stdout, "%s-", printname);
726 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
727 pp = end_of_line(pp, endptr, &ellength);
728 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
729 lastmatchrestart = pp;
730 }
731 hyphenpending = TRUE;
732 }
733 }
734
735
736
737 /*************************************************
738 * Grep an individual file *
739 *************************************************/
740
741 /* This is called from grep_or_recurse() below. It uses a buffer that is three
742 times the value of MBUFTHIRD. The matching point is never allowed to stray into
743 the top third of the buffer, thus keeping more of the file available for
744 context printing or for multiline scanning. For large files, the pointer will
745 be in the middle third most of the time, so the bottom third is available for
746 "before" context printing.
747
748 Arguments:
749 in the fopened FILE stream
750 printname the file name if it is to be printed for each match
751 or NULL if the file name is not to be printed
752 it cannot be NULL if filenames[_nomatch]_only is set
753
754 Returns: 0 if there was at least one match
755 1 otherwise (no matches)
756 */
757
758 static int
759 pcregrep(FILE *in, char *printname)
760 {
761 int rc = 1;
762 int linenumber = 1;
763 int lastmatchnumber = 0;
764 int count = 0;
765 int offsets[99];
766 char *lastmatchrestart = NULL;
767 char buffer[3*MBUFTHIRD];
768 char *ptr = buffer;
769 char *endptr;
770 size_t bufflength;
771 BOOL endhyphenpending = FALSE;
772
773 /* Do the first read into the start of the buffer and set up the pointer to
774 end of what we have. */
775
776 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
777 endptr = buffer + bufflength;
778
779 /* Loop while the current pointer is not at the end of the file. For large
780 files, endptr will be at the end of the buffer when we are in the middle of the
781 file, but ptr will never get there, because as soon as it gets over 2/3 of the
782 way, the buffer is shifted left and re-filled. */
783
784 while (ptr < endptr)
785 {
786 int i, endlinelength;
787 int mrc = 0;
788 BOOL match = FALSE;
789 char *t = ptr;
790 size_t length, linelength;
791
792 /* At this point, ptr is at the start of a line. We need to find the length
793 of the subject string to pass to pcre_exec(). In multiline mode, it is the
794 length remainder of the data in the buffer. Otherwise, it is the length of
795 the next line. After matching, we always advance by the length of the next
796 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
797 that any match is constrained to be in the first line. */
798
799 t = end_of_line(t, endptr, &endlinelength);
800 linelength = t - ptr - endlinelength;
801 length = multiline? endptr - ptr : linelength;
802
803 /* Extra processing for Jeffrey Friedl's debugging. */
804
805 #ifdef JFRIEDL_DEBUG
806 if (jfriedl_XT || jfriedl_XR)
807 {
808 #include <sys/time.h>
809 #include <time.h>
810 struct timeval start_time, end_time;
811 struct timezone dummy;
812
813 if (jfriedl_XT)
814 {
815 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
816 const char *orig = ptr;
817 ptr = malloc(newlen + 1);
818 if (!ptr) {
819 printf("out of memory");
820 exit(2);
821 }
822 endptr = ptr;
823 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
824 for (i = 0; i < jfriedl_XT; i++) {
825 strncpy(endptr, orig, length);
826 endptr += length;
827 }
828 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
829 length = newlen;
830 }
831
832 if (gettimeofday(&start_time, &dummy) != 0)
833 perror("bad gettimeofday");
834
835
836 for (i = 0; i < jfriedl_XR; i++)
837 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
838
839 if (gettimeofday(&end_time, &dummy) != 0)
840 perror("bad gettimeofday");
841
842 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
843 -
844 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
845
846 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
847 return 0;
848 }
849 #endif
850
851
852 /* Run through all the patterns until one matches. Note that we don't include
853 the final newline in the subject string. */
854
855 for (i = 0; i < pattern_count; i++)
856 {
857 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
858 offsets, 99);
859 if (mrc >= 0) { match = TRUE; break; }
860 if (mrc != PCRE_ERROR_NOMATCH)
861 {
862 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
863 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864 fprintf(stderr, "this line:\n");
865 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
866 fprintf(stderr, "\n");
867 if (error_count == 0 &&
868 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
869 {
870 fprintf(stderr, "pcregrep: error %d means that a resource limit "
871 "was exceeded\n", mrc);
872 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873 }
874 if (error_count++ > 20)
875 {
876 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877 exit(2);
878 }
879 match = invert; /* No more matching; don't show the line again */
880 break;
881 }
882 }
883
884 /* If it's a match or a not-match (as required), do what's wanted. */
885
886 if (match != invert)
887 {
888 BOOL hyphenprinted = FALSE;
889
890 /* We've failed if we want a file that doesn't have any matches. */
891
892 if (filenames == FN_NOMATCH_ONLY) return 1;
893
894 /* Just count if just counting is wanted. */
895
896 if (count_only) count++;
897
898 /* If all we want is a file name, there is no need to scan any more lines
899 in the file. */
900
901 else if (filenames == FN_ONLY)
902 {
903 fprintf(stdout, "%s\n", printname);
904 return 0;
905 }
906
907 /* Likewise, if all we want is a yes/no answer. */
908
909 else if (quiet) return 0;
910
911 /* The --only-matching option prints just the substring that matched, and
912 does not print any context. */
913
914 else if (only_matching)
915 {
916 if (printname != NULL) fprintf(stdout, "%s:", printname);
917 if (number) fprintf(stdout, "%d:", linenumber);
918 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
919 fprintf(stdout, "\n");
920 }
921
922 /* This is the default case when none of the above options is set. We print
923 the matching lines(s), possibly preceded and/or followed by other lines of
924 context. */
925
926 else
927 {
928 /* See if there is a requirement to print some "after" lines from a
929 previous match. We never print any overlaps. */
930
931 if (after_context > 0 && lastmatchnumber > 0)
932 {
933 int ellength;
934 int linecount = 0;
935 char *p = lastmatchrestart;
936
937 while (p < ptr && linecount < after_context)
938 {
939 p = end_of_line(p, ptr, &ellength);
940 linecount++;
941 }
942
943 /* It is important to advance lastmatchrestart during this printing so
944 that it interacts correctly with any "before" printing below. Print
945 each line's data using fwrite() in case there are binary zeroes. */
946
947 while (lastmatchrestart < p)
948 {
949 char *pp = lastmatchrestart;
950 if (printname != NULL) fprintf(stdout, "%s-", printname);
951 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
952 pp = end_of_line(pp, endptr, &ellength);
953 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
954 lastmatchrestart = pp;
955 }
956 if (lastmatchrestart != ptr) hyphenpending = TRUE;
957 }
958
959 /* If there were non-contiguous lines printed above, insert hyphens. */
960
961 if (hyphenpending)
962 {
963 fprintf(stdout, "--\n");
964 hyphenpending = FALSE;
965 hyphenprinted = TRUE;
966 }
967
968 /* See if there is a requirement to print some "before" lines for this
969 match. Again, don't print overlaps. */
970
971 if (before_context > 0)
972 {
973 int linecount = 0;
974 char *p = ptr;
975
976 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
977 linecount < before_context)
978 {
979 linecount++;
980 p = previous_line(p, buffer);
981 }
982
983 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
984 fprintf(stdout, "--\n");
985
986 while (p < ptr)
987 {
988 int ellength;
989 char *pp = p;
990 if (printname != NULL) fprintf(stdout, "%s-", printname);
991 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
992 pp = end_of_line(pp, endptr, &ellength);
993 fwrite(p, 1, pp - p, stdout);
994 p = pp;
995 }
996 }
997
998 /* Now print the matching line(s); ensure we set hyphenpending at the end
999 of the file if any context lines are being output. */
1000
1001 if (after_context > 0 || before_context > 0)
1002 endhyphenpending = TRUE;
1003
1004 if (printname != NULL) fprintf(stdout, "%s:", printname);
1005 if (number) fprintf(stdout, "%d:", linenumber);
1006
1007 /* In multiline mode, we want to print to the end of the line in which
1008 the end of the matched string is found, so we adjust linelength and the
1009 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1010 start of the match will always be before the first newline sequence. */
1011
1012 if (multiline)
1013 {
1014 int ellength;
1015 char *endmatch = ptr + offsets[1];
1016 t = ptr;
1017 while (t < endmatch)
1018 {
1019 t = end_of_line(t, endptr, &ellength);
1020 if (t <= endmatch) linenumber++; else break;
1021 }
1022 endmatch = end_of_line(endmatch, endptr, &ellength);
1023 linelength = endmatch - ptr - ellength;
1024 }
1025
1026 /*** NOTE: Use only fwrite() to output the data line, so that binary
1027 zeroes are treated as just another data character. */
1028
1029 /* This extra option, for Jeffrey Friedl's debugging requirements,
1030 replaces the matched string, or a specific captured string if it exists,
1031 with X. When this happens, colouring is ignored. */
1032
1033 #ifdef JFRIEDL_DEBUG
1034 if (S_arg >= 0 && S_arg < mrc)
1035 {
1036 int first = S_arg * 2;
1037 int last = first + 1;
1038 fwrite(ptr, 1, offsets[first], stdout);
1039 fprintf(stdout, "X");
1040 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1041 }
1042 else
1043 #endif
1044
1045 /* We have to split the line(s) up if colouring. */
1046
1047 if (do_colour)
1048 {
1049 fwrite(ptr, 1, offsets[0], stdout);
1050 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1051 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1052 fprintf(stdout, "%c[00m", 0x1b);
1053 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1054 }
1055 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1056 }
1057
1058 /* End of doing what has to be done for a match */
1059
1060 rc = 0; /* Had some success */
1061
1062 /* Remember where the last match happened for after_context. We remember
1063 where we are about to restart, and that line's number. */
1064
1065 lastmatchrestart = ptr + linelength + endlinelength;
1066 lastmatchnumber = linenumber + 1;
1067 }
1068
1069 /* Advance to after the newline and increment the line number. */
1070
1071 ptr += linelength + endlinelength;
1072 linenumber++;
1073
1074 /* If we haven't yet reached the end of the file (the buffer is full), and
1075 the current point is in the top 1/3 of the buffer, slide the buffer down by
1076 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1077 about to be lost, print them. */
1078
1079 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1080 {
1081 if (after_context > 0 &&
1082 lastmatchnumber > 0 &&
1083 lastmatchrestart < buffer + MBUFTHIRD)
1084 {
1085 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1086 lastmatchnumber = 0;
1087 }
1088
1089 /* Now do the shuffle */
1090
1091 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1092 ptr -= MBUFTHIRD;
1093 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1094 endptr = buffer + bufflength;
1095
1096 /* Adjust any last match point */
1097
1098 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1099 }
1100 } /* Loop through the whole file */
1101
1102 /* End of file; print final "after" lines if wanted; do_after_lines sets
1103 hyphenpending if it prints something. */
1104
1105 if (!only_matching && !count_only)
1106 {
1107 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1108 hyphenpending |= endhyphenpending;
1109 }
1110
1111 /* Print the file name if we are looking for those without matches and there
1112 were none. If we found a match, we won't have got this far. */
1113
1114 if (filenames == FN_NOMATCH_ONLY)
1115 {
1116 fprintf(stdout, "%s\n", printname);
1117 return 0;
1118 }
1119
1120 /* Print the match count if wanted */
1121
1122 if (count_only)
1123 {
1124 if (printname != NULL) fprintf(stdout, "%s:", printname);
1125 fprintf(stdout, "%d\n", count);
1126 }
1127
1128 return rc;
1129 }
1130
1131
1132
1133 /*************************************************
1134 * Grep a file or recurse into a directory *
1135 *************************************************/
1136
1137 /* Given a path name, if it's a directory, scan all the files if we are
1138 recursing; if it's a file, grep it.
1139
1140 Arguments:
1141 pathname the path to investigate
1142 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1143 only_one_at_top TRUE if the path is the only one at toplevel
1144
1145 Returns: 0 if there was at least one match
1146 1 if there were no matches
1147 2 there was some kind of error
1148
1149 However, file opening failures are suppressed if "silent" is set.
1150 */
1151
1152 static int
1153 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1154 {
1155 int rc = 1;
1156 int sep;
1157 FILE *in;
1158
1159 /* If the file name is "-" we scan stdin */
1160
1161 if (strcmp(pathname, "-") == 0)
1162 {
1163 return pcregrep(stdin,
1164 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1165 stdin_name : NULL);
1166 }
1167
1168
1169 /* If the file is a directory, skip if skipping or if we are recursing, scan
1170 each file within it, subject to any include or exclude patterns that were set.
1171 The scanning code is localized so it can be made system-specific. */
1172
1173 if ((sep = isdirectory(pathname)) != 0)
1174 {
1175 if (dee_action == dee_SKIP) return 1;
1176 if (dee_action == dee_RECURSE)
1177 {
1178 char buffer[1024];
1179 char *nextfile;
1180 directory_type *dir = opendirectory(pathname);
1181
1182 if (dir == NULL)
1183 {
1184 if (!silent)
1185 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1186 strerror(errno));
1187 return 2;
1188 }
1189
1190 while ((nextfile = readdirectory(dir)) != NULL)
1191 {
1192 int frc, blen;
1193 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1194 blen = strlen(buffer);
1195
1196 if (exclude_compiled != NULL &&
1197 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1198 continue;
1199
1200 if (include_compiled != NULL &&
1201 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1202 continue;
1203
1204 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1205 if (frc > 1) rc = frc;
1206 else if (frc == 0 && rc == 1) rc = 0;
1207 }
1208
1209 closedirectory(dir);
1210 return rc;
1211 }
1212 }
1213
1214 /* If the file is not a directory and not a regular file, skip it if that's
1215 been requested. */
1216
1217 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1218
1219 /* Control reaches here if we have a regular file, or if we have a directory
1220 and recursion or skipping was not requested, or if we have anything else and
1221 skipping was not requested. The scan proceeds. If this is the first and only
1222 argument at top level, we don't show the file name, unless we are only showing
1223 the file name, or the filename was forced (-H). */
1224
1225 in = fopen(pathname, "r");
1226 if (in == NULL)
1227 {
1228 if (!silent)
1229 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1230 strerror(errno));
1231 return 2;
1232 }
1233
1234 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1235 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1236
1237 fclose(in);
1238 return rc;
1239 }
1240
1241
1242
1243
1244 /*************************************************
1245 * Usage function *
1246 *************************************************/
1247
1248 static int
1249 usage(int rc)
1250 {
1251 option_item *op;
1252 fprintf(stderr, "Usage: pcregrep [-");
1253 for (op = optionlist; op->one_char != 0; op++)
1254 {
1255 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1256 }
1257 fprintf(stderr, "] [long options] [pattern] [files]\n");
1258 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1259 return rc;
1260 }
1261
1262
1263
1264
1265 /*************************************************
1266 * Help function *
1267 *************************************************/
1268
1269 static void
1270 help(void)
1271 {
1272 option_item *op;
1273
1274 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1275 printf("Search for PATTERN in each FILE or standard input.\n");
1276 printf("PATTERN must be present if neither -e nor -f is used.\n");
1277 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1278 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1279
1280 printf("Options:\n");
1281
1282 for (op = optionlist; op->one_char != 0; op++)
1283 {
1284 int n;
1285 char s[4];
1286 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1287 printf(" %s --%s%n", s, op->long_name, &n);
1288 n = 30 - n;
1289 if (n < 1) n = 1;
1290 printf("%.*s%s\n", n, " ", op->help_text);
1291 }
1292
1293 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1294 printf("trailing white space is removed and blank lines are ignored.\n");
1295 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1296
1297 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1298 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1299 }
1300
1301
1302
1303
1304 /*************************************************
1305 * Handle a single-letter, no data option *
1306 *************************************************/
1307
1308 static int
1309 handle_option(int letter, int options)
1310 {
1311 switch(letter)
1312 {
1313 case N_HELP: help(); exit(0);
1314 case 'c': count_only = TRUE; break;
1315 case 'F': process_options |= PO_FIXED_STRINGS; break;
1316 case 'H': filenames = FN_FORCE; break;
1317 case 'h': filenames = FN_NONE; break;
1318 case 'i': options |= PCRE_CASELESS; break;
1319 case 'l': filenames = FN_ONLY; break;
1320 case 'L': filenames = FN_NOMATCH_ONLY; break;
1321 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1322 case 'n': number = TRUE; break;
1323 case 'o': only_matching = TRUE; break;
1324 case 'q': quiet = TRUE; break;
1325 case 'r': dee_action = dee_RECURSE; break;
1326 case 's': silent = TRUE; break;
1327 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1328 case 'v': invert = TRUE; break;
1329 case 'w': process_options |= PO_WORD_MATCH; break;
1330 case 'x': process_options |= PO_LINE_MATCH; break;
1331
1332 case 'V':
1333 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1334 exit(0);
1335 break;
1336
1337 default:
1338 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1339 exit(usage(2));
1340 }
1341
1342 return options;
1343 }
1344
1345
1346
1347
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* Turn a number into its English ordinal form: "1st", "2nd", "3rd", "4th",
"11th", "21st", "112th", and so on.

Argument:
  n         the number to format

Returns:    a pointer to a static buffer that holds the text; the contents
            are overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];    /* room for any int, the suffix, and the zero */
char *p = buffer;
int last_two = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* Numbers ending in 11, 12 or 13 take "th" even though the final digit is 1,
2 or 3. For everything else the final digit decides. */

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1:  strcpy(p, "st"); break;
  case 2:  strcpy(p, "nd"); break;
  case 3:  strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1370
1371
1372
1373 /*************************************************
1374 * Compile a single pattern *
1375 *************************************************/
1376
1377 /* When the -F option has been used, this is called for each substring.
1378 Otherwise it's called for each supplied pattern.
1379
1380 Arguments:
1381 pattern the pattern string
1382 options the PCRE options
1383 filename the file name, or NULL for a command-line pattern
1384 count 0 if this is the only command line pattern, or
1385 number of the command line pattern, or
1386 linenumber for a pattern from a file
1387
1388 Returns: TRUE on success, FALSE after an error
1389 */
1390
1391 static BOOL
1392 compile_single_pattern(char *pattern, int options, char *filename, int count)
1393 {
1394 char buffer[MBUFTHIRD + 16];
1395 const char *error;
1396 int errptr;
1397
1398 if (pattern_count >= MAX_PATTERN_COUNT)
1399 {
1400 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1401 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1402 return FALSE;
1403 }
1404
1405 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1406 suffix[process_options]);
1407 pattern_list[pattern_count] =
1408 pcre_compile(buffer, options, &error, &errptr, pcretables);
1409 if (pattern_list[pattern_count++] != NULL) return TRUE;
1410
1411 /* Handle compile errors */
1412
1413 errptr -= (int)strlen(prefix[process_options]);
1414 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1415
1416 if (filename == NULL)
1417 {
1418 if (count == 0)
1419 fprintf(stderr, "pcregrep: Error in command-line regex "
1420 "at offset %d: %s\n", errptr, error);
1421 else
1422 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1423 "at offset %d: %s\n", ordin(count), errptr, error);
1424 }
1425 else
1426 {
1427 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1428 "at offset %d: %s\n", count, filename, errptr, error);
1429 }
1430
1431 return FALSE;
1432 }
1433
1434
1435
1436 /*************************************************
1437 * Compile one supplied pattern *
1438 *************************************************/
1439
1440 /* When the -F option has been used, each string may be a list of strings,
1441 separated by line breaks. They will be matched literally.
1442
1443 Arguments:
1444 pattern the pattern string
1445 options the PCRE options
1446 filename the file name, or NULL for a command-line pattern
1447 count 0 if this is the only command line pattern, or
1448 number of the command line pattern, or
1449 linenumber for a pattern from a file
1450
1451 Returns: TRUE on success, FALSE after an error
1452 */
1453
1454 static BOOL
1455 compile_pattern(char *pattern, int options, char *filename, int count)
1456 {
1457 if ((process_options & PO_FIXED_STRINGS) != 0)
1458 {
1459 char *eop = pattern + strlen(pattern);
1460 char buffer[MBUFTHIRD];
1461 for(;;)
1462 {
1463 int ellength;
1464 char *p = end_of_line(pattern, eop, &ellength);
1465 if (ellength == 0)
1466 return compile_single_pattern(pattern, options, filename, count);
1467 sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1468 pattern = p;
1469 if (!compile_single_pattern(buffer, options, filename, count))
1470 return FALSE;
1471 }
1472 }
1473 else return compile_single_pattern(pattern, options, filename, count);
1474 }
1475
1476
1477
1478 /*************************************************
1479 * Main program *
1480 *************************************************/
1481
1482 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1483
1484 int
1485 main(int argc, char **argv)
1486 {
1487 int i, j;
1488 int rc = 1;
1489 int pcre_options = 0;
1490 int cmd_pattern_count = 0;
1491 int errptr;
1492 BOOL only_one_at_top;
1493 char *patterns[MAX_PATTERN_COUNT];
1494 const char *locale_from = "--locale";
1495 const char *error;
1496
1497 /* Set the default line ending value from the default in the PCRE library;
1498 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1499 */
1500
1501 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1502 switch(i)
1503 {
1504 default: newline = (char *)"lf"; break;
1505 case '\r': newline = (char *)"cr"; break;
1506 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1507 case -1: newline = (char *)"any"; break;
1508 }
1509
1510 /* Process the options */
1511
1512 for (i = 1; i < argc; i++)
1513 {
1514 option_item *op = NULL;
1515 char *option_data = (char *)""; /* default to keep compiler happy */
1516 BOOL longop;
1517 BOOL longopwasequals = FALSE;
1518
1519 if (argv[i][0] != '-') break;
1520
1521 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1522 but only if we have previously had -e or -f to define the patterns. */
1523
1524 if (argv[i][1] == 0)
1525 {
1526 if (pattern_filename != NULL || pattern_count > 0) break;
1527 else exit(usage(2));
1528 }
1529
1530 /* Handle a long name option, or -- to terminate the options */
1531
1532 if (argv[i][1] == '-')
1533 {
1534 char *arg = argv[i] + 2;
1535 char *argequals = strchr(arg, '=');
1536
1537 if (*arg == 0) /* -- terminates options */
1538 {
1539 i++;
1540 break; /* out of the options-handling loop */
1541 }
1542
1543 longop = TRUE;
1544
1545 /* Some long options have data that follows after =, for example file=name.
1546 Some options have variations in the long name spelling: specifically, we
1547 allow "regexp" because GNU grep allows it, though I personally go along
1548 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1549 These options are entered in the table as "regex(p)". No option is in both
1550 these categories, fortunately. */
1551
1552 for (op = optionlist; op->one_char != 0; op++)
1553 {
1554 char *opbra = strchr(op->long_name, '(');
1555 char *equals = strchr(op->long_name, '=');
1556 if (opbra == NULL) /* Not a (p) case */
1557 {
1558 if (equals == NULL) /* Not thing=data case */
1559 {
1560 if (strcmp(arg, op->long_name) == 0) break;
1561 }
1562 else /* Special case xxx=data */
1563 {
1564 int oplen = equals - op->long_name;
1565 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1566 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1567 {
1568 option_data = arg + arglen;
1569 if (*option_data == '=')
1570 {
1571 option_data++;
1572 longopwasequals = TRUE;
1573 }
1574 break;
1575 }
1576 }
1577 }
1578 else /* Special case xxxx(p) */
1579 {
1580 char buff1[24];
1581 char buff2[24];
1582 int baselen = opbra - op->long_name;
1583 sprintf(buff1, "%.*s", baselen, op->long_name);
1584 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1585 opbra + 1);
1586 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1587 break;
1588 }
1589 }
1590
1591 if (op->one_char == 0)
1592 {
1593 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1594 exit(usage(2));
1595 }
1596 }
1597
1598
1599 /* Jeffrey Friedl's debugging harness uses these additional options which
1600 are not in the right form for putting in the option table because they use
1601 only one hyphen, yet are more than one character long. By putting them
1602 separately here, they will not get displayed as part of the help() output,
1603 but I don't think Jeffrey will care about that. */
1604
1605 #ifdef JFRIEDL_DEBUG
1606 else if (strcmp(argv[i], "-pre") == 0) {
1607 jfriedl_prefix = argv[++i];
1608 continue;
1609 } else if (strcmp(argv[i], "-post") == 0) {
1610 jfriedl_postfix = argv[++i];
1611 continue;
1612 } else if (strcmp(argv[i], "-XT") == 0) {
1613 sscanf(argv[++i], "%d", &jfriedl_XT);
1614 continue;
1615 } else if (strcmp(argv[i], "-XR") == 0) {
1616 sscanf(argv[++i], "%d", &jfriedl_XR);
1617 continue;
1618 }
1619 #endif
1620
1621
1622 /* One-char options; many that have no data may be in a single argument; we
1623 continue till we hit the last one or one that needs data. */
1624
1625 else
1626 {
1627 char *s = argv[i] + 1;
1628 longop = FALSE;
1629 while (*s != 0)
1630 {
1631 for (op = optionlist; op->one_char != 0; op++)
1632 { if (*s == op->one_char) break; }
1633 if (op->one_char == 0)
1634 {
1635 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1636 *s, argv[i]);
1637 exit(usage(2));
1638 }
1639 if (op->type != OP_NODATA || s[1] == 0)
1640 {
1641 option_data = s+1;
1642 break;
1643 }
1644 pcre_options = handle_option(*s++, pcre_options);
1645 }
1646 }
1647
1648 /* At this point we should have op pointing to a matched option. If the type
1649 is NO_DATA, it means that there is no data, and the option might set
1650 something in the PCRE options. */
1651
1652 if (op->type == OP_NODATA)
1653 {
1654 pcre_options = handle_option(op->one_char, pcre_options);
1655 continue;
1656 }
1657
1658 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1659 either has a value or defaults to something. It cannot have data in a
1660 separate item. At the moment, the only such options are "colo(u)r" and
1661 Jeffrey Friedl's special -S debugging option. */
1662
1663 if (*option_data == 0 &&
1664 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1665 {
1666 switch (op->one_char)
1667 {
1668 case N_COLOUR:
1669 colour_option = (char *)"auto";
1670 break;
1671 #ifdef JFRIEDL_DEBUG
1672 case 'S':
1673 S_arg = 0;
1674 break;
1675 #endif
1676 }
1677 continue;
1678 }
1679
1680 /* Otherwise, find the data string for the option. */
1681
1682 if (*option_data == 0)
1683 {
1684 if (i >= argc - 1 || longopwasequals)
1685 {
1686 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1687 exit(usage(2));
1688 }
1689 option_data = argv[++i];
1690 }
1691
1692 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1693 multiple times to create a list of patterns. */
1694
1695 if (op->type == OP_PATLIST)
1696 {
1697 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1698 {
1699 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1700 MAX_PATTERN_COUNT);
1701 return 2;
1702 }
1703 patterns[cmd_pattern_count++] = option_data;
1704 }
1705
1706 /* Otherwise, deal with single string or numeric data values. */
1707
1708 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1709 {
1710 *((char **)op->dataptr) = option_data;
1711 }
1712 else
1713 {
1714 char *endptr;
1715 int n = strtoul(option_data, &endptr, 10);
1716 if (*endptr != 0)
1717 {
1718 if (longop)
1719 {
1720 char *equals = strchr(op->long_name, '=');
1721 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1722 equals - op->long_name;
1723 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1724 option_data, nlen, op->long_name);
1725 }
1726 else
1727 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1728 option_data, op->one_char);
1729 exit(usage(2));
1730 }
1731 *((int *)op->dataptr) = n;
1732 }
1733 }
1734
1735 /* Options have been decoded. If -C was used, its value is used as a default
1736 for -A and -B. */
1737
1738 if (both_context > 0)
1739 {
1740 if (after_context == 0) after_context = both_context;
1741 if (before_context == 0) before_context = both_context;
1742 }
1743
1744 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1745 LC_ALL environment variable is set, and if so, use it. */
1746
1747 if (locale == NULL)
1748 {
1749 locale = getenv("LC_ALL");
1750 locale_from = "LCC_ALL";
1751 }
1752
1753 if (locale == NULL)
1754 {
1755 locale = getenv("LC_CTYPE");
1756 locale_from = "LC_CTYPE";
1757 }
1758
1759 /* If a locale has been provided, set it, and generate the tables the PCRE
1760 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1761
1762 if (locale != NULL)
1763 {
1764 if (setlocale(LC_CTYPE, locale) == NULL)
1765 {
1766 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1767 locale, locale_from);
1768 return 2;
1769 }
1770 pcretables = pcre_maketables();
1771 }
1772
1773 /* Sort out colouring */
1774
1775 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1776 {
1777 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1778 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1779 else
1780 {
1781 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1782 colour_option);
1783 return 2;
1784 }
1785 if (do_colour)
1786 {
1787 char *cs = getenv("PCREGREP_COLOUR");
1788 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1789 if (cs != NULL) colour_string = cs;
1790 }
1791 }
1792
1793 /* Interpret the newline type; the default settings are Unix-like. */
1794
1795 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1796 {
1797 pcre_options |= PCRE_NEWLINE_CR;
1798 endlinetype = EL_CR;
1799 }
1800 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1801 {
1802 pcre_options |= PCRE_NEWLINE_LF;
1803 endlinetype = EL_LF;
1804 }
1805 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1806 {
1807 pcre_options |= PCRE_NEWLINE_CRLF;
1808 endlinetype = EL_CRLF;
1809 }
1810 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1811 {
1812 pcre_options |= PCRE_NEWLINE_ANY;
1813 endlinetype = EL_ANY;
1814 }
1815 else
1816 {
1817 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1818 return 2;
1819 }
1820
1821 /* Interpret the text values for -d and -D */
1822
1823 if (dee_option != NULL)
1824 {
1825 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1826 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1827 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1828 else
1829 {
1830 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1831 return 2;
1832 }
1833 }
1834
1835 if (DEE_option != NULL)
1836 {
1837 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1838 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1839 else
1840 {
1841 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1842 return 2;
1843 }
1844 }
1845
1846 /* Check the values for Jeffrey Friedl's debugging options. */
1847
1848 #ifdef JFRIEDL_DEBUG
1849 if (S_arg > 9)
1850 {
1851 fprintf(stderr, "pcregrep: bad value for -S option\n");
1852 return 2;
1853 }
1854 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1855 {
1856 if (jfriedl_XT == 0) jfriedl_XT = 1;
1857 if (jfriedl_XR == 0) jfriedl_XR = 1;
1858 }
1859 #endif
1860
1861 /* Get memory to store the pattern and hints lists. */
1862
1863 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1864 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1865
1866 if (pattern_list == NULL || hints_list == NULL)
1867 {
1868 fprintf(stderr, "pcregrep: malloc failed\n");
1869 return 2;
1870 }
1871
1872 /* If no patterns were provided by -e, and there is no file provided by -f,
1873 the first argument is the one and only pattern, and it must exist. */
1874
1875 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1876 {
1877 if (i >= argc) return usage(2);
1878 patterns[cmd_pattern_count++] = argv[i++];
1879 }
1880
1881 /* Compile the patterns that were provided on the command line, either by
1882 multiple uses of -e or as a single unkeyed pattern. */
1883
1884 for (j = 0; j < cmd_pattern_count; j++)
1885 {
1886 if (!compile_pattern(patterns[j], pcre_options, NULL,
1887 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1888 return 2;
1889 }
1890
1891 /* Compile the regular expressions that are provided in a file. */
1892
1893 if (pattern_filename != NULL)
1894 {
1895 int linenumber = 0;
1896 FILE *f;
1897 char *filename;
1898 char buffer[MBUFTHIRD];
1899
1900 if (strcmp(pattern_filename, "-") == 0)
1901 {
1902 f = stdin;
1903 filename = stdin_name;
1904 }
1905 else
1906 {
1907 f = fopen(pattern_filename, "r");
1908 if (f == NULL)
1909 {
1910 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1911 strerror(errno));
1912 return 2;
1913 }
1914 filename = pattern_filename;
1915 }
1916
1917 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1918 {
1919 char *s = buffer + (int)strlen(buffer);
1920 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1921 *s = 0;
1922 linenumber++;
1923 if (buffer[0] == 0) continue; /* Skip blank lines */
1924 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1925 return 2;
1926 }
1927
1928 if (f != stdin) fclose(f);
1929 }
1930
1931 /* Study the regular expressions, as we will be running them many times */
1932
1933 for (j = 0; j < pattern_count; j++)
1934 {
1935 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1936 if (error != NULL)
1937 {
1938 char s[16];
1939 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1940 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1941 return 2;
1942 }
1943 }
1944
1945 /* If there are include or exclude patterns, compile them. */
1946
1947 if (exclude_pattern != NULL)
1948 {
1949 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1950 pcretables);
1951 if (exclude_compiled == NULL)
1952 {
1953 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1954 errptr, error);
1955 return 2;
1956 }
1957 }
1958
1959 if (include_pattern != NULL)
1960 {
1961 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1962 pcretables);
1963 if (include_compiled == NULL)
1964 {
1965 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1966 errptr, error);
1967 return 2;
1968 }
1969 }
1970
1971 /* If there are no further arguments, do the business on stdin and exit. */
1972
1973 if (i >= argc)
1974 return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1975
1976 /* Otherwise, work through the remaining arguments as files or directories.
1977 Pass in the fact that there is only one argument at top level - this suppresses
1978 the file name if the argument is not a directory and filenames are not
1979 otherwise forced. */
1980
1981 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1982
1983 for (; i < argc; i++)
1984 {
1985 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1986 only_one_at_top);
1987 if (frc > 1) rc = frc;
1988 else if (frc == 0 && rc == 1) rc = 0;
1989 }
1990
1991 return rc;
1992 }
1993
1994 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12