/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 151 - (show annotations) (download)
Tue Apr 17 15:07:29 2007 UTC (7 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 58785 byte(s)
Tidies: added some casts and some missing #ifdefs.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 # include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #ifdef HAVE_UNISTD_H
54 # include <unistd.h>
55 #endif
56
57 #include <pcre.h>
58
/* Boolean support: BOOL is a plain int holding one of the two values below. */

typedef int BOOL;

#define TRUE  1
#define FALSE 0

/* Named limit of 100 patterns; its use lies outside this part of the file. */

#define MAX_PATTERN_COUNT 100

/* One third of the size of the main data buffer: 8192 bytes, or BUFSIZ when
the C library's own buffer size is larger than that. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which selects how file names are
output. The numerical order matters: every value greater than FN_DEFAULT is
taken to mean that a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Actions for the -d option (directories) and the -D option (devices) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special pattern-processing options */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
92
93
94
95 /*************************************************
96 * Global variables *
97 *************************************************/
98
99 /* Jeffrey Friedl has some debugging requirements that are not part of the
100 regular code. */
101
102 #ifdef JFRIEDL_DEBUG
103 static int S_arg = -1;
104 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106 static const char *jfriedl_prefix = "";
107 static const char *jfriedl_postfix = "";
108 #endif
109
110 static int endlinetype;
111
112 static char *colour_string = (char *)"1;31";
113 static char *colour_option = NULL;
114 static char *dee_option = NULL;
115 static char *DEE_option = NULL;
116 static char *newline = NULL;
117 static char *pattern_filename = NULL;
118 static char *stdin_name = (char *)"(standard input)";
119 static char *locale = NULL;
120
121 static const unsigned char *pcretables = NULL;
122
123 static int pattern_count = 0;
124 static pcre **pattern_list = NULL;
125 static pcre_extra **hints_list = NULL;
126
127 static char *include_pattern = NULL;
128 static char *exclude_pattern = NULL;
129
130 static pcre *include_compiled = NULL;
131 static pcre *exclude_compiled = NULL;
132
133 static int after_context = 0;
134 static int before_context = 0;
135 static int both_context = 0;
136 static int dee_action = dee_READ;
137 static int DEE_action = DEE_READ;
138 static int error_count = 0;
139 static int filenames = FN_DEFAULT;
140 static int process_options = 0;
141
142 static BOOL count_only = FALSE;
143 static BOOL do_colour = FALSE;
144 static BOOL hyphenpending = FALSE;
145 static BOOL invert = FALSE;
146 static BOOL multiline = FALSE;
147 static BOOL number = FALSE;
148 static BOOL only_matching = FALSE;
149 static BOOL quiet = FALSE;
150 static BOOL silent = FALSE;
151 static BOOL utf8 = FALSE;
152
/* Kinds of option, followed by the structure that describes one entry in the
option table. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values above */
  int one_char;            /* single-letter form of the option */
  void *dataptr;           /* variable associated with the option, or NULL */
  const char *long_name;   /* long form of the option */
  const char *help_text;   /* one-line description */
} option_item;
165
166 /* Options without a single-letter equivalent get a negative value. This can be
167 used to identify them. */
168
169 #define N_COLOUR (-1)
170 #define N_EXCLUDE (-2)
171 #define N_HELP (-3)
172 #define N_INCLUDE (-4)
173 #define N_LABEL (-5)
174 #define N_LOCALE (-6)
175 #define N_NULL (-7)
176
177 static option_item optionlist[] = {
178 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
179 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
180 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
181 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
182 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
183 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
184 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
185 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
186 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
187 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
188 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
189 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
190 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
191 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
192 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
193 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
194 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
195 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
196 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
197 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
198 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
199 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
200 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
201 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
202 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
203 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
204 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
205 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
206 #ifdef JFRIEDL_DEBUG
207 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
208 #endif
209 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
210 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
211 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
212 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
213 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
214 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
215 { OP_NODATA, 0, NULL, NULL, NULL }
216 };
217
/* Text placed before and after each pattern to implement the -w, -x and -F
options. The index is the process_options value, in which -w sets the 1 bit,
-x the 2 bit and -F the 4 bit. Giving both -w and -x has the same effect as -x
on its own, so entries 3 and 7 simply repeat entries 2 and 6. */

static const char *prefix[] = {
  "",            /* 0: nothing special */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: nothing special */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
228
/* UTF-8 tables. The line-scanning code uses them to decode multi-byte
characters when the newline setting is "any" or "anycrlf". */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes that follow it. */

const int utf8_table3[] = {
  0xff,   /* no additional bytes */
  0x1f,   /* 1 additional byte */
  0x0f,   /* 2 additional bytes */
  0x07,   /* 3 additional bytes */
  0x03,   /* 4 additional bytes */
  0x01    /* 5 additional bytes */
};

/* Number of additional bytes in a character, indexed by the low-order six
bits of its first byte (which is 0xc0 or greater). */

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2,   2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3,   4, 4, 4, 4, 5, 5, 5, 5
};
238
239
240
241 /*************************************************
242 * OS-specific functions *
243 *************************************************/
244
245 /* These functions are defined so that they can be made system specific,
246 although at present the only ones are for Unix, Win32, and for "no support". */
247
248
249 /************* Directory scanning in Unix ***********/
250
251 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252 #include <sys/types.h>
253 #include <sys/stat.h>
254 #include <dirent.h>
255
256 typedef DIR directory_type;
257
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory; otherwise 0.
            A stat() failure also gives 0, in the expectation that opening
            the path as a file will then fail.
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
266
267 static directory_type *
268 opendirectory(char *filename)
269 {
270 return opendir(filename);
271 }
272
273 static char *
274 readdirectory(directory_type *dir)
275 {
276 for (;;)
277 {
278 struct dirent *dent = readdir(dir);
279 if (dent == NULL) return NULL;
280 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
281 return dent->d_name;
282 }
283 /* Control never reaches here */
284 }
285
286 static void
287 closedirectory(directory_type *dir)
288 {
289 closedir(dir);
290 }
291
292
293 /************* Test for regular file in Unix **********/
294
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, 0 if it reports anything else.
            A stat() failure gives 1, in the expectation that opening the
            path as a file will then fail.
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISREG(info.st_mode) ? 1 : 0;
  return 1;
}
303
304
305 /************* Test stdout for being a terminal in Unix **********/
306
307 static BOOL
308 is_stdout_tty(void)
309 {
310 return isatty(fileno(stdout));
311 }
312
313
314 /************* Directory scanning in Win32 ***********/
315
316 /* I (Philip Hazel) have no means of testing this code. It was contributed by
317 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318 when it did not exist. */
319
320
321 #elif HAVE_WINDOWS_H
322
323 #ifndef STRICT
324 # define STRICT
325 #endif
326 #ifndef WIN32_LEAN_AND_MEAN
327 # define WIN32_LEAN_AND_MEAN
328 #endif
329 #ifndef INVALID_FILE_ATTRIBUTES
330 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331 #endif
332
333 #include <windows.h>
334
335 typedef struct directory_type
336 {
337 HANDLE handle;
338 BOOL first;
339 WIN32_FIND_DATA data;
340 } directory_type;
341
342 int
343 isdirectory(char *filename)
344 {
345 DWORD attr = GetFileAttributes(filename);
346 if (attr == INVALID_FILE_ATTRIBUTES)
347 return 0;
348 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
349 }
350
351 directory_type *
352 opendirectory(char *filename)
353 {
354 size_t len;
355 char *pattern;
356 directory_type *dir;
357 DWORD err;
358 len = strlen(filename);
359 pattern = (char *) malloc(len + 3);
360 dir = (directory_type *) malloc(sizeof(*dir));
361 if ((pattern == NULL) || (dir == NULL))
362 {
363 fprintf(stderr, "pcregrep: malloc failed\n");
364 exit(2);
365 }
366 memcpy(pattern, filename, len);
367 memcpy(&(pattern[len]), "\\*", 3);
368 dir->handle = FindFirstFile(pattern, &(dir->data));
369 if (dir->handle != INVALID_HANDLE_VALUE)
370 {
371 free(pattern);
372 dir->first = TRUE;
373 return dir;
374 }
375 err = GetLastError();
376 free(pattern);
377 free(dir);
378 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
379 return NULL;
380 }
381
382 char *
383 readdirectory(directory_type *dir)
384 {
385 for (;;)
386 {
387 if (!dir->first)
388 {
389 if (!FindNextFile(dir->handle, &(dir->data)))
390 return NULL;
391 }
392 else
393 {
394 dir->first = FALSE;
395 }
396 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
397 return dir->data.cFileName;
398 }
399 #ifndef _MSC_VER
400 return NULL; /* Keep compiler happy; never executed */
401 #endif
402 }
403
404 void
405 closedirectory(directory_type *dir)
406 {
407 FindClose(dir->handle);
408 free(dir);
409 }
410
411
412 /************* Test for regular file in Win32 **********/
413
414 /* I don't know how to do this, or if it can be done; assume all paths are
415 regular if they are not directories. */
416
/* Treat every path that is not a directory as a regular file.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns 0 for the path, otherwise 0
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
421
422
423 /************* Test stdout for being a terminal in Win32 **********/
424
425 /* I don't know how to do this; assume never */
426
427 static BOOL
428 is_stdout_tty(void)
429 {
430 FALSE;
431 }
432
433
434 /************* Directory scanning when we can't do it ***********/
435
436 /* The type is void, and apart from isdirectory(), the functions do nothing. */
437
438 #else
439
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Directories cannot be opened: the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read: the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
446
447
448 /************* Test for regular when we can't do it **********/
449
450 /* Assume all files are regular. */
451
/* Every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
453
454
455 /************* Test stdout for being a terminal when we can't do it **********/
456
457 static BOOL
458 is_stdout_tty(void)
459 {
460 return FALSE;
461 }
462
463
464 #endif
465
466
467
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) have no strerror() in
their libraries, but they do have the sys_errlist table of messages, from which
this replacement is easily built.

Argument:   n   an error number
Returns:    the message at index n in sys_errlist, or a fixed string when n
            is negative or not less than sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
#endif /* HAVE_STRERROR */
487
488
489
490 /*************************************************
491 * Find end of line *
492 *************************************************/
493
494 /* The length of the endline sequence that is found is set via lenptr. This may
495 be zero at the very end of the file if there is no line-ending sequence there.
496
497 Arguments:
498 p current position in line
499 endptr end of available data
500 lenptr where to put the length of the eol sequence
501
502 Returns: pointer to the last byte of the line
503 */
504
505 static char *
506 end_of_line(char *p, char *endptr, int *lenptr)
507 {
508 switch(endlinetype)
509 {
510 default: /* Just in case */
511 case EL_LF:
512 while (p < endptr && *p != '\n') p++;
513 if (p < endptr)
514 {
515 *lenptr = 1;
516 return p + 1;
517 }
518 *lenptr = 0;
519 return endptr;
520
521 case EL_CR:
522 while (p < endptr && *p != '\r') p++;
523 if (p < endptr)
524 {
525 *lenptr = 1;
526 return p + 1;
527 }
528 *lenptr = 0;
529 return endptr;
530
531 case EL_CRLF:
532 for (;;)
533 {
534 while (p < endptr && *p != '\r') p++;
535 if (++p >= endptr)
536 {
537 *lenptr = 0;
538 return endptr;
539 }
540 if (*p == '\n')
541 {
542 *lenptr = 2;
543 return p + 1;
544 }
545 }
546 break;
547
548 case EL_ANYCRLF:
549 while (p < endptr)
550 {
551 int extra = 0;
552 register int c = *((unsigned char *)p);
553
554 if (utf8 && c >= 0xc0)
555 {
556 int gcii, gcss;
557 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
558 gcss = 6*extra;
559 c = (c & utf8_table3[extra]) << gcss;
560 for (gcii = 1; gcii <= extra; gcii++)
561 {
562 gcss -= 6;
563 c |= (p[gcii] & 0x3f) << gcss;
564 }
565 }
566
567 p += 1 + extra;
568
569 switch (c)
570 {
571 case 0x0a: /* LF */
572 *lenptr = 1;
573 return p;
574
575 case 0x0d: /* CR */
576 if (p < endptr && *p == 0x0a)
577 {
578 *lenptr = 2;
579 p++;
580 }
581 else *lenptr = 1;
582 return p;
583
584 default:
585 break;
586 }
587 } /* End of loop for ANYCRLF case */
588
589 *lenptr = 0; /* Must have hit the end */
590 return endptr;
591
592 case EL_ANY:
593 while (p < endptr)
594 {
595 int extra = 0;
596 register int c = *((unsigned char *)p);
597
598 if (utf8 && c >= 0xc0)
599 {
600 int gcii, gcss;
601 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
602 gcss = 6*extra;
603 c = (c & utf8_table3[extra]) << gcss;
604 for (gcii = 1; gcii <= extra; gcii++)
605 {
606 gcss -= 6;
607 c |= (p[gcii] & 0x3f) << gcss;
608 }
609 }
610
611 p += 1 + extra;
612
613 switch (c)
614 {
615 case 0x0a: /* LF */
616 case 0x0b: /* VT */
617 case 0x0c: /* FF */
618 *lenptr = 1;
619 return p;
620
621 case 0x0d: /* CR */
622 if (p < endptr && *p == 0x0a)
623 {
624 *lenptr = 2;
625 p++;
626 }
627 else *lenptr = 1;
628 return p;
629
630 case 0x85: /* NEL */
631 *lenptr = utf8? 2 : 1;
632 return p;
633
634 case 0x2028: /* LS */
635 case 0x2029: /* PS */
636 *lenptr = 3;
637 return p;
638
639 default:
640 break;
641 }
642 } /* End of loop for ANY case */
643
644 *lenptr = 0; /* Must have hit the end */
645 return endptr;
646 } /* End of overall switch */
647 }
648
649
650
651 /*************************************************
652 * Find start of previous line *
653 *************************************************/
654
655 /* This is called when looking back for before lines to print.
656
657 Arguments:
658 p start of the subsequent line
659 startptr start of available data
660
661 Returns: pointer to the start of the previous line
662 */
663
664 static char *
665 previous_line(char *p, char *startptr)
666 {
667 switch(endlinetype)
668 {
669 default: /* Just in case */
670 case EL_LF:
671 p--;
672 while (p > startptr && p[-1] != '\n') p--;
673 return p;
674
675 case EL_CR:
676 p--;
677 while (p > startptr && p[-1] != '\n') p--;
678 return p;
679
680 case EL_CRLF:
681 for (;;)
682 {
683 p -= 2;
684 while (p > startptr && p[-1] != '\n') p--;
685 if (p <= startptr + 1 || p[-2] == '\r') return p;
686 }
687 return p; /* But control should never get here */
688
689 case EL_ANY:
690 case EL_ANYCRLF:
691 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
692 if (utf8) while ((*p & 0xc0) == 0x80) p--;
693
694 while (p > startptr)
695 {
696 register int c;
697 char *pp = p - 1;
698
699 if (utf8)
700 {
701 int extra = 0;
702 while ((*pp & 0xc0) == 0x80) pp--;
703 c = *((unsigned char *)pp);
704 if (c >= 0xc0)
705 {
706 int gcii, gcss;
707 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
708 gcss = 6*extra;
709 c = (c & utf8_table3[extra]) << gcss;
710 for (gcii = 1; gcii <= extra; gcii++)
711 {
712 gcss -= 6;
713 c |= (pp[gcii] & 0x3f) << gcss;
714 }
715 }
716 }
717 else c = *((unsigned char *)pp);
718
719 if (endlinetype == EL_ANYCRLF) switch (c)
720 {
721 case 0x0a: /* LF */
722 case 0x0d: /* CR */
723 return p;
724
725 default:
726 break;
727 }
728
729 else switch (c)
730 {
731 case 0x0a: /* LF */
732 case 0x0b: /* VT */
733 case 0x0c: /* FF */
734 case 0x0d: /* CR */
735 case 0x85: /* NEL */
736 case 0x2028: /* LS */
737 case 0x2029: /* PS */
738 return p;
739
740 default:
741 break;
742 }
743
744 p = pp; /* Back one character */
745 } /* End of loop for ANY case */
746
747 return startptr; /* Hit start of data */
748 } /* End of overall switch */
749 }
750
751
752
753
754
755 /*************************************************
756 * Print the previous "after" lines *
757 *************************************************/
758
759 /* This is called if we are about to lose said lines because of buffer filling,
760 and at the end of the file. The data in the line is written using fwrite() so
761 that a binary zero does not terminate it.
762
763 Arguments:
764 lastmatchnumber the number of the last matching line, plus one
765 lastmatchrestart where we restarted after the last match
766 endptr end of available data
767 printname filename for printing
768
769 Returns: nothing
770 */
771
772 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
773 char *endptr, char *printname)
774 {
775 if (after_context > 0 && lastmatchnumber > 0)
776 {
777 int count = 0;
778 while (lastmatchrestart < endptr && count++ < after_context)
779 {
780 int ellength;
781 char *pp = lastmatchrestart;
782 if (printname != NULL) fprintf(stdout, "%s-", printname);
783 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
784 pp = end_of_line(pp, endptr, &ellength);
785 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
786 lastmatchrestart = pp;
787 }
788 hyphenpending = TRUE;
789 }
790 }
791
792
793
794 /*************************************************
795 * Grep an individual file *
796 *************************************************/
797
798 /* This is called from grep_or_recurse() below. It uses a buffer that is three
799 times the value of MBUFTHIRD. The matching point is never allowed to stray into
800 the top third of the buffer, thus keeping more of the file available for
801 context printing or for multiline scanning. For large files, the pointer will
802 be in the middle third most of the time, so the bottom third is available for
803 "before" context printing.
804
805 Arguments:
806 in the fopened FILE stream
807 printname the file name if it is to be printed for each match
808 or NULL if the file name is not to be printed
809 it cannot be NULL if filenames[_nomatch]_only is set
810
811 Returns: 0 if there was at least one match
812 1 otherwise (no matches)
813 */
814
815 static int
816 pcregrep(FILE *in, char *printname)
817 {
818 int rc = 1;
819 int linenumber = 1;
820 int lastmatchnumber = 0;
821 int count = 0;
822 int offsets[99];
823 char *lastmatchrestart = NULL;
824 char buffer[3*MBUFTHIRD];
825 char *ptr = buffer;
826 char *endptr;
827 size_t bufflength;
828 BOOL endhyphenpending = FALSE;
829
830 /* Do the first read into the start of the buffer and set up the pointer to
831 end of what we have. */
832
833 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
834 endptr = buffer + bufflength;
835
836 /* Loop while the current pointer is not at the end of the file. For large
837 files, endptr will be at the end of the buffer when we are in the middle of the
838 file, but ptr will never get there, because as soon as it gets over 2/3 of the
839 way, the buffer is shifted left and re-filled. */
840
841 while (ptr < endptr)
842 {
843 int i, endlinelength;
844 int mrc = 0;
845 BOOL match = FALSE;
846 char *t = ptr;
847 size_t length, linelength;
848
849 /* At this point, ptr is at the start of a line. We need to find the length
850 of the subject string to pass to pcre_exec(). In multiline mode, it is the
851 length of the remainder of the data in the buffer. Otherwise, it is the length of
852 the next line. After matching, we always advance by the length of the next
853 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
854 that any match is constrained to be in the first line. */
855
856 t = end_of_line(t, endptr, &endlinelength);
857 linelength = t - ptr - endlinelength;
858 length = multiline? endptr - ptr : linelength;
859
860 /* Extra processing for Jeffrey Friedl's debugging. */
861
862 #ifdef JFRIEDL_DEBUG
863 if (jfriedl_XT || jfriedl_XR)
864 {
865 #include <sys/time.h>
866 #include <time.h>
867 struct timeval start_time, end_time;
868 struct timezone dummy;
869
870 if (jfriedl_XT)
871 {
872 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
873 const char *orig = ptr;
874 ptr = malloc(newlen + 1);
875 if (!ptr) {
876 printf("out of memory");
877 exit(2);
878 }
879 endptr = ptr;
880 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
881 for (i = 0; i < jfriedl_XT; i++) {
882 strncpy(endptr, orig, length);
883 endptr += length;
884 }
885 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
886 length = newlen;
887 }
888
889 if (gettimeofday(&start_time, &dummy) != 0)
890 perror("bad gettimeofday");
891
892
893 for (i = 0; i < jfriedl_XR; i++)
894 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
895
896 if (gettimeofday(&end_time, &dummy) != 0)
897 perror("bad gettimeofday");
898
899 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
900 -
901 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
902
903 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
904 return 0;
905 }
906 #endif
907
908
909 /* Run through all the patterns until one matches. Note that we don't include
910 the final newline in the subject string. */
911
912 for (i = 0; i < pattern_count; i++)
913 {
914 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
915 offsets, 99);
916 if (mrc >= 0) { match = TRUE; break; }
917 if (mrc != PCRE_ERROR_NOMATCH)
918 {
919 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
920 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
921 fprintf(stderr, "this line:\n");
922 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
923 fprintf(stderr, "\n");
924 if (error_count == 0 &&
925 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
926 {
927 fprintf(stderr, "pcregrep: error %d means that a resource limit "
928 "was exceeded\n", mrc);
929 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
930 }
931 if (error_count++ > 20)
932 {
933 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
934 exit(2);
935 }
936 match = invert; /* No more matching; don't show the line again */
937 break;
938 }
939 }
940
941 /* If it's a match or a not-match (as required), do what's wanted. */
942
943 if (match != invert)
944 {
945 BOOL hyphenprinted = FALSE;
946
947 /* We've failed if we want a file that doesn't have any matches. */
948
949 if (filenames == FN_NOMATCH_ONLY) return 1;
950
951 /* Just count if just counting is wanted. */
952
953 if (count_only) count++;
954
955 /* If all we want is a file name, there is no need to scan any more lines
956 in the file. */
957
958 else if (filenames == FN_ONLY)
959 {
960 fprintf(stdout, "%s\n", printname);
961 return 0;
962 }
963
964 /* Likewise, if all we want is a yes/no answer. */
965
966 else if (quiet) return 0;
967
968 /* The --only-matching option prints just the substring that matched, and
969 does not print any context. */
970
971 else if (only_matching)
972 {
973 if (printname != NULL) fprintf(stdout, "%s:", printname);
974 if (number) fprintf(stdout, "%d:", linenumber);
975 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
976 fprintf(stdout, "\n");
977 }
978
979 /* This is the default case when none of the above options is set. We print
980 the matching line(s), possibly preceded and/or followed by other lines of
981 context. */
982
983 else
984 {
985 /* See if there is a requirement to print some "after" lines from a
986 previous match. We never print any overlaps. */
987
988 if (after_context > 0 && lastmatchnumber > 0)
989 {
990 int ellength;
991 int linecount = 0;
992 char *p = lastmatchrestart;
993
994 while (p < ptr && linecount < after_context)
995 {
996 p = end_of_line(p, ptr, &ellength);
997 linecount++;
998 }
999
1000 /* It is important to advance lastmatchrestart during this printing so
1001 that it interacts correctly with any "before" printing below. Print
1002 each line's data using fwrite() in case there are binary zeroes. */
1003
1004 while (lastmatchrestart < p)
1005 {
1006 char *pp = lastmatchrestart;
1007 if (printname != NULL) fprintf(stdout, "%s-", printname);
1008 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1009 pp = end_of_line(pp, endptr, &ellength);
1010 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1011 lastmatchrestart = pp;
1012 }
1013 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1014 }
1015
1016 /* If there were non-contiguous lines printed above, insert hyphens. */
1017
1018 if (hyphenpending)
1019 {
1020 fprintf(stdout, "--\n");
1021 hyphenpending = FALSE;
1022 hyphenprinted = TRUE;
1023 }
1024
1025 /* See if there is a requirement to print some "before" lines for this
1026 match. Again, don't print overlaps. */
1027
1028 if (before_context > 0)
1029 {
1030 int linecount = 0;
1031 char *p = ptr;
1032
1033 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1034 linecount < before_context)
1035 {
1036 linecount++;
1037 p = previous_line(p, buffer);
1038 }
1039
1040 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1041 fprintf(stdout, "--\n");
1042
1043 while (p < ptr)
1044 {
1045 int ellength;
1046 char *pp = p;
1047 if (printname != NULL) fprintf(stdout, "%s-", printname);
1048 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1049 pp = end_of_line(pp, endptr, &ellength);
1050 fwrite(p, 1, pp - p, stdout);
1051 p = pp;
1052 }
1053 }
1054
1055 /* Now print the matching line(s); ensure we set hyphenpending at the end
1056 of the file if any context lines are being output. */
1057
1058 if (after_context > 0 || before_context > 0)
1059 endhyphenpending = TRUE;
1060
1061 if (printname != NULL) fprintf(stdout, "%s:", printname);
1062 if (number) fprintf(stdout, "%d:", linenumber);
1063
1064 /* In multiline mode, we want to print to the end of the line in which
1065 the end of the matched string is found, so we adjust linelength and the
1066 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1067 start of the match will always be before the first newline sequence. */
1068
1069 if (multiline)
1070 {
1071 int ellength;
1072 char *endmatch = ptr + offsets[1];
1073 t = ptr;
1074 while (t < endmatch)
1075 {
1076 t = end_of_line(t, endptr, &ellength);
1077 if (t <= endmatch) linenumber++; else break;
1078 }
1079 endmatch = end_of_line(endmatch, endptr, &ellength);
1080 linelength = endmatch - ptr - ellength;
1081 }
1082
1083 /*** NOTE: Use only fwrite() to output the data line, so that binary
1084 zeroes are treated as just another data character. */
1085
1086 /* This extra option, for Jeffrey Friedl's debugging requirements,
1087 replaces the matched string, or a specific captured string if it exists,
1088 with X. When this happens, colouring is ignored. */
1089
1090 #ifdef JFRIEDL_DEBUG
1091 if (S_arg >= 0 && S_arg < mrc)
1092 {
1093 int first = S_arg * 2;
1094 int last = first + 1;
1095 fwrite(ptr, 1, offsets[first], stdout);
1096 fprintf(stdout, "X");
1097 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1098 }
1099 else
1100 #endif
1101
1102 /* We have to split the line(s) up if colouring. */
1103
1104 if (do_colour)
1105 {
1106 fwrite(ptr, 1, offsets[0], stdout);
1107 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1108 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1109 fprintf(stdout, "%c[00m", 0x1b);
1110 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1111 }
1112 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1113 }
1114
1115 /* End of doing what has to be done for a match */
1116
1117 rc = 0; /* Had some success */
1118
1119 /* Remember where the last match happened for after_context. We remember
1120 where we are about to restart, and that line's number. */
1121
1122 lastmatchrestart = ptr + linelength + endlinelength;
1123 lastmatchnumber = linenumber + 1;
1124 }
1125
1126 /* Advance to after the newline and increment the line number. */
1127
1128 ptr += linelength + endlinelength;
1129 linenumber++;
1130
1131 /* If we haven't yet reached the end of the file (the buffer is full), and
1132 the current point is in the top 1/3 of the buffer, slide the buffer down by
1133 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1134 about to be lost, print them. */
1135
1136 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1137 {
1138 if (after_context > 0 &&
1139 lastmatchnumber > 0 &&
1140 lastmatchrestart < buffer + MBUFTHIRD)
1141 {
1142 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1143 lastmatchnumber = 0;
1144 }
1145
1146 /* Now do the shuffle */
1147
1148 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1149 ptr -= MBUFTHIRD;
1150 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1151 endptr = buffer + bufflength;
1152
1153 /* Adjust any last match point */
1154
1155 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1156 }
1157 } /* Loop through the whole file */
1158
1159 /* End of file; print final "after" lines if wanted; do_after_lines sets
1160 hyphenpending if it prints something. */
1161
1162 if (!only_matching && !count_only)
1163 {
1164 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1165 hyphenpending |= endhyphenpending;
1166 }
1167
1168 /* Print the file name if we are looking for those without matches and there
1169 were none. If we found a match, we won't have got this far. */
1170
1171 if (filenames == FN_NOMATCH_ONLY)
1172 {
1173 fprintf(stdout, "%s\n", printname);
1174 return 0;
1175 }
1176
1177 /* Print the match count if wanted */
1178
1179 if (count_only)
1180 {
1181 if (printname != NULL) fprintf(stdout, "%s:", printname);
1182 fprintf(stdout, "%d\n", count);
1183 }
1184
1185 return rc;
1186 }
1187
1188
1189
1190 /*************************************************
1191 * Grep a file or recurse into a directory *
1192 *************************************************/
1193
1194 /* Given a path name, if it's a directory, scan all the files if we are
1195 recursing; if it's a file, grep it.
1196
1197 Arguments:
1198 pathname the path to investigate
1199 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1200 only_one_at_top TRUE if the path is the only one at toplevel
1201
1202 Returns: 0 if there was at least one match
1203 1 if there were no matches
1204 2 there was some kind of error
1205
1206 However, file opening failures are suppressed if "silent" is set.
1207 */
1208
1209 static int
1210 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1211 {
1212 int rc = 1;
1213 int sep;
1214 FILE *in;
1215
1216 /* If the file name is "-" we scan stdin */
1217
1218 if (strcmp(pathname, "-") == 0)
1219 {
1220 return pcregrep(stdin,
1221 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1222 stdin_name : NULL);
1223 }
1224
1225
1226 /* If the file is a directory, skip if skipping or if we are recursing, scan
1227 each file within it, subject to any include or exclude patterns that were set.
1228 The scanning code is localized so it can be made system-specific. */
1229
1230 if ((sep = isdirectory(pathname)) != 0)
1231 {
1232 if (dee_action == dee_SKIP) return 1;
1233 if (dee_action == dee_RECURSE)
1234 {
1235 char buffer[1024];
1236 char *nextfile;
1237 directory_type *dir = opendirectory(pathname);
1238
1239 if (dir == NULL)
1240 {
1241 if (!silent)
1242 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1243 strerror(errno));
1244 return 2;
1245 }
1246
1247 while ((nextfile = readdirectory(dir)) != NULL)
1248 {
1249 int frc, blen;
1250 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1251 blen = strlen(buffer);
1252
1253 if (exclude_compiled != NULL &&
1254 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1255 continue;
1256
1257 if (include_compiled != NULL &&
1258 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1259 continue;
1260
1261 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1262 if (frc > 1) rc = frc;
1263 else if (frc == 0 && rc == 1) rc = 0;
1264 }
1265
1266 closedirectory(dir);
1267 return rc;
1268 }
1269 }
1270
1271 /* If the file is not a directory and not a regular file, skip it if that's
1272 been requested. */
1273
1274 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1275
1276 /* Control reaches here if we have a regular file, or if we have a directory
1277 and recursion or skipping was not requested, or if we have anything else and
1278 skipping was not requested. The scan proceeds. If this is the first and only
1279 argument at top level, we don't show the file name, unless we are only showing
1280 the file name, or the filename was forced (-H). */
1281
1282 in = fopen(pathname, "r");
1283 if (in == NULL)
1284 {
1285 if (!silent)
1286 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1287 strerror(errno));
1288 return 2;
1289 }
1290
1291 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1292 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1293
1294 fclose(in);
1295 return rc;
1296 }
1297
1298
1299
1300
1301 /*************************************************
1302 * Usage function *
1303 *************************************************/
1304
1305 static int
1306 usage(int rc)
1307 {
1308 option_item *op;
1309 fprintf(stderr, "Usage: pcregrep [-");
1310 for (op = optionlist; op->one_char != 0; op++)
1311 {
1312 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1313 }
1314 fprintf(stderr, "] [long options] [pattern] [files]\n");
1315 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1316 return rc;
1317 }
1318
1319
1320
1321
1322 /*************************************************
1323 * Help function *
1324 *************************************************/
1325
1326 static void
1327 help(void)
1328 {
1329 option_item *op;
1330
1331 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1332 printf("Search for PATTERN in each FILE or standard input.\n");
1333 printf("PATTERN must be present if neither -e nor -f is used.\n");
1334 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1335 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1336
1337 printf("Options:\n");
1338
1339 for (op = optionlist; op->one_char != 0; op++)
1340 {
1341 int n;
1342 char s[4];
1343 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1344 printf(" %s --%s%n", s, op->long_name, &n);
1345 n = 30 - n;
1346 if (n < 1) n = 1;
1347 printf("%.*s%s\n", n, " ", op->help_text);
1348 }
1349
1350 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1351 printf("trailing white space is removed and blank lines are ignored.\n");
1352 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1353
1354 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1355 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1356 }
1357
1358
1359
1360
1361 /*************************************************
1362 * Handle a single-letter, no data option *
1363 *************************************************/
1364
1365 static int
1366 handle_option(int letter, int options)
1367 {
1368 switch(letter)
1369 {
1370 case N_HELP: help(); exit(0);
1371 case 'c': count_only = TRUE; break;
1372 case 'F': process_options |= PO_FIXED_STRINGS; break;
1373 case 'H': filenames = FN_FORCE; break;
1374 case 'h': filenames = FN_NONE; break;
1375 case 'i': options |= PCRE_CASELESS; break;
1376 case 'l': filenames = FN_ONLY; break;
1377 case 'L': filenames = FN_NOMATCH_ONLY; break;
1378 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1379 case 'n': number = TRUE; break;
1380 case 'o': only_matching = TRUE; break;
1381 case 'q': quiet = TRUE; break;
1382 case 'r': dee_action = dee_RECURSE; break;
1383 case 's': silent = TRUE; break;
1384 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1385 case 'v': invert = TRUE; break;
1386 case 'w': process_options |= PO_WORD_MATCH; break;
1387 case 'x': process_options |= PO_LINE_MATCH; break;
1388
1389 case 'V':
1390 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1391 exit(0);
1392 break;
1393
1394 default:
1395 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1396 exit(usage(2));
1397 }
1398
1399 return options;
1400 }
1401
1402
1403
1404
1405 /*************************************************
1406 * Construct printed ordinal *
1407 *************************************************/
1408
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), whatever the
final digit is.

Argument:   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call, so the result must be used or
            copied before calling again
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of
any int; the extra four bytes cover a minus sign, the two-letter ending, and
the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

if (last_two < 0) last_two = -last_two;    /* Cannot overflow: |value| < 100 */

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1427
1428
1429
1430 /*************************************************
1431 * Compile a single pattern *
1432 *************************************************/
1433
1434 /* When the -F option has been used, this is called for each substring.
1435 Otherwise it's called for each supplied pattern.
1436
1437 Arguments:
1438 pattern the pattern string
1439 options the PCRE options
1440 filename the file name, or NULL for a command-line pattern
1441 count 0 if this is the only command line pattern, or
1442 number of the command line pattern, or
1443 linenumber for a pattern from a file
1444
1445 Returns: TRUE on success, FALSE after an error
1446 */
1447
1448 static BOOL
1449 compile_single_pattern(char *pattern, int options, char *filename, int count)
1450 {
1451 char buffer[MBUFTHIRD + 16];
1452 const char *error;
1453 int errptr;
1454
1455 if (pattern_count >= MAX_PATTERN_COUNT)
1456 {
1457 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1458 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1459 return FALSE;
1460 }
1461
1462 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1463 suffix[process_options]);
1464 pattern_list[pattern_count] =
1465 pcre_compile(buffer, options, &error, &errptr, pcretables);
1466 if (pattern_list[pattern_count] != NULL)
1467 {
1468 pattern_count++;
1469 return TRUE;
1470 }
1471
1472 /* Handle compile errors */
1473
1474 errptr -= (int)strlen(prefix[process_options]);
1475 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1476
1477 if (filename == NULL)
1478 {
1479 if (count == 0)
1480 fprintf(stderr, "pcregrep: Error in command-line regex "
1481 "at offset %d: %s\n", errptr, error);
1482 else
1483 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1484 "at offset %d: %s\n", ordin(count), errptr, error);
1485 }
1486 else
1487 {
1488 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1489 "at offset %d: %s\n", count, filename, errptr, error);
1490 }
1491
1492 return FALSE;
1493 }
1494
1495
1496
1497 /*************************************************
1498 * Compile one supplied pattern *
1499 *************************************************/
1500
1501 /* When the -F option has been used, each string may be a list of strings,
1502 separated by line breaks. They will be matched literally.
1503
1504 Arguments:
1505 pattern the pattern string
1506 options the PCRE options
1507 filename the file name, or NULL for a command-line pattern
1508 count 0 if this is the only command line pattern, or
1509 number of the command line pattern, or
1510 linenumber for a pattern from a file
1511
1512 Returns: TRUE on success, FALSE after an error
1513 */
1514
1515 static BOOL
1516 compile_pattern(char *pattern, int options, char *filename, int count)
1517 {
1518 if ((process_options & PO_FIXED_STRINGS) != 0)
1519 {
1520 char *eop = pattern + strlen(pattern);
1521 char buffer[MBUFTHIRD];
1522 for(;;)
1523 {
1524 int ellength;
1525 char *p = end_of_line(pattern, eop, &ellength);
1526 if (ellength == 0)
1527 return compile_single_pattern(pattern, options, filename, count);
1528 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1529 pattern = p;
1530 if (!compile_single_pattern(buffer, options, filename, count))
1531 return FALSE;
1532 }
1533 }
1534 else return compile_single_pattern(pattern, options, filename, count);
1535 }
1536
1537
1538
1539 /*************************************************
1540 * Main program *
1541 *************************************************/
1542
1543 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1544
1545 int
1546 main(int argc, char **argv)
1547 {
1548 int i, j;
1549 int rc = 1;
1550 int pcre_options = 0;
1551 int cmd_pattern_count = 0;
1552 int hint_count = 0;
1553 int errptr;
1554 BOOL only_one_at_top;
1555 char *patterns[MAX_PATTERN_COUNT];
1556 const char *locale_from = "--locale";
1557 const char *error;
1558
1559 /* Set the default line ending value from the default in the PCRE library;
1560 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1561 */
1562
1563 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1564 switch(i)
1565 {
1566 default: newline = (char *)"lf"; break;
1567 case '\r': newline = (char *)"cr"; break;
1568 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1569 case -1: newline = (char *)"any"; break;
1570 case -2: newline = (char *)"anycrlf"; break;
1571 }
1572
1573 /* Process the options */
1574
1575 for (i = 1; i < argc; i++)
1576 {
1577 option_item *op = NULL;
1578 char *option_data = (char *)""; /* default to keep compiler happy */
1579 BOOL longop;
1580 BOOL longopwasequals = FALSE;
1581
1582 if (argv[i][0] != '-') break;
1583
1584 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1585 but only if we have previously had -e or -f to define the patterns. */
1586
1587 if (argv[i][1] == 0)
1588 {
1589 if (pattern_filename != NULL || pattern_count > 0) break;
1590 else exit(usage(2));
1591 }
1592
1593 /* Handle a long name option, or -- to terminate the options */
1594
1595 if (argv[i][1] == '-')
1596 {
1597 char *arg = argv[i] + 2;
1598 char *argequals = strchr(arg, '=');
1599
1600 if (*arg == 0) /* -- terminates options */
1601 {
1602 i++;
1603 break; /* out of the options-handling loop */
1604 }
1605
1606 longop = TRUE;
1607
1608 /* Some long options have data that follows after =, for example file=name.
1609 Some options have variations in the long name spelling: specifically, we
1610 allow "regexp" because GNU grep allows it, though I personally go along
1611 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1612 These options are entered in the table as "regex(p)". No option is in both
1613 these categories, fortunately. */
1614
1615 for (op = optionlist; op->one_char != 0; op++)
1616 {
1617 char *opbra = strchr(op->long_name, '(');
1618 char *equals = strchr(op->long_name, '=');
1619 if (opbra == NULL) /* Not a (p) case */
1620 {
1621 if (equals == NULL) /* Not thing=data case */
1622 {
1623 if (strcmp(arg, op->long_name) == 0) break;
1624 }
1625 else /* Special case xxx=data */
1626 {
1627 int oplen = equals - op->long_name;
1628 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1629 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1630 {
1631 option_data = arg + arglen;
1632 if (*option_data == '=')
1633 {
1634 option_data++;
1635 longopwasequals = TRUE;
1636 }
1637 break;
1638 }
1639 }
1640 }
1641 else /* Special case xxxx(p) */
1642 {
1643 char buff1[24];
1644 char buff2[24];
1645 int baselen = opbra - op->long_name;
1646 sprintf(buff1, "%.*s", baselen, op->long_name);
1647 sprintf(buff2, "%s%.*s", buff1,
1648 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1649 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1650 break;
1651 }
1652 }
1653
1654 if (op->one_char == 0)
1655 {
1656 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1657 exit(usage(2));
1658 }
1659 }
1660
1661
1662 /* Jeffrey Friedl's debugging harness uses these additional options which
1663 are not in the right form for putting in the option table because they use
1664 only one hyphen, yet are more than one character long. By putting them
1665 separately here, they will not get displayed as part of the help() output,
1666 but I don't think Jeffrey will care about that. */
1667
1668 #ifdef JFRIEDL_DEBUG
1669 else if (strcmp(argv[i], "-pre") == 0) {
1670 jfriedl_prefix = argv[++i];
1671 continue;
1672 } else if (strcmp(argv[i], "-post") == 0) {
1673 jfriedl_postfix = argv[++i];
1674 continue;
1675 } else if (strcmp(argv[i], "-XT") == 0) {
1676 sscanf(argv[++i], "%d", &jfriedl_XT);
1677 continue;
1678 } else if (strcmp(argv[i], "-XR") == 0) {
1679 sscanf(argv[++i], "%d", &jfriedl_XR);
1680 continue;
1681 }
1682 #endif
1683
1684
1685 /* One-char options; many that have no data may be in a single argument; we
1686 continue till we hit the last one or one that needs data. */
1687
1688 else
1689 {
1690 char *s = argv[i] + 1;
1691 longop = FALSE;
1692 while (*s != 0)
1693 {
1694 for (op = optionlist; op->one_char != 0; op++)
1695 { if (*s == op->one_char) break; }
1696 if (op->one_char == 0)
1697 {
1698 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1699 *s, argv[i]);
1700 exit(usage(2));
1701 }
1702 if (op->type != OP_NODATA || s[1] == 0)
1703 {
1704 option_data = s+1;
1705 break;
1706 }
1707 pcre_options = handle_option(*s++, pcre_options);
1708 }
1709 }
1710
1711 /* At this point we should have op pointing to a matched option. If the type
1712 is NO_DATA, it means that there is no data, and the option might set
1713 something in the PCRE options. */
1714
1715 if (op->type == OP_NODATA)
1716 {
1717 pcre_options = handle_option(op->one_char, pcre_options);
1718 continue;
1719 }
1720
1721 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1722 either has a value or defaults to something. It cannot have data in a
1723 separate item. At the moment, the only such options are "colo(u)r" and
1724 Jeffrey Friedl's special -S debugging option. */
1725
1726 if (*option_data == 0 &&
1727 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1728 {
1729 switch (op->one_char)
1730 {
1731 case N_COLOUR:
1732 colour_option = (char *)"auto";
1733 break;
1734 #ifdef JFRIEDL_DEBUG
1735 case 'S':
1736 S_arg = 0;
1737 break;
1738 #endif
1739 }
1740 continue;
1741 }
1742
1743 /* Otherwise, find the data string for the option. */
1744
1745 if (*option_data == 0)
1746 {
1747 if (i >= argc - 1 || longopwasequals)
1748 {
1749 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1750 exit(usage(2));
1751 }
1752 option_data = argv[++i];
1753 }
1754
1755 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1756 multiple times to create a list of patterns. */
1757
1758 if (op->type == OP_PATLIST)
1759 {
1760 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1761 {
1762 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1763 MAX_PATTERN_COUNT);
1764 return 2;
1765 }
1766 patterns[cmd_pattern_count++] = option_data;
1767 }
1768
1769 /* Otherwise, deal with single string or numeric data values. */
1770
1771 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1772 {
1773 *((char **)op->dataptr) = option_data;
1774 }
1775 else
1776 {
1777 char *endptr;
1778 int n = strtoul(option_data, &endptr, 10);
1779 if (*endptr != 0)
1780 {
1781 if (longop)
1782 {
1783 char *equals = strchr(op->long_name, '=');
1784 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1785 equals - op->long_name;
1786 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1787 option_data, nlen, op->long_name);
1788 }
1789 else
1790 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1791 option_data, op->one_char);
1792 exit(usage(2));
1793 }
1794 *((int *)op->dataptr) = n;
1795 }
1796 }
1797
1798 /* Options have been decoded. If -C was used, its value is used as a default
1799 for -A and -B. */
1800
1801 if (both_context > 0)
1802 {
1803 if (after_context == 0) after_context = both_context;
1804 if (before_context == 0) before_context = both_context;
1805 }
1806
1807 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1808 LC_ALL environment variable is set, and if so, use it. */
1809
1810 if (locale == NULL)
1811 {
1812 locale = getenv("LC_ALL");
1813 locale_from = "LCC_ALL";
1814 }
1815
1816 if (locale == NULL)
1817 {
1818 locale = getenv("LC_CTYPE");
1819 locale_from = "LC_CTYPE";
1820 }
1821
1822 /* If a locale has been provided, set it, and generate the tables the PCRE
1823 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1824
1825 if (locale != NULL)
1826 {
1827 if (setlocale(LC_CTYPE, locale) == NULL)
1828 {
1829 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1830 locale, locale_from);
1831 return 2;
1832 }
1833 pcretables = pcre_maketables();
1834 }
1835
1836 /* Sort out colouring */
1837
1838 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1839 {
1840 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1841 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1842 else
1843 {
1844 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1845 colour_option);
1846 return 2;
1847 }
1848 if (do_colour)
1849 {
1850 char *cs = getenv("PCREGREP_COLOUR");
1851 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1852 if (cs != NULL) colour_string = cs;
1853 }
1854 }
1855
1856 /* Interpret the newline type; the default settings are Unix-like. */
1857
1858 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1859 {
1860 pcre_options |= PCRE_NEWLINE_CR;
1861 endlinetype = EL_CR;
1862 }
1863 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1864 {
1865 pcre_options |= PCRE_NEWLINE_LF;
1866 endlinetype = EL_LF;
1867 }
1868 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1869 {
1870 pcre_options |= PCRE_NEWLINE_CRLF;
1871 endlinetype = EL_CRLF;
1872 }
1873 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1874 {
1875 pcre_options |= PCRE_NEWLINE_ANY;
1876 endlinetype = EL_ANY;
1877 }
1878 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1879 {
1880 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1881 endlinetype = EL_ANYCRLF;
1882 }
1883 else
1884 {
1885 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1886 return 2;
1887 }
1888
1889 /* Interpret the text values for -d and -D */
1890
1891 if (dee_option != NULL)
1892 {
1893 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1894 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1895 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1896 else
1897 {
1898 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1899 return 2;
1900 }
1901 }
1902
1903 if (DEE_option != NULL)
1904 {
1905 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1906 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1907 else
1908 {
1909 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1910 return 2;
1911 }
1912 }
1913
1914 /* Check the values for Jeffrey Friedl's debugging options. */
1915
1916 #ifdef JFRIEDL_DEBUG
1917 if (S_arg > 9)
1918 {
1919 fprintf(stderr, "pcregrep: bad value for -S option\n");
1920 return 2;
1921 }
1922 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1923 {
1924 if (jfriedl_XT == 0) jfriedl_XT = 1;
1925 if (jfriedl_XR == 0) jfriedl_XR = 1;
1926 }
1927 #endif
1928
1929 /* Get memory to store the pattern and hints lists. */
1930
1931 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1932 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1933
1934 if (pattern_list == NULL || hints_list == NULL)
1935 {
1936 fprintf(stderr, "pcregrep: malloc failed\n");
1937 goto EXIT2;
1938 }
1939
1940 /* If no patterns were provided by -e, and there is no file provided by -f,
1941 the first argument is the one and only pattern, and it must exist. */
1942
1943 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1944 {
1945 if (i >= argc) return usage(2);
1946 patterns[cmd_pattern_count++] = argv[i++];
1947 }
1948
1949 /* Compile the patterns that were provided on the command line, either by
1950 multiple uses of -e or as a single unkeyed pattern. */
1951
1952 for (j = 0; j < cmd_pattern_count; j++)
1953 {
1954 if (!compile_pattern(patterns[j], pcre_options, NULL,
1955 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1956 goto EXIT2;
1957 }
1958
1959 /* Compile the regular expressions that are provided in a file. */
1960
1961 if (pattern_filename != NULL)
1962 {
1963 int linenumber = 0;
1964 FILE *f;
1965 char *filename;
1966 char buffer[MBUFTHIRD];
1967
1968 if (strcmp(pattern_filename, "-") == 0)
1969 {
1970 f = stdin;
1971 filename = stdin_name;
1972 }
1973 else
1974 {
1975 f = fopen(pattern_filename, "r");
1976 if (f == NULL)
1977 {
1978 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1979 strerror(errno));
1980 goto EXIT2;
1981 }
1982 filename = pattern_filename;
1983 }
1984
1985 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1986 {
1987 char *s = buffer + (int)strlen(buffer);
1988 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1989 *s = 0;
1990 linenumber++;
1991 if (buffer[0] == 0) continue; /* Skip blank lines */
1992 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1993 goto EXIT2;
1994 }
1995
1996 if (f != stdin) fclose(f);
1997 }
1998
1999 /* Study the regular expressions, as we will be running them many times */
2000
2001 for (j = 0; j < pattern_count; j++)
2002 {
2003 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2004 if (error != NULL)
2005 {
2006 char s[16];
2007 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2008 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2009 goto EXIT2;
2010 }
2011 hint_count++;
2012 }
2013
2014 /* If there are include or exclude patterns, compile them. */
2015
2016 if (exclude_pattern != NULL)
2017 {
2018 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2019 pcretables);
2020 if (exclude_compiled == NULL)
2021 {
2022 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2023 errptr, error);
2024 goto EXIT2;
2025 }
2026 }
2027
2028 if (include_pattern != NULL)
2029 {
2030 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2031 pcretables);
2032 if (include_compiled == NULL)
2033 {
2034 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2035 errptr, error);
2036 goto EXIT2;
2037 }
2038 }
2039
2040 /* If there are no further arguments, do the business on stdin and exit. */
2041
2042 if (i >= argc)
2043 {
2044 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2045 goto EXIT;
2046 }
2047
2048 /* Otherwise, work through the remaining arguments as files or directories.
2049 Pass in the fact that there is only one argument at top level - this suppresses
2050 the file name if the argument is not a directory and filenames are not
2051 otherwise forced. */
2052
2053 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2054
2055 for (; i < argc; i++)
2056 {
2057 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2058 only_one_at_top);
2059 if (frc > 1) rc = frc;
2060 else if (frc == 0 && rc == 1) rc = 0;
2061 }
2062
2063 EXIT:
2064 if (pattern_list != NULL)
2065 {
2066 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2067 free(pattern_list);
2068 }
2069 if (hints_list != NULL)
2070 {
2071 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2072 free(hints_list);
2073 }
2074 return rc;
2075
2076 EXIT2:
2077 rc = 2;
2078 goto EXIT;
2079 }
2080
2081 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12