/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 243 - (show annotations) (download)
Thu Sep 13 09:28:14 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 59494 byte(s)
Detrailed files for 7.4-RC1 test release.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include "pcre.h"
59
/* Truth values, and the plain-int type used to hold them. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Named limit on the number of patterns. */

#define MAX_PATTERN_COUNT 100

/* MBUFTHIRD is one third of the size of the file buffer: whichever is the
larger of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order matters: a file name is wanted for every value that is
greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Actions for the -d option (directories) ... */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

/* ... and for the -D option (devices). */

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special pattern-processing options */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* The recognized kinds of line ending */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL hyphenpending = FALSE;
146 static BOOL invert = FALSE;
147 static BOOL multiline = FALSE;
148 static BOOL number = FALSE;
149 static BOOL only_matching = FALSE;
150 static BOOL quiet = FALSE;
151 static BOOL silent = FALSE;
152 static BOOL utf8 = FALSE;
153
154 /* Structure for options and list of them */
155
/* The kinds of option, as stored in the "type" field of an option_item */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. The fields are initialized positionally,
so their order must not be changed. */

typedef struct option_item {
  int type;               /* One of the OP_ values */
  int one_char;           /* The option letter, or a negative identifying code */
  void *dataptr;          /* Where the option's value lives, or NULL */
  const char *long_name;  /* The long option name */
  const char *help_text;  /* Descriptive text for the option */
} option_item;
166
167 /* Options without a single-letter equivalent get a negative value. This can be
168 used to identify them. */
169
170 #define N_COLOUR (-1)
171 #define N_EXCLUDE (-2)
172 #define N_HELP (-3)
173 #define N_INCLUDE (-4)
174 #define N_LABEL (-5)
175 #define N_LOCALE (-6)
176 #define N_NULL (-7)
177
178 static option_item optionlist[] = {
179 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207 #ifdef JFRIEDL_DEBUG
208 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209 #endif
210 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216 { OP_NODATA, 0, NULL, NULL, NULL }
217 };
218
219 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221 that the combination of -w and -x has the same effect as -x on its own, so we
222 can treat them as the same. */
223
/* Text to put in front of each pattern. The index is a process_options value:
bit 1 is -w, bit 2 is -x, bit 4 is -F. Entries where both -w and -x are set are
the same as the corresponding -x entries. */

static const char *prefix[] = {
  "",           /* 0: no options */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

/* The matching text to put after each pattern; same indexing. */

static const char *suffix[] = {
  "",           /* 0: no options */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};

/* UTF-8 tables - used when the newline setting is "any" or "anycrlf". */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Number of additional bytes that follow a first byte, indexed by the bottom
six bits of that first byte (which is in the range 0xc0 to 0xff). */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xc0 - 0xcf */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xd0 - 0xdf */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    /* 0xe0 - 0xef */
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5     /* 0xf0 - 0xff */
};
239
240
241
242 /*************************************************
243 * OS-specific functions *
244 *************************************************/
245
246 /* These functions are defined so that they can be made system specific,
247 although at present the only ones are for Unix, Win32, and for "no support". */
248
249
250 /************* Directory scanning in Unix ***********/
251
252 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 #include <sys/types.h>
254 #include <sys/stat.h>
255 #include <dirent.h>
256
/* On Unix an open directory is simply a DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() fails (in the expectation
              that opening the path as a file will then fail)
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
267
268 static directory_type *
269 opendirectory(char *filename)
270 {
271 return opendir(filename);
272 }
273
274 static char *
275 readdirectory(directory_type *dir)
276 {
277 for (;;)
278 {
279 struct dirent *dent = readdir(dir);
280 if (dent == NULL) return NULL;
281 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282 return dent->d_name;
283 }
284 /* Control never reaches here */
285 }
286
287 static void
288 closedirectory(directory_type *dir)
289 {
290 closedir(dir);
291 }
292
293
294 /************* Test for regular file in Unix **********/
295
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() fails (in the expectation that opening the path as a
              file will then fail) or if it reports a regular file;
            0 for anything else
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;
  return S_ISREG(info.st_mode)? 1 : 0;
}
304
305
306 /************* Test stdout for being a terminal in Unix **********/
307
308 static BOOL
309 is_stdout_tty(void)
310 {
311 return isatty(fileno(stdout));
312 }
313
314
315 /************* Directory scanning in Win32 ***********/
316
317 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319 when it did not exist. */
320
321
322 #elif HAVE_WINDOWS_H
323
324 #ifndef STRICT
325 # define STRICT
326 #endif
327 #ifndef WIN32_LEAN_AND_MEAN
328 # define WIN32_LEAN_AND_MEAN
329 #endif
330 #ifndef INVALID_FILE_ATTRIBUTES
331 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332 #endif
333
334 #include <windows.h>
335
336 typedef struct directory_type
337 {
338 HANDLE handle;
339 BOOL first;
340 WIN32_FIND_DATA data;
341 } directory_type;
342
343 int
344 isdirectory(char *filename)
345 {
346 DWORD attr = GetFileAttributes(filename);
347 if (attr == INVALID_FILE_ATTRIBUTES)
348 return 0;
349 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350 }
351
352 directory_type *
353 opendirectory(char *filename)
354 {
355 size_t len;
356 char *pattern;
357 directory_type *dir;
358 DWORD err;
359 len = strlen(filename);
360 pattern = (char *) malloc(len + 3);
361 dir = (directory_type *) malloc(sizeof(*dir));
362 if ((pattern == NULL) || (dir == NULL))
363 {
364 fprintf(stderr, "pcregrep: malloc failed\n");
365 exit(2);
366 }
367 memcpy(pattern, filename, len);
368 memcpy(&(pattern[len]), "\\*", 3);
369 dir->handle = FindFirstFile(pattern, &(dir->data));
370 if (dir->handle != INVALID_HANDLE_VALUE)
371 {
372 free(pattern);
373 dir->first = TRUE;
374 return dir;
375 }
376 err = GetLastError();
377 free(pattern);
378 free(dir);
379 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380 return NULL;
381 }
382
383 char *
384 readdirectory(directory_type *dir)
385 {
386 for (;;)
387 {
388 if (!dir->first)
389 {
390 if (!FindNextFile(dir->handle, &(dir->data)))
391 return NULL;
392 }
393 else
394 {
395 dir->first = FALSE;
396 }
397 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398 return dir->data.cFileName;
399 }
400 #ifndef _MSC_VER
401 return NULL; /* Keep compiler happy; never executed */
402 #endif
403 }
404
405 void
406 closedirectory(directory_type *dir)
407 {
408 FindClose(dir->handle);
409 free(dir);
410 }
411
412
413 /************* Test for regular file in Win32 **********/
414
415 /* I don't know how to do this, or if it can be done; assume all paths are
416 regular if they are not directories. */
417
/* Test for a regular file (Win32 version). Every path that isdirectory() does
not report as a directory is taken to be a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory; 0 if it is
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
422
423
424 /************* Test stdout for being a terminal in Win32 **********/
425
426 /* I don't know how to do this; assume never */
427
428 static BOOL
429 is_stdout_tty(void)
430 {
431 FALSE;
432 }
433
434
435 /************* Directory scanning when we can't do it ***********/
436
437 /* The type is void, and apart from isdirectory(), the functions do nothing. */
438
439 #else
440
/* With no directory support, the directory type is just void. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always yields a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There is never an entry to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Closing does nothing. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
447
448
449 /************* Test for regular when we can't do it **********/
450
451 /* Assume all files are regular. */
452
/* Every path is taken to be a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
454
455
456 /************* Test stdout for being a terminal when we can't do it **********/
457
458 static BOOL
459 is_stdout_tty(void)
460 {
461 return FALSE;
462 }
463
464
465 #endif
466
467
468
469 #ifndef HAVE_STRERROR
470 /*************************************************
471 * Provide strerror() for non-ANSI libraries *
472 *************************************************/
473
474 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475 in their libraries, but can provide the same facility by this simple
476 alternative function. */
477
/* The error table and its size, which are expected to come from the system
library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:   n   an error number
Returns:    the sys_errlist entry for n when n is within the table, or
            the fixed text "unknown error number" when it is not
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
487 #endif /* HAVE_STRERROR */
488
489
490
491 /*************************************************
492 * Find end of line *
493 *************************************************/
494
495 /* The length of the endline sequence that is found is set via lenptr. This may
496 be zero at the very end of the file if there is no line-ending sequence there.
497
498 Arguments:
499 p current position in line
500 endptr end of available data
501 lenptr where to put the length of the eol sequence
502
503 Returns: pointer to the byte after the end-of-line sequence, or endptr if none was found
504 */
505
506 static char *
507 end_of_line(char *p, char *endptr, int *lenptr)
508 {
509 switch(endlinetype)
510 {
511 default: /* Just in case */
512 case EL_LF:
513 while (p < endptr && *p != '\n') p++;
514 if (p < endptr)
515 {
516 *lenptr = 1;
517 return p + 1;
518 }
519 *lenptr = 0;
520 return endptr;
521
522 case EL_CR:
523 while (p < endptr && *p != '\r') p++;
524 if (p < endptr)
525 {
526 *lenptr = 1;
527 return p + 1;
528 }
529 *lenptr = 0;
530 return endptr;
531
532 case EL_CRLF:
533 for (;;)
534 {
535 while (p < endptr && *p != '\r') p++;
536 if (++p >= endptr)
537 {
538 *lenptr = 0;
539 return endptr;
540 }
541 if (*p == '\n')
542 {
543 *lenptr = 2;
544 return p + 1;
545 }
546 }
547 break;
548
549 case EL_ANYCRLF:
550 while (p < endptr)
551 {
552 int extra = 0;
553 register int c = *((unsigned char *)p);
554
555 if (utf8 && c >= 0xc0)
556 {
557 int gcii, gcss;
558 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559 gcss = 6*extra;
560 c = (c & utf8_table3[extra]) << gcss;
561 for (gcii = 1; gcii <= extra; gcii++)
562 {
563 gcss -= 6;
564 c |= (p[gcii] & 0x3f) << gcss;
565 }
566 }
567
568 p += 1 + extra;
569
570 switch (c)
571 {
572 case 0x0a: /* LF */
573 *lenptr = 1;
574 return p;
575
576 case 0x0d: /* CR */
577 if (p < endptr && *p == 0x0a)
578 {
579 *lenptr = 2;
580 p++;
581 }
582 else *lenptr = 1;
583 return p;
584
585 default:
586 break;
587 }
588 } /* End of loop for ANYCRLF case */
589
590 *lenptr = 0; /* Must have hit the end */
591 return endptr;
592
593 case EL_ANY:
594 while (p < endptr)
595 {
596 int extra = 0;
597 register int c = *((unsigned char *)p);
598
599 if (utf8 && c >= 0xc0)
600 {
601 int gcii, gcss;
602 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603 gcss = 6*extra;
604 c = (c & utf8_table3[extra]) << gcss;
605 for (gcii = 1; gcii <= extra; gcii++)
606 {
607 gcss -= 6;
608 c |= (p[gcii] & 0x3f) << gcss;
609 }
610 }
611
612 p += 1 + extra;
613
614 switch (c)
615 {
616 case 0x0a: /* LF */
617 case 0x0b: /* VT */
618 case 0x0c: /* FF */
619 *lenptr = 1;
620 return p;
621
622 case 0x0d: /* CR */
623 if (p < endptr && *p == 0x0a)
624 {
625 *lenptr = 2;
626 p++;
627 }
628 else *lenptr = 1;
629 return p;
630
631 case 0x85: /* NEL */
632 *lenptr = utf8? 2 : 1;
633 return p;
634
635 case 0x2028: /* LS */
636 case 0x2029: /* PS */
637 *lenptr = 3;
638 return p;
639
640 default:
641 break;
642 }
643 } /* End of loop for ANY case */
644
645 *lenptr = 0; /* Must have hit the end */
646 return endptr;
647 } /* End of overall switch */
648 }
649
650
651
652 /*************************************************
653 * Find start of previous line *
654 *************************************************/
655
656 /* This is called when looking back for before lines to print.
657
658 Arguments:
659 p start of the subsequent line
660 startptr start of available data
661
662 Returns: pointer to the start of the previous line
663 */
664
665 static char *
666 previous_line(char *p, char *startptr)
667 {
668 switch(endlinetype)
669 {
670 default: /* Just in case */
671 case EL_LF:
672 p--;
673 while (p > startptr && p[-1] != '\n') p--;
674 return p;
675
676 case EL_CR:
677 p--;
678 while (p > startptr && p[-1] != '\n') p--;
679 return p;
680
681 case EL_CRLF:
682 for (;;)
683 {
684 p -= 2;
685 while (p > startptr && p[-1] != '\n') p--;
686 if (p <= startptr + 1 || p[-2] == '\r') return p;
687 }
688 return p; /* But control should never get here */
689
690 case EL_ANY:
691 case EL_ANYCRLF:
692 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693 if (utf8) while ((*p & 0xc0) == 0x80) p--;
694
695 while (p > startptr)
696 {
697 register int c;
698 char *pp = p - 1;
699
700 if (utf8)
701 {
702 int extra = 0;
703 while ((*pp & 0xc0) == 0x80) pp--;
704 c = *((unsigned char *)pp);
705 if (c >= 0xc0)
706 {
707 int gcii, gcss;
708 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709 gcss = 6*extra;
710 c = (c & utf8_table3[extra]) << gcss;
711 for (gcii = 1; gcii <= extra; gcii++)
712 {
713 gcss -= 6;
714 c |= (pp[gcii] & 0x3f) << gcss;
715 }
716 }
717 }
718 else c = *((unsigned char *)pp);
719
720 if (endlinetype == EL_ANYCRLF) switch (c)
721 {
722 case 0x0a: /* LF */
723 case 0x0d: /* CR */
724 return p;
725
726 default:
727 break;
728 }
729
730 else switch (c)
731 {
732 case 0x0a: /* LF */
733 case 0x0b: /* VT */
734 case 0x0c: /* FF */
735 case 0x0d: /* CR */
736 case 0x85: /* NEL */
737 case 0x2028: /* LS */
738 case 0x2029: /* PS */
739 return p;
740
741 default:
742 break;
743 }
744
745 p = pp; /* Back one character */
746 } /* End of loop for ANY case */
747
748 return startptr; /* Hit start of data */
749 } /* End of overall switch */
750 }
751
752
753
754
755
756 /*************************************************
757 * Print the previous "after" lines *
758 *************************************************/
759
760 /* This is called if we are about to lose said lines because of buffer filling,
761 and at the end of the file. The data in the line is written using fwrite() so
762 that a binary zero does not terminate it.
763
764 Arguments:
765 lastmatchnumber the number of the last matching line, plus one
766 lastmatchrestart where we restarted after the last match
767 endptr end of available data
768 printname filename for printing
769
770 Returns: nothing
771 */
772
773 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774 char *endptr, char *printname)
775 {
776 if (after_context > 0 && lastmatchnumber > 0)
777 {
778 int count = 0;
779 while (lastmatchrestart < endptr && count++ < after_context)
780 {
781 int ellength;
782 char *pp = lastmatchrestart;
783 if (printname != NULL) fprintf(stdout, "%s-", printname);
784 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 pp = end_of_line(pp, endptr, &ellength);
786 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787 lastmatchrestart = pp;
788 }
789 hyphenpending = TRUE;
790 }
791 }
792
793
794
795 /*************************************************
796 * Grep an individual file *
797 *************************************************/
798
799 /* This is called from grep_or_recurse() below. It uses a buffer that is three
800 times the value of MBUFTHIRD. The matching point is never allowed to stray into
801 the top third of the buffer, thus keeping more of the file available for
802 context printing or for multiline scanning. For large files, the pointer will
803 be in the middle third most of the time, so the bottom third is available for
804 "before" context printing.
805
806 Arguments:
807 in the fopened FILE stream
808 printname the file name if it is to be printed for each match
809 or NULL if the file name is not to be printed
810 it cannot be NULL if filenames[_nomatch]_only is set
811
812 Returns: 0 if there was at least one match
813 1 otherwise (no matches)
814 */
815
816 static int
817 pcregrep(FILE *in, char *printname)
818 {
819 int rc = 1;
820 int linenumber = 1;
821 int lastmatchnumber = 0;
822 int count = 0;
823 int offsets[99];
824 char *lastmatchrestart = NULL;
825 char buffer[3*MBUFTHIRD];
826 char *ptr = buffer;
827 char *endptr;
828 size_t bufflength;
829 BOOL endhyphenpending = FALSE;
830
831 /* Do the first read into the start of the buffer and set up the pointer to
832 end of what we have. */
833
834 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835 endptr = buffer + bufflength;
836
837 /* Loop while the current pointer is not at the end of the file. For large
838 files, endptr will be at the end of the buffer when we are in the middle of the
839 file, but ptr will never get there, because as soon as it gets over 2/3 of the
840 way, the buffer is shifted left and re-filled. */
841
842 while (ptr < endptr)
843 {
844 int i, endlinelength;
845 int mrc = 0;
846 BOOL match = FALSE;
847 char *t = ptr;
848 size_t length, linelength;
849
850 /* At this point, ptr is at the start of a line. We need to find the length
851 of the subject string to pass to pcre_exec(). In multiline mode, it is the
852 length remainder of the data in the buffer. Otherwise, it is the length of
853 the next line. After matching, we always advance by the length of the next
854 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855 that any match is constrained to be in the first line. */
856
857 t = end_of_line(t, endptr, &endlinelength);
858 linelength = t - ptr - endlinelength;
859 length = multiline? (size_t)(endptr - ptr) : linelength;
860
861 /* Extra processing for Jeffrey Friedl's debugging. */
862
863 #ifdef JFRIEDL_DEBUG
864 if (jfriedl_XT || jfriedl_XR)
865 {
866 #include <sys/time.h>
867 #include <time.h>
868 struct timeval start_time, end_time;
869 struct timezone dummy;
870
871 if (jfriedl_XT)
872 {
873 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874 const char *orig = ptr;
875 ptr = malloc(newlen + 1);
876 if (!ptr) {
877 printf("out of memory");
878 exit(2);
879 }
880 endptr = ptr;
881 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882 for (i = 0; i < jfriedl_XT; i++) {
883 strncpy(endptr, orig, length);
884 endptr += length;
885 }
886 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887 length = newlen;
888 }
889
890 if (gettimeofday(&start_time, &dummy) != 0)
891 perror("bad gettimeofday");
892
893
894 for (i = 0; i < jfriedl_XR; i++)
895 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896
897 if (gettimeofday(&end_time, &dummy) != 0)
898 perror("bad gettimeofday");
899
900 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901 -
902 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903
904 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905 return 0;
906 }
907 #endif
908
909
910 /* Run through all the patterns until one matches. Note that we don't include
911 the final newline in the subject string. */
912
913 for (i = 0; i < pattern_count; i++)
914 {
915 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916 offsets, 99);
917 if (mrc >= 0) { match = TRUE; break; }
918 if (mrc != PCRE_ERROR_NOMATCH)
919 {
920 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922 fprintf(stderr, "this line:\n");
923 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924 fprintf(stderr, "\n");
925 if (error_count == 0 &&
926 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927 {
928 fprintf(stderr, "pcregrep: error %d means that a resource limit "
929 "was exceeded\n", mrc);
930 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931 }
932 if (error_count++ > 20)
933 {
934 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935 exit(2);
936 }
937 match = invert; /* No more matching; don't show the line again */
938 break;
939 }
940 }
941
942 /* If it's a match or a not-match (as required), do what's wanted. */
943
944 if (match != invert)
945 {
946 BOOL hyphenprinted = FALSE;
947
948 /* We've failed if we want a file that doesn't have any matches. */
949
950 if (filenames == FN_NOMATCH_ONLY) return 1;
951
952 /* Just count if just counting is wanted. */
953
954 if (count_only) count++;
955
956 /* If all we want is a file name, there is no need to scan any more lines
957 in the file. */
958
959 else if (filenames == FN_ONLY)
960 {
961 fprintf(stdout, "%s\n", printname);
962 return 0;
963 }
964
965 /* Likewise, if all we want is a yes/no answer. */
966
967 else if (quiet) return 0;
968
969 /* The --only-matching option prints just the substring that matched, and
970 does not print any context. */
971
972 else if (only_matching)
973 {
974 if (printname != NULL) fprintf(stdout, "%s:", printname);
975 if (number) fprintf(stdout, "%d:", linenumber);
976 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977 fprintf(stdout, "\n");
978 }
979
980 /* This is the default case when none of the above options is set. We print
981 the matching lines(s), possibly preceded and/or followed by other lines of
982 context. */
983
984 else
985 {
986 /* See if there is a requirement to print some "after" lines from a
987 previous match. We never print any overlaps. */
988
989 if (after_context > 0 && lastmatchnumber > 0)
990 {
991 int ellength;
992 int linecount = 0;
993 char *p = lastmatchrestart;
994
995 while (p < ptr && linecount < after_context)
996 {
997 p = end_of_line(p, ptr, &ellength);
998 linecount++;
999 }
1000
1001 /* It is important to advance lastmatchrestart during this printing so
1002 that it interacts correctly with any "before" printing below. Print
1003 each line's data using fwrite() in case there are binary zeroes. */
1004
1005 while (lastmatchrestart < p)
1006 {
1007 char *pp = lastmatchrestart;
1008 if (printname != NULL) fprintf(stdout, "%s-", printname);
1009 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 pp = end_of_line(pp, endptr, &ellength);
1011 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012 lastmatchrestart = pp;
1013 }
1014 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015 }
1016
1017 /* If there were non-contiguous lines printed above, insert hyphens. */
1018
1019 if (hyphenpending)
1020 {
1021 fprintf(stdout, "--\n");
1022 hyphenpending = FALSE;
1023 hyphenprinted = TRUE;
1024 }
1025
1026 /* See if there is a requirement to print some "before" lines for this
1027 match. Again, don't print overlaps. */
1028
1029 if (before_context > 0)
1030 {
1031 int linecount = 0;
1032 char *p = ptr;
1033
1034 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 linecount < before_context)
1036 {
1037 linecount++;
1038 p = previous_line(p, buffer);
1039 }
1040
1041 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042 fprintf(stdout, "--\n");
1043
1044 while (p < ptr)
1045 {
1046 int ellength;
1047 char *pp = p;
1048 if (printname != NULL) fprintf(stdout, "%s-", printname);
1049 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 pp = end_of_line(pp, endptr, &ellength);
1051 fwrite(p, 1, pp - p, stdout);
1052 p = pp;
1053 }
1054 }
1055
1056 /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 of the file if any context lines are being output. */
1058
1059 if (after_context > 0 || before_context > 0)
1060 endhyphenpending = TRUE;
1061
1062 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 if (number) fprintf(stdout, "%d:", linenumber);
1064
1065 /* In multiline mode, we want to print to the end of the line in which
1066 the end of the matched string is found, so we adjust linelength and the
1067 line number appropriately, but only when there actually was a match
1068 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069 the match will always be before the first newline sequence. */
1070
1071 if (multiline)
1072 {
1073 int ellength;
1074 char *endmatch = ptr;
1075 if (!invert)
1076 {
1077 endmatch += offsets[1];
1078 t = ptr;
1079 while (t < endmatch)
1080 {
1081 t = end_of_line(t, endptr, &ellength);
1082 if (t <= endmatch) linenumber++; else break;
1083 }
1084 }
1085 endmatch = end_of_line(endmatch, endptr, &ellength);
1086 linelength = endmatch - ptr - ellength;
1087 }
1088
1089 /*** NOTE: Use only fwrite() to output the data line, so that binary
1090 zeroes are treated as just another data character. */
1091
1092 /* This extra option, for Jeffrey Friedl's debugging requirements,
1093 replaces the matched string, or a specific captured string if it exists,
1094 with X. When this happens, colouring is ignored. */
1095
1096 #ifdef JFRIEDL_DEBUG
1097 if (S_arg >= 0 && S_arg < mrc)
1098 {
1099 int first = S_arg * 2;
1100 int last = first + 1;
1101 fwrite(ptr, 1, offsets[first], stdout);
1102 fprintf(stdout, "X");
1103 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104 }
1105 else
1106 #endif
1107
1108 /* We have to split the line(s) up if colouring. */
1109
1110 if (do_colour)
1111 {
1112 fwrite(ptr, 1, offsets[0], stdout);
1113 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115 fprintf(stdout, "%c[00m", 0x1b);
1116 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1117 stdout);
1118 }
1119 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1120 }
1121
1122 /* End of doing what has to be done for a match */
1123
1124 rc = 0; /* Had some success */
1125
1126 /* Remember where the last match happened for after_context. We remember
1127 where we are about to restart, and that line's number. */
1128
1129 lastmatchrestart = ptr + linelength + endlinelength;
1130 lastmatchnumber = linenumber + 1;
1131 }
1132
1133 /* For a match in multiline inverted mode (which of course did not cause
1134 anything to be printed), we have to move on to the end of the match before
1135 proceeding. */
1136
1137 if (multiline && invert && match)
1138 {
1139 int ellength;
1140 char *endmatch = ptr + offsets[1];
1141 t = ptr;
1142 while (t < endmatch)
1143 {
1144 t = end_of_line(t, endptr, &ellength);
1145 if (t <= endmatch) linenumber++; else break;
1146 }
1147 endmatch = end_of_line(endmatch, endptr, &ellength);
1148 linelength = endmatch - ptr - ellength;
1149 }
1150
1151 /* Advance to after the newline and increment the line number. */
1152
1153 ptr += linelength + endlinelength;
1154 linenumber++;
1155
1156 /* If we haven't yet reached the end of the file (the buffer is full), and
1157 the current point is in the top 1/3 of the buffer, slide the buffer down by
1158 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1159 about to be lost, print them. */
1160
1161 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1162 {
1163 if (after_context > 0 &&
1164 lastmatchnumber > 0 &&
1165 lastmatchrestart < buffer + MBUFTHIRD)
1166 {
1167 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1168 lastmatchnumber = 0;
1169 }
1170
1171 /* Now do the shuffle */
1172
1173 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1174 ptr -= MBUFTHIRD;
1175 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1176 endptr = buffer + bufflength;
1177
1178 /* Adjust any last match point */
1179
1180 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1181 }
1182 } /* Loop through the whole file */
1183
1184 /* End of file; print final "after" lines if wanted; do_after_lines sets
1185 hyphenpending if it prints something. */
1186
1187 if (!only_matching && !count_only)
1188 {
1189 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1190 hyphenpending |= endhyphenpending;
1191 }
1192
1193 /* Print the file name if we are looking for those without matches and there
1194 were none. If we found a match, we won't have got this far. */
1195
1196 if (filenames == FN_NOMATCH_ONLY)
1197 {
1198 fprintf(stdout, "%s\n", printname);
1199 return 0;
1200 }
1201
1202 /* Print the match count if wanted */
1203
1204 if (count_only)
1205 {
1206 if (printname != NULL) fprintf(stdout, "%s:", printname);
1207 fprintf(stdout, "%d\n", count);
1208 }
1209
1210 return rc;
1211 }
1212
1213
1214
1215 /*************************************************
1216 * Grep a file or recurse into a directory *
1217 *************************************************/
1218
1219 /* Given a path name, if it's a directory, scan all the files if we are
1220 recursing; if it's a file, grep it.
1221
1222 Arguments:
1223 pathname the path to investigate
1224 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1225 only_one_at_top TRUE if the path is the only one at toplevel
1226
1227 Returns: 0 if there was at least one match
1228 1 if there were no matches
1229 2 there was some kind of error
1230
1231 However, file opening failures are suppressed if "silent" is set.
1232 */
1233
1234 static int
1235 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1236 {
1237 int rc = 1;
1238 int sep;
1239 FILE *in;
1240
1241 /* If the file name is "-" we scan stdin */
1242
1243 if (strcmp(pathname, "-") == 0)
1244 {
1245 return pcregrep(stdin,
1246 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1247 stdin_name : NULL);
1248 }
1249
1250
1251 /* If the file is a directory, skip if skipping or if we are recursing, scan
1252 each file within it, subject to any include or exclude patterns that were set.
1253 The scanning code is localized so it can be made system-specific. */
1254
1255 if ((sep = isdirectory(pathname)) != 0)
1256 {
1257 if (dee_action == dee_SKIP) return 1;
1258 if (dee_action == dee_RECURSE)
1259 {
1260 char buffer[1024];
1261 char *nextfile;
1262 directory_type *dir = opendirectory(pathname);
1263
1264 if (dir == NULL)
1265 {
1266 if (!silent)
1267 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1268 strerror(errno));
1269 return 2;
1270 }
1271
1272 while ((nextfile = readdirectory(dir)) != NULL)
1273 {
1274 int frc, blen;
1275 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1276 blen = strlen(buffer);
1277
1278 if (exclude_compiled != NULL &&
1279 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1280 continue;
1281
1282 if (include_compiled != NULL &&
1283 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1284 continue;
1285
1286 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1287 if (frc > 1) rc = frc;
1288 else if (frc == 0 && rc == 1) rc = 0;
1289 }
1290
1291 closedirectory(dir);
1292 return rc;
1293 }
1294 }
1295
1296 /* If the file is not a directory and not a regular file, skip it if that's
1297 been requested. */
1298
1299 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1300
1301 /* Control reaches here if we have a regular file, or if we have a directory
1302 and recursion or skipping was not requested, or if we have anything else and
1303 skipping was not requested. The scan proceeds. If this is the first and only
1304 argument at top level, we don't show the file name, unless we are only showing
1305 the file name, or the filename was forced (-H). */
1306
1307 in = fopen(pathname, "r");
1308 if (in == NULL)
1309 {
1310 if (!silent)
1311 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1312 strerror(errno));
1313 return 2;
1314 }
1315
1316 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1317 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1318
1319 fclose(in);
1320 return rc;
1321 }
1322
1323
1324
1325
1326 /*************************************************
1327 * Usage function *
1328 *************************************************/
1329
1330 static int
1331 usage(int rc)
1332 {
1333 option_item *op;
1334 fprintf(stderr, "Usage: pcregrep [-");
1335 for (op = optionlist; op->one_char != 0; op++)
1336 {
1337 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1338 }
1339 fprintf(stderr, "] [long options] [pattern] [files]\n");
1340 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1341 return rc;
1342 }
1343
1344
1345
1346
1347 /*************************************************
1348 * Help function *
1349 *************************************************/
1350
1351 static void
1352 help(void)
1353 {
1354 option_item *op;
1355
1356 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1357 printf("Search for PATTERN in each FILE or standard input.\n");
1358 printf("PATTERN must be present if neither -e nor -f is used.\n");
1359 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1360 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1361
1362 printf("Options:\n");
1363
1364 for (op = optionlist; op->one_char != 0; op++)
1365 {
1366 int n;
1367 char s[4];
1368 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1369 printf(" %s --%s%n", s, op->long_name, &n);
1370 n = 30 - n;
1371 if (n < 1) n = 1;
1372 printf("%.*s%s\n", n, " ", op->help_text);
1373 }
1374
1375 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1376 printf("trailing white space is removed and blank lines are ignored.\n");
1377 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1378
1379 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1380 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1381 }
1382
1383
1384
1385
1386 /*************************************************
1387 * Handle a single-letter, no data option *
1388 *************************************************/
1389
1390 static int
1391 handle_option(int letter, int options)
1392 {
1393 switch(letter)
1394 {
1395 case N_HELP: help(); exit(0);
1396 case 'c': count_only = TRUE; break;
1397 case 'F': process_options |= PO_FIXED_STRINGS; break;
1398 case 'H': filenames = FN_FORCE; break;
1399 case 'h': filenames = FN_NONE; break;
1400 case 'i': options |= PCRE_CASELESS; break;
1401 case 'l': filenames = FN_ONLY; break;
1402 case 'L': filenames = FN_NOMATCH_ONLY; break;
1403 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1404 case 'n': number = TRUE; break;
1405 case 'o': only_matching = TRUE; break;
1406 case 'q': quiet = TRUE; break;
1407 case 'r': dee_action = dee_RECURSE; break;
1408 case 's': silent = TRUE; break;
1409 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1410 case 'v': invert = TRUE; break;
1411 case 'w': process_options |= PO_WORD_MATCH; break;
1412 case 'x': process_options |= PO_LINE_MATCH; break;
1413
1414 case 'V':
1415 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1416 exit(0);
1417 break;
1418
1419 default:
1420 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1421 exit(usage(2));
1422 }
1423
1424 return options;
1425 }
1426
1427
1428
1429
1430 /*************************************************
1431 * Construct printed ordinal *
1432 *************************************************/
1433
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), as does any negative number.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the buffer is
              overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four cover a sign, the two-letter ending, and the zero. */

static char buffer[3 * sizeof(int) + 4];
const char *ending = "th";
int length = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + length, ending);
return buffer;
}
1452
1453
1454
1455 /*************************************************
1456 * Compile a single pattern *
1457 *************************************************/
1458
1459 /* When the -F option has been used, this is called for each substring.
1460 Otherwise it's called for each supplied pattern.
1461
1462 Arguments:
1463 pattern the pattern string
1464 options the PCRE options
1465 filename the file name, or NULL for a command-line pattern
1466 count 0 if this is the only command line pattern, or
1467 number of the command line pattern, or
1468 linenumber for a pattern from a file
1469
1470 Returns: TRUE on success, FALSE after an error
1471 */
1472
1473 static BOOL
1474 compile_single_pattern(char *pattern, int options, char *filename, int count)
1475 {
1476 char buffer[MBUFTHIRD + 16];
1477 const char *error;
1478 int errptr;
1479
1480 if (pattern_count >= MAX_PATTERN_COUNT)
1481 {
1482 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1483 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1484 return FALSE;
1485 }
1486
1487 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1488 suffix[process_options]);
1489 pattern_list[pattern_count] =
1490 pcre_compile(buffer, options, &error, &errptr, pcretables);
1491 if (pattern_list[pattern_count] != NULL)
1492 {
1493 pattern_count++;
1494 return TRUE;
1495 }
1496
1497 /* Handle compile errors */
1498
1499 errptr -= (int)strlen(prefix[process_options]);
1500 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1501
1502 if (filename == NULL)
1503 {
1504 if (count == 0)
1505 fprintf(stderr, "pcregrep: Error in command-line regex "
1506 "at offset %d: %s\n", errptr, error);
1507 else
1508 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1509 "at offset %d: %s\n", ordin(count), errptr, error);
1510 }
1511 else
1512 {
1513 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1514 "at offset %d: %s\n", count, filename, errptr, error);
1515 }
1516
1517 return FALSE;
1518 }
1519
1520
1521
1522 /*************************************************
1523 * Compile one supplied pattern *
1524 *************************************************/
1525
1526 /* When the -F option has been used, each string may be a list of strings,
1527 separated by line breaks. They will be matched literally.
1528
1529 Arguments:
1530 pattern the pattern string
1531 options the PCRE options
1532 filename the file name, or NULL for a command-line pattern
1533 count 0 if this is the only command line pattern, or
1534 number of the command line pattern, or
1535 linenumber for a pattern from a file
1536
1537 Returns: TRUE on success, FALSE after an error
1538 */
1539
1540 static BOOL
1541 compile_pattern(char *pattern, int options, char *filename, int count)
1542 {
1543 if ((process_options & PO_FIXED_STRINGS) != 0)
1544 {
1545 char *eop = pattern + strlen(pattern);
1546 char buffer[MBUFTHIRD];
1547 for(;;)
1548 {
1549 int ellength;
1550 char *p = end_of_line(pattern, eop, &ellength);
1551 if (ellength == 0)
1552 return compile_single_pattern(pattern, options, filename, count);
1553 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1554 pattern = p;
1555 if (!compile_single_pattern(buffer, options, filename, count))
1556 return FALSE;
1557 }
1558 }
1559 else return compile_single_pattern(pattern, options, filename, count);
1560 }
1561
1562
1563
1564 /*************************************************
1565 * Main program *
1566 *************************************************/
1567
1568 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1569
1570 int
1571 main(int argc, char **argv)
1572 {
1573 int i, j;
1574 int rc = 1;
1575 int pcre_options = 0;
1576 int cmd_pattern_count = 0;
1577 int hint_count = 0;
1578 int errptr;
1579 BOOL only_one_at_top;
1580 char *patterns[MAX_PATTERN_COUNT];
1581 const char *locale_from = "--locale";
1582 const char *error;
1583
1584 /* Set the default line ending value from the default in the PCRE library;
1585 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1586 */
1587
1588 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1589 switch(i)
1590 {
1591 default: newline = (char *)"lf"; break;
1592 case '\r': newline = (char *)"cr"; break;
1593 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594 case -1: newline = (char *)"any"; break;
1595 case -2: newline = (char *)"anycrlf"; break;
1596 }
1597
1598 /* Process the options */
1599
1600 for (i = 1; i < argc; i++)
1601 {
1602 option_item *op = NULL;
1603 char *option_data = (char *)""; /* default to keep compiler happy */
1604 BOOL longop;
1605 BOOL longopwasequals = FALSE;
1606
1607 if (argv[i][0] != '-') break;
1608
1609 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1610 but only if we have previously had -e or -f to define the patterns. */
1611
1612 if (argv[i][1] == 0)
1613 {
1614 if (pattern_filename != NULL || pattern_count > 0) break;
1615 else exit(usage(2));
1616 }
1617
1618 /* Handle a long name option, or -- to terminate the options */
1619
1620 if (argv[i][1] == '-')
1621 {
1622 char *arg = argv[i] + 2;
1623 char *argequals = strchr(arg, '=');
1624
1625 if (*arg == 0) /* -- terminates options */
1626 {
1627 i++;
1628 break; /* out of the options-handling loop */
1629 }
1630
1631 longop = TRUE;
1632
1633 /* Some long options have data that follows after =, for example file=name.
1634 Some options have variations in the long name spelling: specifically, we
1635 allow "regexp" because GNU grep allows it, though I personally go along
1636 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1637 These options are entered in the table as "regex(p)". No option is in both
1638 these categories, fortunately. */
1639
1640 for (op = optionlist; op->one_char != 0; op++)
1641 {
1642 char *opbra = strchr(op->long_name, '(');
1643 char *equals = strchr(op->long_name, '=');
1644 if (opbra == NULL) /* Not a (p) case */
1645 {
1646 if (equals == NULL) /* Not thing=data case */
1647 {
1648 if (strcmp(arg, op->long_name) == 0) break;
1649 }
1650 else /* Special case xxx=data */
1651 {
1652 int oplen = equals - op->long_name;
1653 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1655 {
1656 option_data = arg + arglen;
1657 if (*option_data == '=')
1658 {
1659 option_data++;
1660 longopwasequals = TRUE;
1661 }
1662 break;
1663 }
1664 }
1665 }
1666 else /* Special case xxxx(p) */
1667 {
1668 char buff1[24];
1669 char buff2[24];
1670 int baselen = opbra - op->long_name;
1671 sprintf(buff1, "%.*s", baselen, op->long_name);
1672 sprintf(buff2, "%s%.*s", buff1,
1673 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1675 break;
1676 }
1677 }
1678
1679 if (op->one_char == 0)
1680 {
1681 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1682 exit(usage(2));
1683 }
1684 }
1685
1686
1687 /* Jeffrey Friedl's debugging harness uses these additional options which
1688 are not in the right form for putting in the option table because they use
1689 only one hyphen, yet are more than one character long. By putting them
1690 separately here, they will not get displayed as part of the help() output,
1691 but I don't think Jeffrey will care about that. */
1692
1693 #ifdef JFRIEDL_DEBUG
1694 else if (strcmp(argv[i], "-pre") == 0) {
1695 jfriedl_prefix = argv[++i];
1696 continue;
1697 } else if (strcmp(argv[i], "-post") == 0) {
1698 jfriedl_postfix = argv[++i];
1699 continue;
1700 } else if (strcmp(argv[i], "-XT") == 0) {
1701 sscanf(argv[++i], "%d", &jfriedl_XT);
1702 continue;
1703 } else if (strcmp(argv[i], "-XR") == 0) {
1704 sscanf(argv[++i], "%d", &jfriedl_XR);
1705 continue;
1706 }
1707 #endif
1708
1709
1710 /* One-char options; many that have no data may be in a single argument; we
1711 continue till we hit the last one or one that needs data. */
1712
1713 else
1714 {
1715 char *s = argv[i] + 1;
1716 longop = FALSE;
1717 while (*s != 0)
1718 {
1719 for (op = optionlist; op->one_char != 0; op++)
1720 { if (*s == op->one_char) break; }
1721 if (op->one_char == 0)
1722 {
1723 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1724 *s, argv[i]);
1725 exit(usage(2));
1726 }
1727 if (op->type != OP_NODATA || s[1] == 0)
1728 {
1729 option_data = s+1;
1730 break;
1731 }
1732 pcre_options = handle_option(*s++, pcre_options);
1733 }
1734 }
1735
1736 /* At this point we should have op pointing to a matched option. If the type
1737 is NO_DATA, it means that there is no data, and the option might set
1738 something in the PCRE options. */
1739
1740 if (op->type == OP_NODATA)
1741 {
1742 pcre_options = handle_option(op->one_char, pcre_options);
1743 continue;
1744 }
1745
1746 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1747 either has a value or defaults to something. It cannot have data in a
1748 separate item. At the moment, the only such options are "colo(u)r" and
1749 Jeffrey Friedl's special -S debugging option. */
1750
1751 if (*option_data == 0 &&
1752 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1753 {
1754 switch (op->one_char)
1755 {
1756 case N_COLOUR:
1757 colour_option = (char *)"auto";
1758 break;
1759 #ifdef JFRIEDL_DEBUG
1760 case 'S':
1761 S_arg = 0;
1762 break;
1763 #endif
1764 }
1765 continue;
1766 }
1767
1768 /* Otherwise, find the data string for the option. */
1769
1770 if (*option_data == 0)
1771 {
1772 if (i >= argc - 1 || longopwasequals)
1773 {
1774 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1775 exit(usage(2));
1776 }
1777 option_data = argv[++i];
1778 }
1779
1780 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1781 multiple times to create a list of patterns. */
1782
1783 if (op->type == OP_PATLIST)
1784 {
1785 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1786 {
1787 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1788 MAX_PATTERN_COUNT);
1789 return 2;
1790 }
1791 patterns[cmd_pattern_count++] = option_data;
1792 }
1793
1794 /* Otherwise, deal with single string or numeric data values. */
1795
1796 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1797 {
1798 *((char **)op->dataptr) = option_data;
1799 }
1800 else
1801 {
1802 char *endptr;
1803 int n = strtoul(option_data, &endptr, 10);
1804 if (*endptr != 0)
1805 {
1806 if (longop)
1807 {
1808 char *equals = strchr(op->long_name, '=');
1809 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1810 equals - op->long_name;
1811 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1812 option_data, nlen, op->long_name);
1813 }
1814 else
1815 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1816 option_data, op->one_char);
1817 exit(usage(2));
1818 }
1819 *((int *)op->dataptr) = n;
1820 }
1821 }
1822
1823 /* Options have been decoded. If -C was used, its value is used as a default
1824 for -A and -B. */
1825
1826 if (both_context > 0)
1827 {
1828 if (after_context == 0) after_context = both_context;
1829 if (before_context == 0) before_context = both_context;
1830 }
1831
1832 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1833 LC_ALL environment variable is set, and if so, use it. */
1834
1835 if (locale == NULL)
1836 {
1837 locale = getenv("LC_ALL");
1838 locale_from = "LCC_ALL";
1839 }
1840
1841 if (locale == NULL)
1842 {
1843 locale = getenv("LC_CTYPE");
1844 locale_from = "LC_CTYPE";
1845 }
1846
1847 /* If a locale has been provided, set it, and generate the tables the PCRE
1848 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1849
1850 if (locale != NULL)
1851 {
1852 if (setlocale(LC_CTYPE, locale) == NULL)
1853 {
1854 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1855 locale, locale_from);
1856 return 2;
1857 }
1858 pcretables = pcre_maketables();
1859 }
1860
1861 /* Sort out colouring */
1862
1863 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1864 {
1865 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1866 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1867 else
1868 {
1869 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1870 colour_option);
1871 return 2;
1872 }
1873 if (do_colour)
1874 {
1875 char *cs = getenv("PCREGREP_COLOUR");
1876 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1877 if (cs != NULL) colour_string = cs;
1878 }
1879 }
1880
1881 /* Interpret the newline type; the default settings are Unix-like. */
1882
1883 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1884 {
1885 pcre_options |= PCRE_NEWLINE_CR;
1886 endlinetype = EL_CR;
1887 }
1888 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1889 {
1890 pcre_options |= PCRE_NEWLINE_LF;
1891 endlinetype = EL_LF;
1892 }
1893 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1894 {
1895 pcre_options |= PCRE_NEWLINE_CRLF;
1896 endlinetype = EL_CRLF;
1897 }
1898 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1899 {
1900 pcre_options |= PCRE_NEWLINE_ANY;
1901 endlinetype = EL_ANY;
1902 }
1903 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1904 {
1905 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906 endlinetype = EL_ANYCRLF;
1907 }
1908 else
1909 {
1910 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1911 return 2;
1912 }
1913
1914 /* Interpret the text values for -d and -D */
1915
1916 if (dee_option != NULL)
1917 {
1918 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1919 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1920 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1921 else
1922 {
1923 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1924 return 2;
1925 }
1926 }
1927
1928 if (DEE_option != NULL)
1929 {
1930 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1931 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1932 else
1933 {
1934 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1935 return 2;
1936 }
1937 }
1938
1939 /* Check the values for Jeffrey Friedl's debugging options. */
1940
1941 #ifdef JFRIEDL_DEBUG
1942 if (S_arg > 9)
1943 {
1944 fprintf(stderr, "pcregrep: bad value for -S option\n");
1945 return 2;
1946 }
1947 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1948 {
1949 if (jfriedl_XT == 0) jfriedl_XT = 1;
1950 if (jfriedl_XR == 0) jfriedl_XR = 1;
1951 }
1952 #endif
1953
1954 /* Get memory to store the pattern and hints lists. */
1955
1956 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1957 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1958
1959 if (pattern_list == NULL || hints_list == NULL)
1960 {
1961 fprintf(stderr, "pcregrep: malloc failed\n");
1962 goto EXIT2;
1963 }
1964
1965 /* If no patterns were provided by -e, and there is no file provided by -f,
1966 the first argument is the one and only pattern, and it must exist. */
1967
1968 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1969 {
1970 if (i >= argc) return usage(2);
1971 patterns[cmd_pattern_count++] = argv[i++];
1972 }
1973
1974 /* Compile the patterns that were provided on the command line, either by
1975 multiple uses of -e or as a single unkeyed pattern. */
1976
1977 for (j = 0; j < cmd_pattern_count; j++)
1978 {
1979 if (!compile_pattern(patterns[j], pcre_options, NULL,
1980 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1981 goto EXIT2;
1982 }
1983
1984 /* Compile the regular expressions that are provided in a file. */
1985
1986 if (pattern_filename != NULL)
1987 {
1988 int linenumber = 0;
1989 FILE *f;
1990 char *filename;
1991 char buffer[MBUFTHIRD];
1992
1993 if (strcmp(pattern_filename, "-") == 0)
1994 {
1995 f = stdin;
1996 filename = stdin_name;
1997 }
1998 else
1999 {
2000 f = fopen(pattern_filename, "r");
2001 if (f == NULL)
2002 {
2003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2004 strerror(errno));
2005 goto EXIT2;
2006 }
2007 filename = pattern_filename;
2008 }
2009
2010 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2011 {
2012 char *s = buffer + (int)strlen(buffer);
2013 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2014 *s = 0;
2015 linenumber++;
2016 if (buffer[0] == 0) continue; /* Skip blank lines */
2017 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2018 goto EXIT2;
2019 }
2020
2021 if (f != stdin) fclose(f);
2022 }
2023
2024 /* Study the regular expressions, as we will be running them many times */
2025
2026 for (j = 0; j < pattern_count; j++)
2027 {
2028 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2029 if (error != NULL)
2030 {
2031 char s[16];
2032 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2033 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2034 goto EXIT2;
2035 }
2036 hint_count++;
2037 }
2038
2039 /* If there are include or exclude patterns, compile them. */
2040
2041 if (exclude_pattern != NULL)
2042 {
2043 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2044 pcretables);
2045 if (exclude_compiled == NULL)
2046 {
2047 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2048 errptr, error);
2049 goto EXIT2;
2050 }
2051 }
2052
2053 if (include_pattern != NULL)
2054 {
2055 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2056 pcretables);
2057 if (include_compiled == NULL)
2058 {
2059 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2060 errptr, error);
2061 goto EXIT2;
2062 }
2063 }
2064
2065 /* If there are no further arguments, do the business on stdin and exit. */
2066
2067 if (i >= argc)
2068 {
2069 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2070 goto EXIT;
2071 }
2072
2073 /* Otherwise, work through the remaining arguments as files or directories.
2074 Pass in the fact that there is only one argument at top level - this suppresses
2075 the file name if the argument is not a directory and filenames are not
2076 otherwise forced. */
2077
2078 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2079
2080 for (; i < argc; i++)
2081 {
2082 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2083 only_one_at_top);
2084 if (frc > 1) rc = frc;
2085 else if (frc == 0 && rc == 1) rc = 0;
2086 }
2087
2088 EXIT:
2089 if (pattern_list != NULL)
2090 {
2091 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2092 free(pattern_list);
2093 }
2094 if (hints_list != NULL)
2095 {
2096 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2097 free(hints_list);
2098 }
2099 return rc;
2100
2101 EXIT2:
2102 rc = 2;
2103 goto EXIT;
2104 }
2105
2106 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12