/[pcre]/code/tags/pcre-7.3/pcregrep.c
ViewVC logotype

Contents of /code/tags/pcre-7.3/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 229 - (show annotations) (download)
Tue Aug 28 13:42:43 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 59466 byte(s)
Tag for 7.3.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include <pcre.h>
59
/* Boolean support: a plain int together with its two truth values. */

typedef int BOOL;

#define TRUE  1
#define FALSE 0

/* Limit on the number of patterns (the code that enforces it is not in this
part of the file). */

#define MAX_PATTERN_COUNT 100

/* One third of the size of the buffer used by pcregrep(), which declares its
buffer as 3*MBUFTHIRD bytes. It is BUFSIZ when that exceeds 8192, and 8192
otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order matters: a file name is wanted for every value greater than
FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Actions for the -d option (directories) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

/* Actions for the -D option (devices, FIFOs, and sockets) */

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for special processing options; they are combined in
process_options. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types, as held in endlinetype */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL hyphenpending = FALSE;
146 static BOOL invert = FALSE;
147 static BOOL multiline = FALSE;
148 static BOOL number = FALSE;
149 static BOOL only_matching = FALSE;
150 static BOOL quiet = FALSE;
151 static BOOL silent = FALSE;
152 static BOOL utf8 = FALSE;
153
154 /* Structure for options and list of them */
155
156 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157 OP_PATLIST };
158
159 typedef struct option_item {
160 int type;
161 int one_char;
162 void *dataptr;
163 const char *long_name;
164 const char *help_text;
165 } option_item;
166
167 /* Options without a single-letter equivalent get a negative value. This can be
168 used to identify them. */
169
170 #define N_COLOUR (-1)
171 #define N_EXCLUDE (-2)
172 #define N_HELP (-3)
173 #define N_INCLUDE (-4)
174 #define N_LABEL (-5)
175 #define N_LOCALE (-6)
176 #define N_NULL (-7)
177
178 static option_item optionlist[] = {
179 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207 #ifdef JFRIEDL_DEBUG
208 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209 #endif
210 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216 { OP_NODATA, 0, NULL, NULL, NULL }
217 };
218
219 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221 that the combination of -w and -x has the same effect as -x on its own, so we
222 can treat them as the same. */
223
/* Text placed before each pattern; the index is the PO_xxx bit combination
(word = 1, line = 2, fixed strings = 4). */

static const char *prefix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: word match */
  "^(?:",      /* 2: line match */
  "^(?:",      /* 3: word + line, treated as line */
  "\\Q",       /* 4: fixed strings */
  "\\b\\Q",    /* 5: fixed strings + word */
  "^(?:\\Q",   /* 6: fixed strings + line */
  "^(?:\\Q"    /* 7: fixed strings + word + line */
};

/* Text placed after each pattern; indexed in the same way as prefix[]. */

static const char *suffix[] = {
  "",          /* 0 */
  "\\b",       /* 1 */
  ")$",        /* 2 */
  ")$",        /* 3 */
  "\\E",       /* 4 */
  "\\E\\b",    /* 5 */
  "\\E)$",     /* 6 */
  "\\E)$"      /* 7 */
};

/* UTF-8 tables, used when scanning for "any" and "anycrlf" line endings.
utf8_table3 is indexed by the number of additional bytes and gives the mask
for the data bits in the first byte of a character. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* utf8_table4 is indexed by the bottom six bits of a first byte that is 0xc0
or more, and gives the number of additional bytes in the character. */

const char utf8_table4[] = {
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
  3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
239
240
241
242 /*************************************************
243 * OS-specific functions *
244 *************************************************/
245
246 /* These functions are defined so that they can be made system specific,
247 although at present the only ones are for Unix, Win32, and for "no support". */
248
249
250 /************* Directory scanning in Unix ***********/
251
252 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 #include <sys/types.h>
254 #include <sys/stat.h>
255 #include <dirent.h>
256
257 typedef DIR directory_type;
258
259 static int
260 isdirectory(char *filename)
261 {
262 struct stat statbuf;
263 if (stat(filename, &statbuf) < 0)
264 return 0; /* In the expectation that opening as a file will fail */
265 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
266 }
267
268 static directory_type *
269 opendirectory(char *filename)
270 {
271 return opendir(filename);
272 }
273
274 static char *
275 readdirectory(directory_type *dir)
276 {
277 for (;;)
278 {
279 struct dirent *dent = readdir(dir);
280 if (dent == NULL) return NULL;
281 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282 return dent->d_name;
283 }
284 /* Control never reaches here */
285 }
286
287 static void
288 closedirectory(directory_type *dir)
289 {
290 closedir(dir);
291 }
292
293
294 /************* Test for regular file in Unix **********/
295
296 static int
297 isregfile(char *filename)
298 {
299 struct stat statbuf;
300 if (stat(filename, &statbuf) < 0)
301 return 1; /* In the expectation that opening as a file will fail */
302 return (statbuf.st_mode & S_IFMT) == S_IFREG;
303 }
304
305
306 /************* Test stdout for being a terminal in Unix **********/
307
308 static BOOL
309 is_stdout_tty(void)
310 {
311 return isatty(fileno(stdout));
312 }
313
314
315 /************* Directory scanning in Win32 ***********/
316
317 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319 when it did not exist. */
320
321
322 #elif HAVE_WINDOWS_H
323
324 #ifndef STRICT
325 # define STRICT
326 #endif
327 #ifndef WIN32_LEAN_AND_MEAN
328 # define WIN32_LEAN_AND_MEAN
329 #endif
330 #ifndef INVALID_FILE_ATTRIBUTES
331 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332 #endif
333
334 #include <windows.h>
335
336 typedef struct directory_type
337 {
338 HANDLE handle;
339 BOOL first;
340 WIN32_FIND_DATA data;
341 } directory_type;
342
343 int
344 isdirectory(char *filename)
345 {
346 DWORD attr = GetFileAttributes(filename);
347 if (attr == INVALID_FILE_ATTRIBUTES)
348 return 0;
349 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350 }
351
352 directory_type *
353 opendirectory(char *filename)
354 {
355 size_t len;
356 char *pattern;
357 directory_type *dir;
358 DWORD err;
359 len = strlen(filename);
360 pattern = (char *) malloc(len + 3);
361 dir = (directory_type *) malloc(sizeof(*dir));
362 if ((pattern == NULL) || (dir == NULL))
363 {
364 fprintf(stderr, "pcregrep: malloc failed\n");
365 exit(2);
366 }
367 memcpy(pattern, filename, len);
368 memcpy(&(pattern[len]), "\\*", 3);
369 dir->handle = FindFirstFile(pattern, &(dir->data));
370 if (dir->handle != INVALID_HANDLE_VALUE)
371 {
372 free(pattern);
373 dir->first = TRUE;
374 return dir;
375 }
376 err = GetLastError();
377 free(pattern);
378 free(dir);
379 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380 return NULL;
381 }
382
383 char *
384 readdirectory(directory_type *dir)
385 {
386 for (;;)
387 {
388 if (!dir->first)
389 {
390 if (!FindNextFile(dir->handle, &(dir->data)))
391 return NULL;
392 }
393 else
394 {
395 dir->first = FALSE;
396 }
397 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398 return dir->data.cFileName;
399 }
400 #ifndef _MSC_VER
401 return NULL; /* Keep compiler happy; never executed */
402 #endif
403 }
404
405 void
406 closedirectory(directory_type *dir)
407 {
408 FindClose(dir->handle);
409 free(dir);
410 }
411
412
413 /************* Test for regular file in Win32 **********/
414
415 /* I don't know how to do this, or if it can be done; assume all paths are
416 regular if they are not directories. */
417
/* Every path that isdirectory() does not report as a directory is treated as
a regular file. The result is 1 for "regular" and 0 for a directory. */

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
422
423
424 /************* Test stdout for being a terminal in Win32 **********/
425
426 /* I don't know how to do this; assume never */
427
428 static BOOL
429 is_stdout_tty(void)
430 {
431 FALSE;
432 }
433
434
435 /************* Directory scanning when we can't do it ***********/
436
437 /* The type is void, and apart from isdirectory(), the functions do nothing. */
438
439 #else
440
/* Stand-ins for systems with no directory support. Nothing is ever reported
as a directory, a directory can never be opened, and there are never any
entries to read. */

typedef void directory_type;

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
447
448
449 /************* Test for regular when we can't do it **********/
450
451 /* Assume all files are regular. */
452
int
isregfile(char *filename)
{
  (void)filename;   /* no test is possible here; every file counts as regular */
  return 1;
}
454
455
456 /************* Test stdout for being a terminal when we can't do it **********/
457
458 static BOOL
459 is_stdout_tty(void)
460 {
461 return FALSE;
462 }
463
464
465 #endif
466
467
468
469 #ifndef HAVE_STRERROR
470 /*************************************************
471 * Provide strerror() for non-ANSI libraries *
472 *************************************************/
473
474 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475 in their libraries, but can provide the same facility by this simple
476 alternative function. */
477
/* The table of system error messages and the number of entries in it, both
supplied by the system library. */

extern char *sys_errlist[];
extern int sys_nerr;

/* Replacement strerror(): gives the table entry for n when n is within the
table, and a fixed message for any other value. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
487 #endif /* HAVE_STRERROR */
488
489
490
491 /*************************************************
492 * Find end of line *
493 *************************************************/
494
495 /* The length of the endline sequence that is found is set via lenptr. This may
496 be zero at the very end of the file if there is no line-ending sequence there.
497
498 Arguments:
499 p current position in line
500 endptr end of available data
501 lenptr where to put the length of the eol sequence
502
503 Returns: pointer to the last byte of the line
504 */
505
506 static char *
507 end_of_line(char *p, char *endptr, int *lenptr)
508 {
509 switch(endlinetype)
510 {
511 default: /* Just in case */
512 case EL_LF:
513 while (p < endptr && *p != '\n') p++;
514 if (p < endptr)
515 {
516 *lenptr = 1;
517 return p + 1;
518 }
519 *lenptr = 0;
520 return endptr;
521
522 case EL_CR:
523 while (p < endptr && *p != '\r') p++;
524 if (p < endptr)
525 {
526 *lenptr = 1;
527 return p + 1;
528 }
529 *lenptr = 0;
530 return endptr;
531
532 case EL_CRLF:
533 for (;;)
534 {
535 while (p < endptr && *p != '\r') p++;
536 if (++p >= endptr)
537 {
538 *lenptr = 0;
539 return endptr;
540 }
541 if (*p == '\n')
542 {
543 *lenptr = 2;
544 return p + 1;
545 }
546 }
547 break;
548
549 case EL_ANYCRLF:
550 while (p < endptr)
551 {
552 int extra = 0;
553 register int c = *((unsigned char *)p);
554
555 if (utf8 && c >= 0xc0)
556 {
557 int gcii, gcss;
558 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559 gcss = 6*extra;
560 c = (c & utf8_table3[extra]) << gcss;
561 for (gcii = 1; gcii <= extra; gcii++)
562 {
563 gcss -= 6;
564 c |= (p[gcii] & 0x3f) << gcss;
565 }
566 }
567
568 p += 1 + extra;
569
570 switch (c)
571 {
572 case 0x0a: /* LF */
573 *lenptr = 1;
574 return p;
575
576 case 0x0d: /* CR */
577 if (p < endptr && *p == 0x0a)
578 {
579 *lenptr = 2;
580 p++;
581 }
582 else *lenptr = 1;
583 return p;
584
585 default:
586 break;
587 }
588 } /* End of loop for ANYCRLF case */
589
590 *lenptr = 0; /* Must have hit the end */
591 return endptr;
592
593 case EL_ANY:
594 while (p < endptr)
595 {
596 int extra = 0;
597 register int c = *((unsigned char *)p);
598
599 if (utf8 && c >= 0xc0)
600 {
601 int gcii, gcss;
602 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603 gcss = 6*extra;
604 c = (c & utf8_table3[extra]) << gcss;
605 for (gcii = 1; gcii <= extra; gcii++)
606 {
607 gcss -= 6;
608 c |= (p[gcii] & 0x3f) << gcss;
609 }
610 }
611
612 p += 1 + extra;
613
614 switch (c)
615 {
616 case 0x0a: /* LF */
617 case 0x0b: /* VT */
618 case 0x0c: /* FF */
619 *lenptr = 1;
620 return p;
621
622 case 0x0d: /* CR */
623 if (p < endptr && *p == 0x0a)
624 {
625 *lenptr = 2;
626 p++;
627 }
628 else *lenptr = 1;
629 return p;
630
631 case 0x85: /* NEL */
632 *lenptr = utf8? 2 : 1;
633 return p;
634
635 case 0x2028: /* LS */
636 case 0x2029: /* PS */
637 *lenptr = 3;
638 return p;
639
640 default:
641 break;
642 }
643 } /* End of loop for ANY case */
644
645 *lenptr = 0; /* Must have hit the end */
646 return endptr;
647 } /* End of overall switch */
648 }
649
650
651
652 /*************************************************
653 * Find start of previous line *
654 *************************************************/
655
656 /* This is called when looking back for before lines to print.
657
658 Arguments:
659 p start of the subsequent line
660 startptr start of available data
661
662 Returns: pointer to the start of the previous line
663 */
664
665 static char *
666 previous_line(char *p, char *startptr)
667 {
668 switch(endlinetype)
669 {
670 default: /* Just in case */
671 case EL_LF:
672 p--;
673 while (p > startptr && p[-1] != '\n') p--;
674 return p;
675
676 case EL_CR:
677 p--;
678 while (p > startptr && p[-1] != '\n') p--;
679 return p;
680
681 case EL_CRLF:
682 for (;;)
683 {
684 p -= 2;
685 while (p > startptr && p[-1] != '\n') p--;
686 if (p <= startptr + 1 || p[-2] == '\r') return p;
687 }
688 return p; /* But control should never get here */
689
690 case EL_ANY:
691 case EL_ANYCRLF:
692 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693 if (utf8) while ((*p & 0xc0) == 0x80) p--;
694
695 while (p > startptr)
696 {
697 register int c;
698 char *pp = p - 1;
699
700 if (utf8)
701 {
702 int extra = 0;
703 while ((*pp & 0xc0) == 0x80) pp--;
704 c = *((unsigned char *)pp);
705 if (c >= 0xc0)
706 {
707 int gcii, gcss;
708 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709 gcss = 6*extra;
710 c = (c & utf8_table3[extra]) << gcss;
711 for (gcii = 1; gcii <= extra; gcii++)
712 {
713 gcss -= 6;
714 c |= (pp[gcii] & 0x3f) << gcss;
715 }
716 }
717 }
718 else c = *((unsigned char *)pp);
719
720 if (endlinetype == EL_ANYCRLF) switch (c)
721 {
722 case 0x0a: /* LF */
723 case 0x0d: /* CR */
724 return p;
725
726 default:
727 break;
728 }
729
730 else switch (c)
731 {
732 case 0x0a: /* LF */
733 case 0x0b: /* VT */
734 case 0x0c: /* FF */
735 case 0x0d: /* CR */
736 case 0x85: /* NEL */
737 case 0x2028: /* LS */
738 case 0x2029: /* PS */
739 return p;
740
741 default:
742 break;
743 }
744
745 p = pp; /* Back one character */
746 } /* End of loop for ANY case */
747
748 return startptr; /* Hit start of data */
749 } /* End of overall switch */
750 }
751
752
753
754
755
756 /*************************************************
757 * Print the previous "after" lines *
758 *************************************************/
759
760 /* This is called if we are about to lose said lines because of buffer filling,
761 and at the end of the file. The data in the line is written using fwrite() so
762 that a binary zero does not terminate it.
763
764 Arguments:
765 lastmatchnumber the number of the last matching line, plus one
766 lastmatchrestart where we restarted after the last match
767 endptr end of available data
768 printname filename for printing
769
770 Returns: nothing
771 */
772
773 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774 char *endptr, char *printname)
775 {
776 if (after_context > 0 && lastmatchnumber > 0)
777 {
778 int count = 0;
779 while (lastmatchrestart < endptr && count++ < after_context)
780 {
781 int ellength;
782 char *pp = lastmatchrestart;
783 if (printname != NULL) fprintf(stdout, "%s-", printname);
784 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 pp = end_of_line(pp, endptr, &ellength);
786 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787 lastmatchrestart = pp;
788 }
789 hyphenpending = TRUE;
790 }
791 }
792
793
794
795 /*************************************************
796 * Grep an individual file *
797 *************************************************/
798
799 /* This is called from grep_or_recurse() below. It uses a buffer that is three
800 times the value of MBUFTHIRD. The matching point is never allowed to stray into
801 the top third of the buffer, thus keeping more of the file available for
802 context printing or for multiline scanning. For large files, the pointer will
803 be in the middle third most of the time, so the bottom third is available for
804 "before" context printing.
805
806 Arguments:
807 in the fopened FILE stream
808 printname the file name if it is to be printed for each match
809 or NULL if the file name is not to be printed
810 it cannot be NULL if filenames[_nomatch]_only is set
811
812 Returns: 0 if there was at least one match
813 1 otherwise (no matches)
814 */
815
816 static int
817 pcregrep(FILE *in, char *printname)
818 {
819 int rc = 1;
820 int linenumber = 1;
821 int lastmatchnumber = 0;
822 int count = 0;
823 int offsets[99];
824 char *lastmatchrestart = NULL;
825 char buffer[3*MBUFTHIRD];
826 char *ptr = buffer;
827 char *endptr;
828 size_t bufflength;
829 BOOL endhyphenpending = FALSE;
830
831 /* Do the first read into the start of the buffer and set up the pointer to
832 end of what we have. */
833
834 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835 endptr = buffer + bufflength;
836
837 /* Loop while the current pointer is not at the end of the file. For large
838 files, endptr will be at the end of the buffer when we are in the middle of the
839 file, but ptr will never get there, because as soon as it gets over 2/3 of the
840 way, the buffer is shifted left and re-filled. */
841
842 while (ptr < endptr)
843 {
844 int i, endlinelength;
845 int mrc = 0;
846 BOOL match = FALSE;
847 char *t = ptr;
848 size_t length, linelength;
849
850 /* At this point, ptr is at the start of a line. We need to find the length
851 of the subject string to pass to pcre_exec(). In multiline mode, it is the
852 length of the remainder of the data in the buffer. Otherwise, it is the length of
853 the next line. After matching, we always advance by the length of the next
854 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855 that any match is constrained to be in the first line. */
856
857 t = end_of_line(t, endptr, &endlinelength);
858 linelength = t - ptr - endlinelength;
859 length = multiline? (size_t)(endptr - ptr) : linelength;
860
861 /* Extra processing for Jeffrey Friedl's debugging. */
862
863 #ifdef JFRIEDL_DEBUG
864 if (jfriedl_XT || jfriedl_XR)
865 {
866 #include <sys/time.h>
867 #include <time.h>
868 struct timeval start_time, end_time;
869 struct timezone dummy;
870
871 if (jfriedl_XT)
872 {
873 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874 const char *orig = ptr;
875 ptr = malloc(newlen + 1);
876 if (!ptr) {
877 printf("out of memory");
878 exit(2);
879 }
880 endptr = ptr;
881 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882 for (i = 0; i < jfriedl_XT; i++) {
883 strncpy(endptr, orig, length);
884 endptr += length;
885 }
886 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887 length = newlen;
888 }
889
890 if (gettimeofday(&start_time, &dummy) != 0)
891 perror("bad gettimeofday");
892
893
894 for (i = 0; i < jfriedl_XR; i++)
895 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896
897 if (gettimeofday(&end_time, &dummy) != 0)
898 perror("bad gettimeofday");
899
900 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901 -
902 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903
904 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905 return 0;
906 }
907 #endif
908
909
910 /* Run through all the patterns until one matches. Note that we don't include
911 the final newline in the subject string. */
912
913 for (i = 0; i < pattern_count; i++)
914 {
915 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916 offsets, 99);
917 if (mrc >= 0) { match = TRUE; break; }
918 if (mrc != PCRE_ERROR_NOMATCH)
919 {
920 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922 fprintf(stderr, "this line:\n");
923 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924 fprintf(stderr, "\n");
925 if (error_count == 0 &&
926 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927 {
928 fprintf(stderr, "pcregrep: error %d means that a resource limit "
929 "was exceeded\n", mrc);
930 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931 }
932 if (error_count++ > 20)
933 {
934 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935 exit(2);
936 }
937 match = invert; /* No more matching; don't show the line again */
938 break;
939 }
940 }
941
942 /* If it's a match or a not-match (as required), do what's wanted. */
943
944 if (match != invert)
945 {
946 BOOL hyphenprinted = FALSE;
947
948 /* We've failed if we want a file that doesn't have any matches. */
949
950 if (filenames == FN_NOMATCH_ONLY) return 1;
951
952 /* Just count if just counting is wanted. */
953
954 if (count_only) count++;
955
956 /* If all we want is a file name, there is no need to scan any more lines
957 in the file. */
958
959 else if (filenames == FN_ONLY)
960 {
961 fprintf(stdout, "%s\n", printname);
962 return 0;
963 }
964
965 /* Likewise, if all we want is a yes/no answer. */
966
967 else if (quiet) return 0;
968
969 /* The --only-matching option prints just the substring that matched, and
970 does not print any context. */
971
972 else if (only_matching)
973 {
974 if (printname != NULL) fprintf(stdout, "%s:", printname);
975 if (number) fprintf(stdout, "%d:", linenumber);
976 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977 fprintf(stdout, "\n");
978 }
979
980 /* This is the default case when none of the above options is set. We print
981 the matching line(s), possibly preceded and/or followed by other lines of
982 context. */
983
984 else
985 {
986 /* See if there is a requirement to print some "after" lines from a
987 previous match. We never print any overlaps. */
988
989 if (after_context > 0 && lastmatchnumber > 0)
990 {
991 int ellength;
992 int linecount = 0;
993 char *p = lastmatchrestart;
994
995 while (p < ptr && linecount < after_context)
996 {
997 p = end_of_line(p, ptr, &ellength);
998 linecount++;
999 }
1000
1001 /* It is important to advance lastmatchrestart during this printing so
1002 that it interacts correctly with any "before" printing below. Print
1003 each line's data using fwrite() in case there are binary zeroes. */
1004
1005 while (lastmatchrestart < p)
1006 {
1007 char *pp = lastmatchrestart;
1008 if (printname != NULL) fprintf(stdout, "%s-", printname);
1009 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 pp = end_of_line(pp, endptr, &ellength);
1011 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012 lastmatchrestart = pp;
1013 }
1014 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015 }
1016
1017 /* If there were non-contiguous lines printed above, insert hyphens. */
1018
1019 if (hyphenpending)
1020 {
1021 fprintf(stdout, "--\n");
1022 hyphenpending = FALSE;
1023 hyphenprinted = TRUE;
1024 }
1025
1026 /* See if there is a requirement to print some "before" lines for this
1027 match. Again, don't print overlaps. */
1028
1029 if (before_context > 0)
1030 {
1031 int linecount = 0;
1032 char *p = ptr;
1033
1034 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 linecount < before_context)
1036 {
1037 linecount++;
1038 p = previous_line(p, buffer);
1039 }
1040
1041 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042 fprintf(stdout, "--\n");
1043
1044 while (p < ptr)
1045 {
1046 int ellength;
1047 char *pp = p;
1048 if (printname != NULL) fprintf(stdout, "%s-", printname);
1049 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 pp = end_of_line(pp, endptr, &ellength);
1051 fwrite(p, 1, pp - p, stdout);
1052 p = pp;
1053 }
1054 }
1055
1056 /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 of the file if any context lines are being output. */
1058
1059 if (after_context > 0 || before_context > 0)
1060 endhyphenpending = TRUE;
1061
1062 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 if (number) fprintf(stdout, "%d:", linenumber);
1064
1065 /* In multiline mode, we want to print to the end of the line in which
1066 the end of the matched string is found, so we adjust linelength and the
1067 line number appropriately, but only when there actually was a match
1068 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069 the match will always be before the first newline sequence. */
1070
1071 if (multiline)
1072 {
1073 int ellength;
1074 char *endmatch = ptr;
1075 if (!invert)
1076 {
1077 endmatch += offsets[1];
1078 t = ptr;
1079 while (t < endmatch)
1080 {
1081 t = end_of_line(t, endptr, &ellength);
1082 if (t <= endmatch) linenumber++; else break;
1083 }
1084 }
1085 endmatch = end_of_line(endmatch, endptr, &ellength);
1086 linelength = endmatch - ptr - ellength;
1087 }
1088
1089 /*** NOTE: Use only fwrite() to output the data line, so that binary
1090 zeroes are treated as just another data character. */
1091
1092 /* This extra option, for Jeffrey Friedl's debugging requirements,
1093 replaces the matched string, or a specific captured string if it exists,
1094 with X. When this happens, colouring is ignored. */
1095
1096 #ifdef JFRIEDL_DEBUG
1097 if (S_arg >= 0 && S_arg < mrc)
1098 {
1099 int first = S_arg * 2;
1100 int last = first + 1;
1101 fwrite(ptr, 1, offsets[first], stdout);
1102 fprintf(stdout, "X");
1103 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104 }
1105 else
1106 #endif
1107
1108 /* We have to split the line(s) up if colouring. */
1109
1110 if (do_colour)
1111 {
1112 fwrite(ptr, 1, offsets[0], stdout);
1113 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115 fprintf(stdout, "%c[00m", 0x1b);
1116 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1117 }
1118 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1119 }
1120
1121 /* End of doing what has to be done for a match */
1122
1123 rc = 0; /* Had some success */
1124
1125 /* Remember where the last match happened for after_context. We remember
1126 where we are about to restart, and that line's number. */
1127
1128 lastmatchrestart = ptr + linelength + endlinelength;
1129 lastmatchnumber = linenumber + 1;
1130 }
1131
1132 /* For a match in multiline inverted mode (which of course did not cause
1133 anything to be printed), we have to move on to the end of the match before
1134 proceeding. */
1135
1136 if (multiline && invert && match)
1137 {
1138 int ellength;
1139 char *endmatch = ptr + offsets[1];
1140 t = ptr;
1141 while (t < endmatch)
1142 {
1143 t = end_of_line(t, endptr, &ellength);
1144 if (t <= endmatch) linenumber++; else break;
1145 }
1146 endmatch = end_of_line(endmatch, endptr, &ellength);
1147 linelength = endmatch - ptr - ellength;
1148 }
1149
1150 /* Advance to after the newline and increment the line number. */
1151
1152 ptr += linelength + endlinelength;
1153 linenumber++;
1154
1155 /* If we haven't yet reached the end of the file (the buffer is full), and
1156 the current point is in the top 1/3 of the buffer, slide the buffer down by
1157 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1158 about to be lost, print them. */
1159
1160 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1161 {
1162 if (after_context > 0 &&
1163 lastmatchnumber > 0 &&
1164 lastmatchrestart < buffer + MBUFTHIRD)
1165 {
1166 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1167 lastmatchnumber = 0;
1168 }
1169
1170 /* Now do the shuffle */
1171
1172 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1173 ptr -= MBUFTHIRD;
1174 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1175 endptr = buffer + bufflength;
1176
1177 /* Adjust any last match point */
1178
1179 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1180 }
1181 } /* Loop through the whole file */
1182
1183 /* End of file; print final "after" lines if wanted; do_after_lines sets
1184 hyphenpending if it prints something. */
1185
1186 if (!only_matching && !count_only)
1187 {
1188 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1189 hyphenpending |= endhyphenpending;
1190 }
1191
1192 /* Print the file name if we are looking for those without matches and there
1193 were none. If we found a match, we won't have got this far. */
1194
1195 if (filenames == FN_NOMATCH_ONLY)
1196 {
1197 fprintf(stdout, "%s\n", printname);
1198 return 0;
1199 }
1200
1201 /* Print the match count if wanted */
1202
1203 if (count_only)
1204 {
1205 if (printname != NULL) fprintf(stdout, "%s:", printname);
1206 fprintf(stdout, "%d\n", count);
1207 }
1208
1209 return rc;
1210 }
1211
1212
1213
1214 /*************************************************
1215 * Grep a file or recurse into a directory *
1216 *************************************************/
1217
1218 /* Given a path name, if it's a directory, scan all the files if we are
1219 recursing; if it's a file, grep it.
1220
1221 Arguments:
1222 pathname the path to investigate
1223 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1224 only_one_at_top TRUE if the path is the only one at toplevel
1225
1226 Returns: 0 if there was at least one match
1227 1 if there were no matches
1228 2 there was some kind of error
1229
1230 However, file opening failures are suppressed if "silent" is set.
1231 */
1232
1233 static int
1234 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1235 {
1236 int rc = 1;
1237 int sep;
1238 FILE *in;
1239
1240 /* If the file name is "-" we scan stdin */
1241
1242 if (strcmp(pathname, "-") == 0)
1243 {
1244 return pcregrep(stdin,
1245 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1246 stdin_name : NULL);
1247 }
1248
1249
1250 /* If the file is a directory, skip if skipping or if we are recursing, scan
1251 each file within it, subject to any include or exclude patterns that were set.
1252 The scanning code is localized so it can be made system-specific. */
1253
1254 if ((sep = isdirectory(pathname)) != 0)
1255 {
1256 if (dee_action == dee_SKIP) return 1;
1257 if (dee_action == dee_RECURSE)
1258 {
1259 char buffer[1024];
1260 char *nextfile;
1261 directory_type *dir = opendirectory(pathname);
1262
1263 if (dir == NULL)
1264 {
1265 if (!silent)
1266 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1267 strerror(errno));
1268 return 2;
1269 }
1270
1271 while ((nextfile = readdirectory(dir)) != NULL)
1272 {
1273 int frc, blen;
1274 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1275 blen = strlen(buffer);
1276
1277 if (exclude_compiled != NULL &&
1278 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1279 continue;
1280
1281 if (include_compiled != NULL &&
1282 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1283 continue;
1284
1285 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1286 if (frc > 1) rc = frc;
1287 else if (frc == 0 && rc == 1) rc = 0;
1288 }
1289
1290 closedirectory(dir);
1291 return rc;
1292 }
1293 }
1294
1295 /* If the file is not a directory and not a regular file, skip it if that's
1296 been requested. */
1297
1298 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1299
1300 /* Control reaches here if we have a regular file, or if we have a directory
1301 and recursion or skipping was not requested, or if we have anything else and
1302 skipping was not requested. The scan proceeds. If this is the first and only
1303 argument at top level, we don't show the file name, unless we are only showing
1304 the file name, or the filename was forced (-H). */
1305
1306 in = fopen(pathname, "r");
1307 if (in == NULL)
1308 {
1309 if (!silent)
1310 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1311 strerror(errno));
1312 return 2;
1313 }
1314
1315 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1316 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1317
1318 fclose(in);
1319 return rc;
1320 }
1321
1322
1323
1324
1325 /*************************************************
1326 * Usage function *
1327 *************************************************/
1328
1329 static int
1330 usage(int rc)
1331 {
1332 option_item *op;
1333 fprintf(stderr, "Usage: pcregrep [-");
1334 for (op = optionlist; op->one_char != 0; op++)
1335 {
1336 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1337 }
1338 fprintf(stderr, "] [long options] [pattern] [files]\n");
1339 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1340 return rc;
1341 }
1342
1343
1344
1345
1346 /*************************************************
1347 * Help function *
1348 *************************************************/
1349
1350 static void
1351 help(void)
1352 {
1353 option_item *op;
1354
1355 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1356 printf("Search for PATTERN in each FILE or standard input.\n");
1357 printf("PATTERN must be present if neither -e nor -f is used.\n");
1358 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1359 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1360
1361 printf("Options:\n");
1362
1363 for (op = optionlist; op->one_char != 0; op++)
1364 {
1365 int n;
1366 char s[4];
1367 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1368 printf(" %s --%s%n", s, op->long_name, &n);
1369 n = 30 - n;
1370 if (n < 1) n = 1;
1371 printf("%.*s%s\n", n, " ", op->help_text);
1372 }
1373
1374 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1375 printf("trailing white space is removed and blank lines are ignored.\n");
1376 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1377
1378 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1379 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1380 }
1381
1382
1383
1384
1385 /*************************************************
1386 * Handle a single-letter, no data option *
1387 *************************************************/
1388
1389 static int
1390 handle_option(int letter, int options)
1391 {
1392 switch(letter)
1393 {
1394 case N_HELP: help(); exit(0);
1395 case 'c': count_only = TRUE; break;
1396 case 'F': process_options |= PO_FIXED_STRINGS; break;
1397 case 'H': filenames = FN_FORCE; break;
1398 case 'h': filenames = FN_NONE; break;
1399 case 'i': options |= PCRE_CASELESS; break;
1400 case 'l': filenames = FN_ONLY; break;
1401 case 'L': filenames = FN_NOMATCH_ONLY; break;
1402 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1403 case 'n': number = TRUE; break;
1404 case 'o': only_matching = TRUE; break;
1405 case 'q': quiet = TRUE; break;
1406 case 'r': dee_action = dee_RECURSE; break;
1407 case 's': silent = TRUE; break;
1408 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1409 case 'v': invert = TRUE; break;
1410 case 'w': process_options |= PO_WORD_MATCH; break;
1411 case 'x': process_options |= PO_LINE_MATCH; break;
1412
1413 case 'V':
1414 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1415 exit(0);
1416 break;
1417
1418 default:
1419 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1420 exit(usage(2));
1421 }
1422
1423 return options;
1424 }
1425
1426
1427
1428
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:  n   the number
Returns:   pointer to a static buffer holding the text; it is overwritten by
             the next call, so use or copy the result before calling again
*/

static char *
ordin(int n)
{
  /* Big enough for the longest int that "%d" can produce ("-2147483648" for a
  32-bit int), the two-letter ending, and the terminating zero. */

  static char buffer[16];
  char *p = buffer;
  int lasttwo = n % 100;

  p += sprintf(p, "%d", n);

  /* 11, 12 and 13 (and 111, 112, ...) take "th", not "st", "nd", "rd". */

  if (lasttwo >= 11 && lasttwo <= 13)
  {
    strcpy(p, "th");
  }
  else switch (n%10)
  {
    case 1:  strcpy(p, "st"); break;
    case 2:  strcpy(p, "nd"); break;
    case 3:  strcpy(p, "rd"); break;
    default: strcpy(p, "th"); break;
  }

  return buffer;
}
1451
1452
1453
1454 /*************************************************
1455 * Compile a single pattern *
1456 *************************************************/
1457
1458 /* When the -F option has been used, this is called for each substring.
1459 Otherwise it's called for each supplied pattern.
1460
1461 Arguments:
1462 pattern the pattern string
1463 options the PCRE options
1464 filename the file name, or NULL for a command-line pattern
1465 count 0 if this is the only command line pattern, or
1466 number of the command line pattern, or
1467 linenumber for a pattern from a file
1468
1469 Returns: TRUE on success, FALSE after an error
1470 */
1471
1472 static BOOL
1473 compile_single_pattern(char *pattern, int options, char *filename, int count)
1474 {
1475 char buffer[MBUFTHIRD + 16];
1476 const char *error;
1477 int errptr;
1478
1479 if (pattern_count >= MAX_PATTERN_COUNT)
1480 {
1481 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1482 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1483 return FALSE;
1484 }
1485
1486 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1487 suffix[process_options]);
1488 pattern_list[pattern_count] =
1489 pcre_compile(buffer, options, &error, &errptr, pcretables);
1490 if (pattern_list[pattern_count] != NULL)
1491 {
1492 pattern_count++;
1493 return TRUE;
1494 }
1495
1496 /* Handle compile errors */
1497
1498 errptr -= (int)strlen(prefix[process_options]);
1499 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1500
1501 if (filename == NULL)
1502 {
1503 if (count == 0)
1504 fprintf(stderr, "pcregrep: Error in command-line regex "
1505 "at offset %d: %s\n", errptr, error);
1506 else
1507 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1508 "at offset %d: %s\n", ordin(count), errptr, error);
1509 }
1510 else
1511 {
1512 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1513 "at offset %d: %s\n", count, filename, errptr, error);
1514 }
1515
1516 return FALSE;
1517 }
1518
1519
1520
1521 /*************************************************
1522 * Compile one supplied pattern *
1523 *************************************************/
1524
1525 /* When the -F option has been used, each string may be a list of strings,
1526 separated by line breaks. They will be matched literally.
1527
1528 Arguments:
1529 pattern the pattern string
1530 options the PCRE options
1531 filename the file name, or NULL for a command-line pattern
1532 count 0 if this is the only command line pattern, or
1533 number of the command line pattern, or
1534 linenumber for a pattern from a file
1535
1536 Returns: TRUE on success, FALSE after an error
1537 */
1538
1539 static BOOL
1540 compile_pattern(char *pattern, int options, char *filename, int count)
1541 {
1542 if ((process_options & PO_FIXED_STRINGS) != 0)
1543 {
1544 char *eop = pattern + strlen(pattern);
1545 char buffer[MBUFTHIRD];
1546 for(;;)
1547 {
1548 int ellength;
1549 char *p = end_of_line(pattern, eop, &ellength);
1550 if (ellength == 0)
1551 return compile_single_pattern(pattern, options, filename, count);
1552 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553 pattern = p;
1554 if (!compile_single_pattern(buffer, options, filename, count))
1555 return FALSE;
1556 }
1557 }
1558 else return compile_single_pattern(pattern, options, filename, count);
1559 }
1560
1561
1562
1563 /*************************************************
1564 * Main program *
1565 *************************************************/
1566
1567 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1568
1569 int
1570 main(int argc, char **argv)
1571 {
1572 int i, j;
1573 int rc = 1;
1574 int pcre_options = 0;
1575 int cmd_pattern_count = 0;
1576 int hint_count = 0;
1577 int errptr;
1578 BOOL only_one_at_top;
1579 char *patterns[MAX_PATTERN_COUNT];
1580 const char *locale_from = "--locale";
1581 const char *error;
1582
1583 /* Set the default line ending value from the default in the PCRE library;
1584 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1585 */
1586
1587 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1588 switch(i)
1589 {
1590 default: newline = (char *)"lf"; break;
1591 case '\r': newline = (char *)"cr"; break;
1592 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593 case -1: newline = (char *)"any"; break;
1594 case -2: newline = (char *)"anycrlf"; break;
1595 }
1596
1597 /* Process the options */
1598
1599 for (i = 1; i < argc; i++)
1600 {
1601 option_item *op = NULL;
1602 char *option_data = (char *)""; /* default to keep compiler happy */
1603 BOOL longop;
1604 BOOL longopwasequals = FALSE;
1605
1606 if (argv[i][0] != '-') break;
1607
1608 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1609 but only if we have previously had -e or -f to define the patterns. */
1610
1611 if (argv[i][1] == 0)
1612 {
1613 if (pattern_filename != NULL || pattern_count > 0) break;
1614 else exit(usage(2));
1615 }
1616
1617 /* Handle a long name option, or -- to terminate the options */
1618
1619 if (argv[i][1] == '-')
1620 {
1621 char *arg = argv[i] + 2;
1622 char *argequals = strchr(arg, '=');
1623
1624 if (*arg == 0) /* -- terminates options */
1625 {
1626 i++;
1627 break; /* out of the options-handling loop */
1628 }
1629
1630 longop = TRUE;
1631
1632 /* Some long options have data that follows after =, for example file=name.
1633 Some options have variations in the long name spelling: specifically, we
1634 allow "regexp" because GNU grep allows it, though I personally go along
1635 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1636 These options are entered in the table as "regex(p)". No option is in both
1637 these categories, fortunately. */
1638
1639 for (op = optionlist; op->one_char != 0; op++)
1640 {
1641 char *opbra = strchr(op->long_name, '(');
1642 char *equals = strchr(op->long_name, '=');
1643 if (opbra == NULL) /* Not a (p) case */
1644 {
1645 if (equals == NULL) /* Not thing=data case */
1646 {
1647 if (strcmp(arg, op->long_name) == 0) break;
1648 }
1649 else /* Special case xxx=data */
1650 {
1651 int oplen = equals - op->long_name;
1652 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654 {
1655 option_data = arg + arglen;
1656 if (*option_data == '=')
1657 {
1658 option_data++;
1659 longopwasequals = TRUE;
1660 }
1661 break;
1662 }
1663 }
1664 }
1665 else /* Special case xxxx(p) */
1666 {
1667 char buff1[24];
1668 char buff2[24];
1669 int baselen = opbra - op->long_name;
1670 sprintf(buff1, "%.*s", baselen, op->long_name);
1671 sprintf(buff2, "%s%.*s", buff1,
1672 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674 break;
1675 }
1676 }
1677
1678 if (op->one_char == 0)
1679 {
1680 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1681 exit(usage(2));
1682 }
1683 }
1684
1685
1686 /* Jeffrey Friedl's debugging harness uses these additional options which
1687 are not in the right form for putting in the option table because they use
1688 only one hyphen, yet are more than one character long. By putting them
1689 separately here, they will not get displayed as part of the help() output,
1690 but I don't think Jeffrey will care about that. */
1691
1692 #ifdef JFRIEDL_DEBUG
1693 else if (strcmp(argv[i], "-pre") == 0) {
1694 jfriedl_prefix = argv[++i];
1695 continue;
1696 } else if (strcmp(argv[i], "-post") == 0) {
1697 jfriedl_postfix = argv[++i];
1698 continue;
1699 } else if (strcmp(argv[i], "-XT") == 0) {
1700 sscanf(argv[++i], "%d", &jfriedl_XT);
1701 continue;
1702 } else if (strcmp(argv[i], "-XR") == 0) {
1703 sscanf(argv[++i], "%d", &jfriedl_XR);
1704 continue;
1705 }
1706 #endif
1707
1708
1709 /* One-char options; many that have no data may be in a single argument; we
1710 continue till we hit the last one or one that needs data. */
1711
1712 else
1713 {
1714 char *s = argv[i] + 1;
1715 longop = FALSE;
1716 while (*s != 0)
1717 {
1718 for (op = optionlist; op->one_char != 0; op++)
1719 { if (*s == op->one_char) break; }
1720 if (op->one_char == 0)
1721 {
1722 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1723 *s, argv[i]);
1724 exit(usage(2));
1725 }
1726 if (op->type != OP_NODATA || s[1] == 0)
1727 {
1728 option_data = s+1;
1729 break;
1730 }
1731 pcre_options = handle_option(*s++, pcre_options);
1732 }
1733 }
1734
1735 /* At this point we should have op pointing to a matched option. If the type
1736 is NO_DATA, it means that there is no data, and the option might set
1737 something in the PCRE options. */
1738
1739 if (op->type == OP_NODATA)
1740 {
1741 pcre_options = handle_option(op->one_char, pcre_options);
1742 continue;
1743 }
1744
1745 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1746 either has a value or defaults to something. It cannot have data in a
1747 separate item. At the moment, the only such options are "colo(u)r" and
1748 Jeffrey Friedl's special -S debugging option. */
1749
1750 if (*option_data == 0 &&
1751 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1752 {
1753 switch (op->one_char)
1754 {
1755 case N_COLOUR:
1756 colour_option = (char *)"auto";
1757 break;
1758 #ifdef JFRIEDL_DEBUG
1759 case 'S':
1760 S_arg = 0;
1761 break;
1762 #endif
1763 }
1764 continue;
1765 }
1766
1767 /* Otherwise, find the data string for the option. */
1768
1769 if (*option_data == 0)
1770 {
1771 if (i >= argc - 1 || longopwasequals)
1772 {
1773 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1774 exit(usage(2));
1775 }
1776 option_data = argv[++i];
1777 }
1778
1779 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1780 multiple times to create a list of patterns. */
1781
1782 if (op->type == OP_PATLIST)
1783 {
1784 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1785 {
1786 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1787 MAX_PATTERN_COUNT);
1788 return 2;
1789 }
1790 patterns[cmd_pattern_count++] = option_data;
1791 }
1792
1793 /* Otherwise, deal with single string or numeric data values. */
1794
1795 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1796 {
1797 *((char **)op->dataptr) = option_data;
1798 }
1799 else
1800 {
1801 char *endptr;
1802 int n = strtoul(option_data, &endptr, 10);
1803 if (*endptr != 0)
1804 {
1805 if (longop)
1806 {
1807 char *equals = strchr(op->long_name, '=');
1808 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1809 equals - op->long_name;
1810 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1811 option_data, nlen, op->long_name);
1812 }
1813 else
1814 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1815 option_data, op->one_char);
1816 exit(usage(2));
1817 }
1818 *((int *)op->dataptr) = n;
1819 }
1820 }
1821
1822 /* Options have been decoded. If -C was used, its value is used as a default
1823 for -A and -B. */
1824
1825 if (both_context > 0)
1826 {
1827 if (after_context == 0) after_context = both_context;
1828 if (before_context == 0) before_context = both_context;
1829 }
1830
1831 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1832 LC_ALL environment variable is set, and if so, use it. */
1833
1834 if (locale == NULL)
1835 {
1836 locale = getenv("LC_ALL");
1837 locale_from = "LCC_ALL";
1838 }
1839
1840 if (locale == NULL)
1841 {
1842 locale = getenv("LC_CTYPE");
1843 locale_from = "LC_CTYPE";
1844 }
1845
1846 /* If a locale has been provided, set it, and generate the tables the PCRE
1847 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1848
1849 if (locale != NULL)
1850 {
1851 if (setlocale(LC_CTYPE, locale) == NULL)
1852 {
1853 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1854 locale, locale_from);
1855 return 2;
1856 }
1857 pcretables = pcre_maketables();
1858 }
1859
1860 /* Sort out colouring */
1861
1862 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1863 {
1864 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1865 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1866 else
1867 {
1868 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1869 colour_option);
1870 return 2;
1871 }
1872 if (do_colour)
1873 {
1874 char *cs = getenv("PCREGREP_COLOUR");
1875 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1876 if (cs != NULL) colour_string = cs;
1877 }
1878 }
1879
1880 /* Interpret the newline type; the default settings are Unix-like. */
1881
1882 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1883 {
1884 pcre_options |= PCRE_NEWLINE_CR;
1885 endlinetype = EL_CR;
1886 }
1887 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1888 {
1889 pcre_options |= PCRE_NEWLINE_LF;
1890 endlinetype = EL_LF;
1891 }
1892 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1893 {
1894 pcre_options |= PCRE_NEWLINE_CRLF;
1895 endlinetype = EL_CRLF;
1896 }
1897 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1898 {
1899 pcre_options |= PCRE_NEWLINE_ANY;
1900 endlinetype = EL_ANY;
1901 }
1902 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903 {
1904 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905 endlinetype = EL_ANYCRLF;
1906 }
1907 else
1908 {
1909 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1910 return 2;
1911 }
1912
1913 /* Interpret the text values for -d and -D */
1914
1915 if (dee_option != NULL)
1916 {
1917 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1918 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1919 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1920 else
1921 {
1922 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1923 return 2;
1924 }
1925 }
1926
1927 if (DEE_option != NULL)
1928 {
1929 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1930 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1931 else
1932 {
1933 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1934 return 2;
1935 }
1936 }
1937
1938 /* Check the values for Jeffrey Friedl's debugging options. */
1939
1940 #ifdef JFRIEDL_DEBUG
1941 if (S_arg > 9)
1942 {
1943 fprintf(stderr, "pcregrep: bad value for -S option\n");
1944 return 2;
1945 }
1946 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1947 {
1948 if (jfriedl_XT == 0) jfriedl_XT = 1;
1949 if (jfriedl_XR == 0) jfriedl_XR = 1;
1950 }
1951 #endif
1952
1953 /* Get memory to store the pattern and hints lists. */
1954
1955 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1956 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1957
1958 if (pattern_list == NULL || hints_list == NULL)
1959 {
1960 fprintf(stderr, "pcregrep: malloc failed\n");
1961 goto EXIT2;
1962 }
1963
1964 /* If no patterns were provided by -e, and there is no file provided by -f,
1965 the first argument is the one and only pattern, and it must exist. */
1966
1967 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1968 {
1969 if (i >= argc) return usage(2);
1970 patterns[cmd_pattern_count++] = argv[i++];
1971 }
1972
1973 /* Compile the patterns that were provided on the command line, either by
1974 multiple uses of -e or as a single unkeyed pattern. */
1975
1976 for (j = 0; j < cmd_pattern_count; j++)
1977 {
1978 if (!compile_pattern(patterns[j], pcre_options, NULL,
1979 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980 goto EXIT2;
1981 }
1982
1983 /* Compile the regular expressions that are provided in a file. */
1984
1985 if (pattern_filename != NULL)
1986 {
1987 int linenumber = 0;
1988 FILE *f;
1989 char *filename;
1990 char buffer[MBUFTHIRD];
1991
1992 if (strcmp(pattern_filename, "-") == 0)
1993 {
1994 f = stdin;
1995 filename = stdin_name;
1996 }
1997 else
1998 {
1999 f = fopen(pattern_filename, "r");
2000 if (f == NULL)
2001 {
2002 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003 strerror(errno));
2004 goto EXIT2;
2005 }
2006 filename = pattern_filename;
2007 }
2008
2009 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2010 {
2011 char *s = buffer + (int)strlen(buffer);
2012 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2013 *s = 0;
2014 linenumber++;
2015 if (buffer[0] == 0) continue; /* Skip blank lines */
2016 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2017 goto EXIT2;
2018 }
2019
2020 if (f != stdin) fclose(f);
2021 }
2022
2023 /* Study the regular expressions, as we will be running them many times */
2024
2025 for (j = 0; j < pattern_count; j++)
2026 {
2027 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2028 if (error != NULL)
2029 {
2030 char s[16];
2031 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2032 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033 goto EXIT2;
2034 }
2035 hint_count++;
2036 }
2037
2038 /* If there are include or exclude patterns, compile them. */
2039
2040 if (exclude_pattern != NULL)
2041 {
2042 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2043 pcretables);
2044 if (exclude_compiled == NULL)
2045 {
2046 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2047 errptr, error);
2048 goto EXIT2;
2049 }
2050 }
2051
2052 if (include_pattern != NULL)
2053 {
2054 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2055 pcretables);
2056 if (include_compiled == NULL)
2057 {
2058 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2059 errptr, error);
2060 goto EXIT2;
2061 }
2062 }
2063
2064 /* If there are no further arguments, do the business on stdin and exit. */
2065
2066 if (i >= argc)
2067 {
2068 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069 goto EXIT;
2070 }
2071
2072 /* Otherwise, work through the remaining arguments as files or directories.
2073 Pass in the fact that there is only one argument at top level - this suppresses
2074 the file name if the argument is not a directory and filenames are not
2075 otherwise forced. */
2076
2077 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2078
2079 for (; i < argc; i++)
2080 {
2081 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2082 only_one_at_top);
2083 if (frc > 1) rc = frc;
2084 else if (frc == 0 && rc == 1) rc = 0;
2085 }
2086
2087 EXIT:
2088 if (pattern_list != NULL)
2089 {
2090 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091 free(pattern_list);
2092 }
2093 if (hints_list != NULL)
2094 {
2095 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096 free(hints_list);
2097 }
2098 return rc;
2099
2100 EXIT2:
2101 rc = 2;
2102 goto EXIT;
2103 }
2104
2105 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12