/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 279 - (show annotations) (download)
Tue Dec 4 20:01:43 2007 UTC (6 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 60113 byte(s)
Fix -o bugs in pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include "pcre.h"
59
60 #define FALSE 0
61 #define TRUE 1
62
63 typedef int BOOL;
64
65 #define MAX_PATTERN_COUNT 100
66
67 #if BUFSIZ > 8192
68 #define MBUFTHIRD BUFSIZ
69 #else
70 #define MBUFTHIRD 8192
71 #endif
72
73 /* Values for the "filenames" variable, which specifies options for file name
74 output. The order is important; it is assumed that a file name is wanted for
75 all values greater than FN_DEFAULT. */
76
77 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78
79 /* Actions for the -d and -D options */
80
81 enum { dee_READ, dee_SKIP, dee_RECURSE };
82 enum { DEE_READ, DEE_SKIP };
83
84 /* Actions for special processing options (flag bits) */
85
86 #define PO_WORD_MATCH 0x0001
87 #define PO_LINE_MATCH 0x0002
88 #define PO_FIXED_STRINGS 0x0004
89
90 /* Line ending types */
91
92 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL hyphenpending = FALSE;
146 static BOOL invert = FALSE;
147 static BOOL multiline = FALSE;
148 static BOOL number = FALSE;
149 static BOOL only_matching = FALSE;
150 static BOOL quiet = FALSE;
151 static BOOL silent = FALSE;
152 static BOOL utf8 = FALSE;
153
154 /* Structure for options and list of them */
155
156 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157 OP_PATLIST };
158
159 typedef struct option_item {
160 int type;
161 int one_char;
162 void *dataptr;
163 const char *long_name;
164 const char *help_text;
165 } option_item;
166
167 /* Options without a single-letter equivalent get a negative value. This can be
168 used to identify them. */
169
170 #define N_COLOUR (-1)
171 #define N_EXCLUDE (-2)
172 #define N_HELP (-3)
173 #define N_INCLUDE (-4)
174 #define N_LABEL (-5)
175 #define N_LOCALE (-6)
176 #define N_NULL (-7)
177
178 static option_item optionlist[] = {
179 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207 #ifdef JFRIEDL_DEBUG
208 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209 #endif
210 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216 { OP_NODATA, 0, NULL, NULL, NULL }
217 };
218
219 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221 that the combination of -w and -x has the same effect as -x on its own, so we
222 can treat them as the same. */
223
/* Pattern prefixes, indexed by the low three bits of process_options
(PO_WORD_MATCH = 1, PO_LINE_MATCH = 2, PO_FIXED_STRINGS = 4). */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x (same as -x) */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x (same as -F -x) */
};

/* Pattern suffixes; entry N closes what prefix[N] opened. */

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x (same as -x) */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x (same as -F -x) */
};
229
230 /* UTF-8 tables - used only when the newline setting is "any". */
231
/* Masks for the data bits held in the first byte of a UTF-8 character,
indexed by the number of additional bytes that follow it. */

const int utf8_table3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 additional byte  */
  0x0f,   /* 2 additional bytes */
  0x07,   /* 3 additional bytes */
  0x03,   /* 4 additional bytes */
  0x01    /* 5 additional bytes */
};

/* Number of additional bytes that follow a first byte >= 0xc0, indexed by the
bottom six bits of that byte. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,    /* 0x00 - 0x07 */
  1,1,1,1,1,1,1,1,    /* 0x08 - 0x0f */
  1,1,1,1,1,1,1,1,    /* 0x10 - 0x17 */
  1,1,1,1,1,1,1,1,    /* 0x18 - 0x1f */
  2,2,2,2,2,2,2,2,    /* 0x20 - 0x27 */
  2,2,2,2,2,2,2,2,    /* 0x28 - 0x2f */
  3,3,3,3,3,3,3,3,    /* 0x30 - 0x37 */
  4,4,4,4,            /* 0x38 - 0x3b */
  5,5,5,5             /* 0x3c - 0x3f */
};
239
240
241
242 /*************************************************
243 * OS-specific functions *
244 *************************************************/
245
246 /* These functions are defined so that they can be made system specific,
247 although at present the only ones are for Unix, Win32, and for "no support". */
248
249
250 /************* Directory scanning in Unix ***********/
251
252 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 #include <sys/types.h>
254 #include <sys/stat.h>
255 #include <dirent.h>
256
257 typedef DIR directory_type;
258
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be stat()ed is reported as "not a directory", in the
  expectation that opening it as a file will fail. */

  if (stat(filename, &info) < 0)
    return 0;

  if (S_ISDIR(info.st_mode))
    return '/';

  return 0;
}
267
268 static directory_type *
269 opendirectory(char *filename)
270 {
271 return opendir(filename);
272 }
273
274 static char *
275 readdirectory(directory_type *dir)
276 {
277 for (;;)
278 {
279 struct dirent *dent = readdir(dir);
280 if (dent == NULL) return NULL;
281 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282 return dent->d_name;
283 }
284 /* Control never reaches here */
285 }
286
287 static void
288 closedirectory(directory_type *dir)
289 {
290 closedir(dir);
291 }
292
293
294 /************* Test for regular file in Unix **********/
295
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() fails (in the expectation that opening as a file will
              then fail); otherwise non-zero only for a regular file
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;

  return S_ISREG(info.st_mode)? 1 : 0;
}
304
305
306 /************* Test stdout for being a terminal in Unix **********/
307
308 static BOOL
309 is_stdout_tty(void)
310 {
311 return isatty(fileno(stdout));
312 }
313
314
315 /************* Directory scanning in Win32 ***********/
316
317 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319 when it did not exist. */
320
321
322 #elif HAVE_WINDOWS_H
323
324 #ifndef STRICT
325 # define STRICT
326 #endif
327 #ifndef WIN32_LEAN_AND_MEAN
328 # define WIN32_LEAN_AND_MEAN
329 #endif
330 #ifndef INVALID_FILE_ATTRIBUTES
331 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332 #endif
333
334 #include <windows.h>
335
336 typedef struct directory_type
337 {
338 HANDLE handle;
339 BOOL first;
340 WIN32_FIND_DATA data;
341 } directory_type;
342
343 int
344 isdirectory(char *filename)
345 {
346 DWORD attr = GetFileAttributes(filename);
347 if (attr == INVALID_FILE_ATTRIBUTES)
348 return 0;
349 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350 }
351
352 directory_type *
353 opendirectory(char *filename)
354 {
355 size_t len;
356 char *pattern;
357 directory_type *dir;
358 DWORD err;
359 len = strlen(filename);
360 pattern = (char *) malloc(len + 3);
361 dir = (directory_type *) malloc(sizeof(*dir));
362 if ((pattern == NULL) || (dir == NULL))
363 {
364 fprintf(stderr, "pcregrep: malloc failed\n");
365 exit(2);
366 }
367 memcpy(pattern, filename, len);
368 memcpy(&(pattern[len]), "\\*", 3);
369 dir->handle = FindFirstFile(pattern, &(dir->data));
370 if (dir->handle != INVALID_HANDLE_VALUE)
371 {
372 free(pattern);
373 dir->first = TRUE;
374 return dir;
375 }
376 err = GetLastError();
377 free(pattern);
378 free(dir);
379 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380 return NULL;
381 }
382
383 char *
384 readdirectory(directory_type *dir)
385 {
386 for (;;)
387 {
388 if (!dir->first)
389 {
390 if (!FindNextFile(dir->handle, &(dir->data)))
391 return NULL;
392 }
393 else
394 {
395 dir->first = FALSE;
396 }
397 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398 return dir->data.cFileName;
399 }
400 #ifndef _MSC_VER
401 return NULL; /* Keep compiler happy; never executed */
402 #endif
403 }
404
405 void
406 closedirectory(directory_type *dir)
407 {
408 FindClose(dir->handle);
409 free(dir);
410 }
411
412
413 /************* Test for regular file in Win32 **********/
414
415 /* I don't know how to do this, or if it can be done; assume all paths are
416 regular if they are not directories. */
417
/* Test for a regular file (Win32 version). Every path that is not a directory
is assumed to be a regular file.

Argument:   filename   the path to test
Returns:    non-zero if isdirectory() reports that the path is not a directory
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);   /* the terminating semicolon was missing */
}
422
423
424 /************* Test stdout for being a terminal in Win32 **********/
425
426 /* I don't know how to do this; assume never */
427
428 static BOOL
429 is_stdout_tty(void)
430 {
431 FALSE;
432 }
433
434
435 /************* Directory scanning when we can't do it ***********/
436
437 /* The type is void, and apart from isdirectory(), the functions do nothing. */
438
439 #else
440
/* Placeholder directory type for systems with no directory scanning. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Opening a directory always fails. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
447
448
449 /************* Test for regular when we can't do it **********/
450
451 /* Assume all files are regular. */
452
/* With no way of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
454
455
456 /************* Test stdout for being a terminal when we can't do it **********/
457
458 static BOOL
459 is_stdout_tty(void)
460 {
461 return FALSE;
462 }
463
464
465 #endif
466
467
468
469 #ifndef HAVE_STRERROR
470 /*************************************************
471 * Provide strerror() for non-ANSI libraries *
472 *************************************************/
473
474 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475 in their libraries, but can provide the same facility by this simple
476 alternative function. */
477
/* The error message table and its size, supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Look up the message for an error number.

Argument:   n   the error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
              "unknown error number" string
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
487 #endif /* HAVE_STRERROR */
488
489
490
491 /*************************************************
492 * Find end of line *
493 *************************************************/
494
495 /* The length of the endline sequence that is found is set via lenptr. This may
496 be zero at the very end of the file if there is no line-ending sequence there.
497
498 Arguments:
499 p current position in line
500 endptr end of available data
501 lenptr where to put the length of the eol sequence
502
503 Returns: pointer to the last byte of the line
504 */
505
506 static char *
507 end_of_line(char *p, char *endptr, int *lenptr)
508 {
509 switch(endlinetype)
510 {
511 default: /* Just in case */
512 case EL_LF:
513 while (p < endptr && *p != '\n') p++;
514 if (p < endptr)
515 {
516 *lenptr = 1;
517 return p + 1;
518 }
519 *lenptr = 0;
520 return endptr;
521
522 case EL_CR:
523 while (p < endptr && *p != '\r') p++;
524 if (p < endptr)
525 {
526 *lenptr = 1;
527 return p + 1;
528 }
529 *lenptr = 0;
530 return endptr;
531
532 case EL_CRLF:
533 for (;;)
534 {
535 while (p < endptr && *p != '\r') p++;
536 if (++p >= endptr)
537 {
538 *lenptr = 0;
539 return endptr;
540 }
541 if (*p == '\n')
542 {
543 *lenptr = 2;
544 return p + 1;
545 }
546 }
547 break;
548
549 case EL_ANYCRLF:
550 while (p < endptr)
551 {
552 int extra = 0;
553 register int c = *((unsigned char *)p);
554
555 if (utf8 && c >= 0xc0)
556 {
557 int gcii, gcss;
558 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559 gcss = 6*extra;
560 c = (c & utf8_table3[extra]) << gcss;
561 for (gcii = 1; gcii <= extra; gcii++)
562 {
563 gcss -= 6;
564 c |= (p[gcii] & 0x3f) << gcss;
565 }
566 }
567
568 p += 1 + extra;
569
570 switch (c)
571 {
572 case 0x0a: /* LF */
573 *lenptr = 1;
574 return p;
575
576 case 0x0d: /* CR */
577 if (p < endptr && *p == 0x0a)
578 {
579 *lenptr = 2;
580 p++;
581 }
582 else *lenptr = 1;
583 return p;
584
585 default:
586 break;
587 }
588 } /* End of loop for ANYCRLF case */
589
590 *lenptr = 0; /* Must have hit the end */
591 return endptr;
592
593 case EL_ANY:
594 while (p < endptr)
595 {
596 int extra = 0;
597 register int c = *((unsigned char *)p);
598
599 if (utf8 && c >= 0xc0)
600 {
601 int gcii, gcss;
602 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603 gcss = 6*extra;
604 c = (c & utf8_table3[extra]) << gcss;
605 for (gcii = 1; gcii <= extra; gcii++)
606 {
607 gcss -= 6;
608 c |= (p[gcii] & 0x3f) << gcss;
609 }
610 }
611
612 p += 1 + extra;
613
614 switch (c)
615 {
616 case 0x0a: /* LF */
617 case 0x0b: /* VT */
618 case 0x0c: /* FF */
619 *lenptr = 1;
620 return p;
621
622 case 0x0d: /* CR */
623 if (p < endptr && *p == 0x0a)
624 {
625 *lenptr = 2;
626 p++;
627 }
628 else *lenptr = 1;
629 return p;
630
631 case 0x85: /* NEL */
632 *lenptr = utf8? 2 : 1;
633 return p;
634
635 case 0x2028: /* LS */
636 case 0x2029: /* PS */
637 *lenptr = 3;
638 return p;
639
640 default:
641 break;
642 }
643 } /* End of loop for ANY case */
644
645 *lenptr = 0; /* Must have hit the end */
646 return endptr;
647 } /* End of overall switch */
648 }
649
650
651
652 /*************************************************
653 * Find start of previous line *
654 *************************************************/
655
656 /* This is called when looking back for before lines to print.
657
658 Arguments:
659 p start of the subsequent line
660 startptr start of available data
661
662 Returns: pointer to the start of the previous line
663 */
664
665 static char *
666 previous_line(char *p, char *startptr)
667 {
668 switch(endlinetype)
669 {
670 default: /* Just in case */
671 case EL_LF:
672 p--;
673 while (p > startptr && p[-1] != '\n') p--;
674 return p;
675
676 case EL_CR:
677 p--;
678 while (p > startptr && p[-1] != '\n') p--;
679 return p;
680
681 case EL_CRLF:
682 for (;;)
683 {
684 p -= 2;
685 while (p > startptr && p[-1] != '\n') p--;
686 if (p <= startptr + 1 || p[-2] == '\r') return p;
687 }
688 return p; /* But control should never get here */
689
690 case EL_ANY:
691 case EL_ANYCRLF:
692 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693 if (utf8) while ((*p & 0xc0) == 0x80) p--;
694
695 while (p > startptr)
696 {
697 register int c;
698 char *pp = p - 1;
699
700 if (utf8)
701 {
702 int extra = 0;
703 while ((*pp & 0xc0) == 0x80) pp--;
704 c = *((unsigned char *)pp);
705 if (c >= 0xc0)
706 {
707 int gcii, gcss;
708 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709 gcss = 6*extra;
710 c = (c & utf8_table3[extra]) << gcss;
711 for (gcii = 1; gcii <= extra; gcii++)
712 {
713 gcss -= 6;
714 c |= (pp[gcii] & 0x3f) << gcss;
715 }
716 }
717 }
718 else c = *((unsigned char *)pp);
719
720 if (endlinetype == EL_ANYCRLF) switch (c)
721 {
722 case 0x0a: /* LF */
723 case 0x0d: /* CR */
724 return p;
725
726 default:
727 break;
728 }
729
730 else switch (c)
731 {
732 case 0x0a: /* LF */
733 case 0x0b: /* VT */
734 case 0x0c: /* FF */
735 case 0x0d: /* CR */
736 case 0x85: /* NEL */
737 case 0x2028: /* LS */
738 case 0x2029: /* PS */
739 return p;
740
741 default:
742 break;
743 }
744
745 p = pp; /* Back one character */
746 } /* End of loop for ANY case */
747
748 return startptr; /* Hit start of data */
749 } /* End of overall switch */
750 }
751
752
753
754
755
756 /*************************************************
757 * Print the previous "after" lines *
758 *************************************************/
759
760 /* This is called if we are about to lose said lines because of buffer filling,
761 and at the end of the file. The data in the line is written using fwrite() so
762 that a binary zero does not terminate it.
763
764 Arguments:
765 lastmatchnumber the number of the last matching line, plus one
766 lastmatchrestart where we restarted after the last match
767 endptr end of available data
768 printname filename for printing
769
770 Returns: nothing
771 */
772
773 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774 char *endptr, char *printname)
775 {
776 if (after_context > 0 && lastmatchnumber > 0)
777 {
778 int count = 0;
779 while (lastmatchrestart < endptr && count++ < after_context)
780 {
781 int ellength;
782 char *pp = lastmatchrestart;
783 if (printname != NULL) fprintf(stdout, "%s-", printname);
784 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 pp = end_of_line(pp, endptr, &ellength);
786 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787 lastmatchrestart = pp;
788 }
789 hyphenpending = TRUE;
790 }
791 }
792
793
794
795 /*************************************************
796 * Grep an individual file *
797 *************************************************/
798
799 /* This is called from grep_or_recurse() below. It uses a buffer that is three
800 times the value of MBUFTHIRD. The matching point is never allowed to stray into
801 the top third of the buffer, thus keeping more of the file available for
802 context printing or for multiline scanning. For large files, the pointer will
803 be in the middle third most of the time, so the bottom third is available for
804 "before" context printing.
805
806 Arguments:
807 in the fopened FILE stream
808 printname the file name if it is to be printed for each match
809 or NULL if the file name is not to be printed
810 it cannot be NULL if filenames[_nomatch]_only is set
811
812 Returns: 0 if there was at least one match
813 1 otherwise (no matches)
814 */
815
816 static int
817 pcregrep(FILE *in, char *printname)
818 {
819 int rc = 1;
820 int linenumber = 1;
821 int lastmatchnumber = 0;
822 int count = 0;
823 int offsets[99];
824 char *lastmatchrestart = NULL;
825 char buffer[3*MBUFTHIRD];
826 char *ptr = buffer;
827 char *endptr;
828 size_t bufflength;
829 BOOL endhyphenpending = FALSE;
830
831 /* Do the first read into the start of the buffer and set up the pointer to
832 end of what we have. */
833
834 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835 endptr = buffer + bufflength;
836
837 /* Loop while the current pointer is not at the end of the file. For large
838 files, endptr will be at the end of the buffer when we are in the middle of the
839 file, but ptr will never get there, because as soon as it gets over 2/3 of the
840 way, the buffer is shifted left and re-filled. */
841
842 while (ptr < endptr)
843 {
844 int i, endlinelength;
845 int mrc = 0;
846 BOOL match = FALSE;
847 char *matchptr = ptr;
848 char *t = ptr;
849 size_t length, linelength;
850
851 /* At this point, ptr is at the start of a line. We need to find the length
852 of the subject string to pass to pcre_exec(). In multiline mode, it is the
853 length remainder of the data in the buffer. Otherwise, it is the length of
854 the next line. After matching, we always advance by the length of the next
855 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
856 that any match is constrained to be in the first line. */
857
858 t = end_of_line(t, endptr, &endlinelength);
859 linelength = t - ptr - endlinelength;
860 length = multiline? (size_t)(endptr - ptr) : linelength;
861
862 /* Extra processing for Jeffrey Friedl's debugging. */
863
864 #ifdef JFRIEDL_DEBUG
865 if (jfriedl_XT || jfriedl_XR)
866 {
867 #include <sys/time.h>
868 #include <time.h>
869 struct timeval start_time, end_time;
870 struct timezone dummy;
871
872 if (jfriedl_XT)
873 {
874 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
875 const char *orig = ptr;
876 ptr = malloc(newlen + 1);
877 if (!ptr) {
878 printf("out of memory");
879 exit(2);
880 }
881 endptr = ptr;
882 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
883 for (i = 0; i < jfriedl_XT; i++) {
884 strncpy(endptr, orig, length);
885 endptr += length;
886 }
887 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
888 length = newlen;
889 }
890
891 if (gettimeofday(&start_time, &dummy) != 0)
892 perror("bad gettimeofday");
893
894
895 for (i = 0; i < jfriedl_XR; i++)
896 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
897
898 if (gettimeofday(&end_time, &dummy) != 0)
899 perror("bad gettimeofday");
900
901 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
902 -
903 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
904
905 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
906 return 0;
907 }
908 #endif
909
910 /* We come back here after a match when the -o option (only_matching) is set,
911 in order to find any further matches in the same line. */
912
913 ONLY_MATCHING_RESTART:
914
915 /* Run through all the patterns until one matches. Note that we don't include
916 the final newline in the subject string. */
917
918 for (i = 0; i < pattern_count; i++)
919 {
920 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
921 offsets, 99);
922 if (mrc >= 0) { match = TRUE; break; }
923 if (mrc != PCRE_ERROR_NOMATCH)
924 {
925 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
926 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
927 fprintf(stderr, "this line:\n");
928 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
929 fprintf(stderr, "\n");
930 if (error_count == 0 &&
931 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
932 {
933 fprintf(stderr, "pcregrep: error %d means that a resource limit "
934 "was exceeded\n", mrc);
935 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
936 }
937 if (error_count++ > 20)
938 {
939 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
940 exit(2);
941 }
942 match = invert; /* No more matching; don't show the line again */
943 break;
944 }
945 }
946
947 /* If it's a match or a not-match (as required), do what's wanted. */
948
949 if (match != invert)
950 {
951 BOOL hyphenprinted = FALSE;
952
953 /* We've failed if we want a file that doesn't have any matches. */
954
955 if (filenames == FN_NOMATCH_ONLY) return 1;
956
957 /* Just count if just counting is wanted. */
958
959 if (count_only) count++;
960
961 /* If all we want is a file name, there is no need to scan any more lines
962 in the file. */
963
964 else if (filenames == FN_ONLY)
965 {
966 fprintf(stdout, "%s\n", printname);
967 return 0;
968 }
969
970 /* Likewise, if all we want is a yes/no answer. */
971
972 else if (quiet) return 0;
973
974 /* The --only-matching option prints just the substring that matched, and
975 does not print any context. Afterwards, adjust the start and length, and
976 then jump back to look for further matches in the same line. If we are in
977 invert mode, however, nothing is printed - this could be useful still
978 because the return code is set. */
979
980 else if (only_matching)
981 {
982 if (!invert)
983 {
984 if (printname != NULL) fprintf(stdout, "%s:", printname);
985 if (number) fprintf(stdout, "%d:", linenumber);
986 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
987 fprintf(stdout, "\n");
988 matchptr += offsets[1];
989 length -= offsets[1];
990 match = FALSE;
991 goto ONLY_MATCHING_RESTART;
992 }
993 }
994
995 /* This is the default case when none of the above options is set. We print
996 the matching lines(s), possibly preceded and/or followed by other lines of
997 context. */
998
999 else
1000 {
1001 /* See if there is a requirement to print some "after" lines from a
1002 previous match. We never print any overlaps. */
1003
1004 if (after_context > 0 && lastmatchnumber > 0)
1005 {
1006 int ellength;
1007 int linecount = 0;
1008 char *p = lastmatchrestart;
1009
1010 while (p < ptr && linecount < after_context)
1011 {
1012 p = end_of_line(p, ptr, &ellength);
1013 linecount++;
1014 }
1015
1016 /* It is important to advance lastmatchrestart during this printing so
1017 that it interacts correctly with any "before" printing below. Print
1018 each line's data using fwrite() in case there are binary zeroes. */
1019
1020 while (lastmatchrestart < p)
1021 {
1022 char *pp = lastmatchrestart;
1023 if (printname != NULL) fprintf(stdout, "%s-", printname);
1024 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1025 pp = end_of_line(pp, endptr, &ellength);
1026 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1027 lastmatchrestart = pp;
1028 }
1029 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1030 }
1031
1032 /* If there were non-contiguous lines printed above, insert hyphens. */
1033
1034 if (hyphenpending)
1035 {
1036 fprintf(stdout, "--\n");
1037 hyphenpending = FALSE;
1038 hyphenprinted = TRUE;
1039 }
1040
1041 /* See if there is a requirement to print some "before" lines for this
1042 match. Again, don't print overlaps. */
1043
1044 if (before_context > 0)
1045 {
1046 int linecount = 0;
1047 char *p = ptr;
1048
1049 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1050 linecount < before_context)
1051 {
1052 linecount++;
1053 p = previous_line(p, buffer);
1054 }
1055
1056 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1057 fprintf(stdout, "--\n");
1058
1059 while (p < ptr)
1060 {
1061 int ellength;
1062 char *pp = p;
1063 if (printname != NULL) fprintf(stdout, "%s-", printname);
1064 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1065 pp = end_of_line(pp, endptr, &ellength);
1066 fwrite(p, 1, pp - p, stdout);
1067 p = pp;
1068 }
1069 }
1070
1071 /* Now print the matching line(s); ensure we set hyphenpending at the end
1072 of the file if any context lines are being output. */
1073
1074 if (after_context > 0 || before_context > 0)
1075 endhyphenpending = TRUE;
1076
1077 if (printname != NULL) fprintf(stdout, "%s:", printname);
1078 if (number) fprintf(stdout, "%d:", linenumber);
1079
1080 /* In multiline mode, we want to print to the end of the line in which
1081 the end of the matched string is found, so we adjust linelength and the
1082 line number appropriately, but only when there actually was a match
1083 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1084 the match will always be before the first newline sequence. */
1085
1086 if (multiline)
1087 {
1088 int ellength;
1089 char *endmatch = ptr;
1090 if (!invert)
1091 {
1092 endmatch += offsets[1];
1093 t = ptr;
1094 while (t < endmatch)
1095 {
1096 t = end_of_line(t, endptr, &ellength);
1097 if (t <= endmatch) linenumber++; else break;
1098 }
1099 }
1100 endmatch = end_of_line(endmatch, endptr, &ellength);
1101 linelength = endmatch - ptr - ellength;
1102 }
1103
1104 /*** NOTE: Use only fwrite() to output the data line, so that binary
1105 zeroes are treated as just another data character. */
1106
1107 /* This extra option, for Jeffrey Friedl's debugging requirements,
1108 replaces the matched string, or a specific captured string if it exists,
1109 with X. When this happens, colouring is ignored. */
1110
1111 #ifdef JFRIEDL_DEBUG
1112 if (S_arg >= 0 && S_arg < mrc)
1113 {
1114 int first = S_arg * 2;
1115 int last = first + 1;
1116 fwrite(ptr, 1, offsets[first], stdout);
1117 fprintf(stdout, "X");
1118 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1119 }
1120 else
1121 #endif
1122
1123 /* We have to split the line(s) up if colouring. */
1124
1125 if (do_colour)
1126 {
1127 fwrite(ptr, 1, offsets[0], stdout);
1128 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1129 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1130 fprintf(stdout, "%c[00m", 0x1b);
1131 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1132 stdout);
1133 }
1134 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1135 }
1136
1137 /* End of doing what has to be done for a match */
1138
1139 rc = 0; /* Had some success */
1140
1141 /* Remember where the last match happened for after_context. We remember
1142 where we are about to restart, and that line's number. */
1143
1144 lastmatchrestart = ptr + linelength + endlinelength;
1145 lastmatchnumber = linenumber + 1;
1146 }
1147
1148 /* For a match in multiline inverted mode (which of course did not cause
1149 anything to be printed), we have to move on to the end of the match before
1150 proceeding. */
1151
1152 if (multiline && invert && match)
1153 {
1154 int ellength;
1155 char *endmatch = ptr + offsets[1];
1156 t = ptr;
1157 while (t < endmatch)
1158 {
1159 t = end_of_line(t, endptr, &ellength);
1160 if (t <= endmatch) linenumber++; else break;
1161 }
1162 endmatch = end_of_line(endmatch, endptr, &ellength);
1163 linelength = endmatch - ptr - ellength;
1164 }
1165
1166 /* Advance to after the newline and increment the line number. */
1167
1168 ptr += linelength + endlinelength;
1169 linenumber++;
1170
1171 /* If we haven't yet reached the end of the file (the buffer is full), and
1172 the current point is in the top 1/3 of the buffer, slide the buffer down by
1173 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1174 about to be lost, print them. */
1175
1176 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1177 {
1178 if (after_context > 0 &&
1179 lastmatchnumber > 0 &&
1180 lastmatchrestart < buffer + MBUFTHIRD)
1181 {
1182 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1183 lastmatchnumber = 0;
1184 }
1185
1186 /* Now do the shuffle */
1187
1188 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1189 ptr -= MBUFTHIRD;
1190 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1191 endptr = buffer + bufflength;
1192
1193 /* Adjust any last match point */
1194
1195 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1196 }
1197 } /* Loop through the whole file */
1198
1199 /* End of file; print final "after" lines if wanted; do_after_lines sets
1200 hyphenpending if it prints something. */
1201
1202 if (!only_matching && !count_only)
1203 {
1204 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1205 hyphenpending |= endhyphenpending;
1206 }
1207
1208 /* Print the file name if we are looking for those without matches and there
1209 were none. If we found a match, we won't have got this far. */
1210
1211 if (filenames == FN_NOMATCH_ONLY)
1212 {
1213 fprintf(stdout, "%s\n", printname);
1214 return 0;
1215 }
1216
1217 /* Print the match count if wanted */
1218
1219 if (count_only)
1220 {
1221 if (printname != NULL) fprintf(stdout, "%s:", printname);
1222 fprintf(stdout, "%d\n", count);
1223 }
1224
1225 return rc;
1226 }
1227
1228
1229
1230 /*************************************************
1231 * Grep a file or recurse into a directory *
1232 *************************************************/
1233
1234 /* Given a path name, if it's a directory, scan all the files if we are
1235 recursing; if it's a file, grep it.
1236
1237 Arguments:
1238 pathname the path to investigate
1239 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1240 only_one_at_top TRUE if the path is the only one at toplevel
1241
1242 Returns: 0 if there was at least one match
1243 1 if there were no matches
1244 2 there was some kind of error
1245
1246 However, file opening failures are suppressed if "silent" is set.
1247 */
1248
1249 static int
1250 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1251 {
1252 int rc = 1;
1253 int sep;
1254 FILE *in;
1255
1256 /* If the file name is "-" we scan stdin */
1257
1258 if (strcmp(pathname, "-") == 0)
1259 {
1260 return pcregrep(stdin,
1261 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1262 stdin_name : NULL);
1263 }
1264
1265
1266 /* If the file is a directory, skip if skipping or if we are recursing, scan
1267 each file within it, subject to any include or exclude patterns that were set.
1268 The scanning code is localized so it can be made system-specific. */
1269
1270 if ((sep = isdirectory(pathname)) != 0)
1271 {
1272 if (dee_action == dee_SKIP) return 1;
1273 if (dee_action == dee_RECURSE)
1274 {
1275 char buffer[1024];
1276 char *nextfile;
1277 directory_type *dir = opendirectory(pathname);
1278
1279 if (dir == NULL)
1280 {
1281 if (!silent)
1282 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1283 strerror(errno));
1284 return 2;
1285 }
1286
1287 while ((nextfile = readdirectory(dir)) != NULL)
1288 {
1289 int frc, blen;
1290 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1291 blen = strlen(buffer);
1292
1293 if (exclude_compiled != NULL &&
1294 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1295 continue;
1296
1297 if (include_compiled != NULL &&
1298 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1299 continue;
1300
1301 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1302 if (frc > 1) rc = frc;
1303 else if (frc == 0 && rc == 1) rc = 0;
1304 }
1305
1306 closedirectory(dir);
1307 return rc;
1308 }
1309 }
1310
1311 /* If the file is not a directory and not a regular file, skip it if that's
1312 been requested. */
1313
1314 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1315
1316 /* Control reaches here if we have a regular file, or if we have a directory
1317 and recursion or skipping was not requested, or if we have anything else and
1318 skipping was not requested. The scan proceeds. If this is the first and only
1319 argument at top level, we don't show the file name, unless we are only showing
1320 the file name, or the filename was forced (-H). */
1321
1322 in = fopen(pathname, "r");
1323 if (in == NULL)
1324 {
1325 if (!silent)
1326 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1327 strerror(errno));
1328 return 2;
1329 }
1330
1331 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1332 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1333
1334 fclose(in);
1335 return rc;
1336 }
1337
1338
1339
1340
1341 /*************************************************
1342 * Usage function *
1343 *************************************************/
1344
1345 static int
1346 usage(int rc)
1347 {
1348 option_item *op;
1349 fprintf(stderr, "Usage: pcregrep [-");
1350 for (op = optionlist; op->one_char != 0; op++)
1351 {
1352 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1353 }
1354 fprintf(stderr, "] [long options] [pattern] [files]\n");
1355 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1356 return rc;
1357 }
1358
1359
1360
1361
1362 /*************************************************
1363 * Help function *
1364 *************************************************/
1365
1366 static void
1367 help(void)
1368 {
1369 option_item *op;
1370
1371 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1372 printf("Search for PATTERN in each FILE or standard input.\n");
1373 printf("PATTERN must be present if neither -e nor -f is used.\n");
1374 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1375 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1376
1377 printf("Options:\n");
1378
1379 for (op = optionlist; op->one_char != 0; op++)
1380 {
1381 int n;
1382 char s[4];
1383 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1384 printf(" %s --%s%n", s, op->long_name, &n);
1385 n = 30 - n;
1386 if (n < 1) n = 1;
1387 printf("%.*s%s\n", n, " ", op->help_text);
1388 }
1389
1390 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1391 printf("trailing white space is removed and blank lines are ignored.\n");
1392 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1393
1394 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1395 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1396 }
1397
1398
1399
1400
1401 /*************************************************
1402 * Handle a single-letter, no data option *
1403 *************************************************/
1404
1405 static int
1406 handle_option(int letter, int options)
1407 {
1408 switch(letter)
1409 {
1410 case N_HELP: help(); exit(0);
1411 case 'c': count_only = TRUE; break;
1412 case 'F': process_options |= PO_FIXED_STRINGS; break;
1413 case 'H': filenames = FN_FORCE; break;
1414 case 'h': filenames = FN_NONE; break;
1415 case 'i': options |= PCRE_CASELESS; break;
1416 case 'l': filenames = FN_ONLY; break;
1417 case 'L': filenames = FN_NOMATCH_ONLY; break;
1418 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1419 case 'n': number = TRUE; break;
1420 case 'o': only_matching = TRUE; break;
1421 case 'q': quiet = TRUE; break;
1422 case 'r': dee_action = dee_RECURSE; break;
1423 case 's': silent = TRUE; break;
1424 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1425 case 'v': invert = TRUE; break;
1426 case 'w': process_options |= PO_WORD_MATCH; break;
1427 case 'x': process_options |= PO_LINE_MATCH; break;
1428
1429 case 'V':
1430 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1431 exit(0);
1432 break;
1433
1434 default:
1435 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1436 exit(usage(2));
1437 }
1438
1439 return options;
1440 }
1441
1442
1443
1444
1445 /*************************************************
1446 * Construct printed ordinal *
1447 *************************************************/
1448
1449 /* This turns a number into "1st", "3rd", etc. */
1450
/* Turn a number into its English ordinal text: "1st", "2nd", "3rd", "4th",
and also "11th", "12th", "13th" (the teens always take "th").

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so copy it if it must persist
*/

static char *
ordin(int n)
{
static char buffer[24];        /* room for any int plus the two-letter ending */
const char *ending = "th";
int last_two = n % 100;

/* Only look at the final digit when the last two digits are not 11..13 */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1467
1468
1469
1470 /*************************************************
1471 * Compile a single pattern *
1472 *************************************************/
1473
1474 /* When the -F option has been used, this is called for each substring.
1475 Otherwise it's called for each supplied pattern.
1476
1477 Arguments:
1478 pattern the pattern string
1479 options the PCRE options
1480 filename the file name, or NULL for a command-line pattern
1481 count 0 if this is the only command line pattern, or
1482 number of the command line pattern, or
1483 linenumber for a pattern from a file
1484
1485 Returns: TRUE on success, FALSE after an error
1486 */
1487
1488 static BOOL
1489 compile_single_pattern(char *pattern, int options, char *filename, int count)
1490 {
1491 char buffer[MBUFTHIRD + 16];
1492 const char *error;
1493 int errptr;
1494
1495 if (pattern_count >= MAX_PATTERN_COUNT)
1496 {
1497 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1498 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1499 return FALSE;
1500 }
1501
1502 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1503 suffix[process_options]);
1504 pattern_list[pattern_count] =
1505 pcre_compile(buffer, options, &error, &errptr, pcretables);
1506 if (pattern_list[pattern_count] != NULL)
1507 {
1508 pattern_count++;
1509 return TRUE;
1510 }
1511
1512 /* Handle compile errors */
1513
1514 errptr -= (int)strlen(prefix[process_options]);
1515 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1516
1517 if (filename == NULL)
1518 {
1519 if (count == 0)
1520 fprintf(stderr, "pcregrep: Error in command-line regex "
1521 "at offset %d: %s\n", errptr, error);
1522 else
1523 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1524 "at offset %d: %s\n", ordin(count), errptr, error);
1525 }
1526 else
1527 {
1528 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1529 "at offset %d: %s\n", count, filename, errptr, error);
1530 }
1531
1532 return FALSE;
1533 }
1534
1535
1536
1537 /*************************************************
1538 * Compile one supplied pattern *
1539 *************************************************/
1540
1541 /* When the -F option has been used, each string may be a list of strings,
1542 separated by line breaks. They will be matched literally.
1543
1544 Arguments:
1545 pattern the pattern string
1546 options the PCRE options
1547 filename the file name, or NULL for a command-line pattern
1548 count 0 if this is the only command line pattern, or
1549 number of the command line pattern, or
1550 linenumber for a pattern from a file
1551
1552 Returns: TRUE on success, FALSE after an error
1553 */
1554
1555 static BOOL
1556 compile_pattern(char *pattern, int options, char *filename, int count)
1557 {
1558 if ((process_options & PO_FIXED_STRINGS) != 0)
1559 {
1560 char *eop = pattern + strlen(pattern);
1561 char buffer[MBUFTHIRD];
1562 for(;;)
1563 {
1564 int ellength;
1565 char *p = end_of_line(pattern, eop, &ellength);
1566 if (ellength == 0)
1567 return compile_single_pattern(pattern, options, filename, count);
1568 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1569 pattern = p;
1570 if (!compile_single_pattern(buffer, options, filename, count))
1571 return FALSE;
1572 }
1573 }
1574 else return compile_single_pattern(pattern, options, filename, count);
1575 }
1576
1577
1578
1579 /*************************************************
1580 * Main program *
1581 *************************************************/
1582
1583 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1584
1585 int
1586 main(int argc, char **argv)
1587 {
1588 int i, j;
1589 int rc = 1;
1590 int pcre_options = 0;
1591 int cmd_pattern_count = 0;
1592 int hint_count = 0;
1593 int errptr;
1594 BOOL only_one_at_top;
1595 char *patterns[MAX_PATTERN_COUNT];
1596 const char *locale_from = "--locale";
1597 const char *error;
1598
1599 /* Set the default line ending value from the default in the PCRE library;
1600 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1601 */
1602
1603 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1604 switch(i)
1605 {
1606 default: newline = (char *)"lf"; break;
1607 case '\r': newline = (char *)"cr"; break;
1608 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1609 case -1: newline = (char *)"any"; break;
1610 case -2: newline = (char *)"anycrlf"; break;
1611 }
1612
1613 /* Process the options */
1614
1615 for (i = 1; i < argc; i++)
1616 {
1617 option_item *op = NULL;
1618 char *option_data = (char *)""; /* default to keep compiler happy */
1619 BOOL longop;
1620 BOOL longopwasequals = FALSE;
1621
1622 if (argv[i][0] != '-') break;
1623
1624 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1625 but only if we have previously had -e or -f to define the patterns. */
1626
1627 if (argv[i][1] == 0)
1628 {
1629 if (pattern_filename != NULL || pattern_count > 0) break;
1630 else exit(usage(2));
1631 }
1632
1633 /* Handle a long name option, or -- to terminate the options */
1634
1635 if (argv[i][1] == '-')
1636 {
1637 char *arg = argv[i] + 2;
1638 char *argequals = strchr(arg, '=');
1639
1640 if (*arg == 0) /* -- terminates options */
1641 {
1642 i++;
1643 break; /* out of the options-handling loop */
1644 }
1645
1646 longop = TRUE;
1647
1648 /* Some long options have data that follows after =, for example file=name.
1649 Some options have variations in the long name spelling: specifically, we
1650 allow "regexp" because GNU grep allows it, though I personally go along
1651 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1652 These options are entered in the table as "regex(p)". No option is in both
1653 these categories, fortunately. */
1654
1655 for (op = optionlist; op->one_char != 0; op++)
1656 {
1657 char *opbra = strchr(op->long_name, '(');
1658 char *equals = strchr(op->long_name, '=');
1659 if (opbra == NULL) /* Not a (p) case */
1660 {
1661 if (equals == NULL) /* Not thing=data case */
1662 {
1663 if (strcmp(arg, op->long_name) == 0) break;
1664 }
1665 else /* Special case xxx=data */
1666 {
1667 int oplen = equals - op->long_name;
1668 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1669 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1670 {
1671 option_data = arg + arglen;
1672 if (*option_data == '=')
1673 {
1674 option_data++;
1675 longopwasequals = TRUE;
1676 }
1677 break;
1678 }
1679 }
1680 }
1681 else /* Special case xxxx(p) */
1682 {
1683 char buff1[24];
1684 char buff2[24];
1685 int baselen = opbra - op->long_name;
1686 sprintf(buff1, "%.*s", baselen, op->long_name);
1687 sprintf(buff2, "%s%.*s", buff1,
1688 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1689 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1690 break;
1691 }
1692 }
1693
1694 if (op->one_char == 0)
1695 {
1696 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1697 exit(usage(2));
1698 }
1699 }
1700
1701
1702 /* Jeffrey Friedl's debugging harness uses these additional options which
1703 are not in the right form for putting in the option table because they use
1704 only one hyphen, yet are more than one character long. By putting them
1705 separately here, they will not get displayed as part of the help() output,
1706 but I don't think Jeffrey will care about that. */
1707
1708 #ifdef JFRIEDL_DEBUG
1709 else if (strcmp(argv[i], "-pre") == 0) {
1710 jfriedl_prefix = argv[++i];
1711 continue;
1712 } else if (strcmp(argv[i], "-post") == 0) {
1713 jfriedl_postfix = argv[++i];
1714 continue;
1715 } else if (strcmp(argv[i], "-XT") == 0) {
1716 sscanf(argv[++i], "%d", &jfriedl_XT);
1717 continue;
1718 } else if (strcmp(argv[i], "-XR") == 0) {
1719 sscanf(argv[++i], "%d", &jfriedl_XR);
1720 continue;
1721 }
1722 #endif
1723
1724
1725 /* One-char options; many that have no data may be in a single argument; we
1726 continue till we hit the last one or one that needs data. */
1727
1728 else
1729 {
1730 char *s = argv[i] + 1;
1731 longop = FALSE;
1732 while (*s != 0)
1733 {
1734 for (op = optionlist; op->one_char != 0; op++)
1735 { if (*s == op->one_char) break; }
1736 if (op->one_char == 0)
1737 {
1738 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1739 *s, argv[i]);
1740 exit(usage(2));
1741 }
1742 if (op->type != OP_NODATA || s[1] == 0)
1743 {
1744 option_data = s+1;
1745 break;
1746 }
1747 pcre_options = handle_option(*s++, pcre_options);
1748 }
1749 }
1750
1751 /* At this point we should have op pointing to a matched option. If the type
1752 is NO_DATA, it means that there is no data, and the option might set
1753 something in the PCRE options. */
1754
1755 if (op->type == OP_NODATA)
1756 {
1757 pcre_options = handle_option(op->one_char, pcre_options);
1758 continue;
1759 }
1760
1761 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1762 either has a value or defaults to something. It cannot have data in a
1763 separate item. At the moment, the only such options are "colo(u)r" and
1764 Jeffrey Friedl's special -S debugging option. */
1765
1766 if (*option_data == 0 &&
1767 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1768 {
1769 switch (op->one_char)
1770 {
1771 case N_COLOUR:
1772 colour_option = (char *)"auto";
1773 break;
1774 #ifdef JFRIEDL_DEBUG
1775 case 'S':
1776 S_arg = 0;
1777 break;
1778 #endif
1779 }
1780 continue;
1781 }
1782
1783 /* Otherwise, find the data string for the option. */
1784
1785 if (*option_data == 0)
1786 {
1787 if (i >= argc - 1 || longopwasequals)
1788 {
1789 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1790 exit(usage(2));
1791 }
1792 option_data = argv[++i];
1793 }
1794
1795 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1796 multiple times to create a list of patterns. */
1797
1798 if (op->type == OP_PATLIST)
1799 {
1800 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1801 {
1802 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1803 MAX_PATTERN_COUNT);
1804 return 2;
1805 }
1806 patterns[cmd_pattern_count++] = option_data;
1807 }
1808
1809 /* Otherwise, deal with single string or numeric data values. */
1810
1811 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1812 {
1813 *((char **)op->dataptr) = option_data;
1814 }
1815 else
1816 {
1817 char *endptr;
1818 int n = strtoul(option_data, &endptr, 10);
1819 if (*endptr != 0)
1820 {
1821 if (longop)
1822 {
1823 char *equals = strchr(op->long_name, '=');
1824 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1825 equals - op->long_name;
1826 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1827 option_data, nlen, op->long_name);
1828 }
1829 else
1830 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1831 option_data, op->one_char);
1832 exit(usage(2));
1833 }
1834 *((int *)op->dataptr) = n;
1835 }
1836 }
1837
1838 /* Options have been decoded. If -C was used, its value is used as a default
1839 for -A and -B. */
1840
1841 if (both_context > 0)
1842 {
1843 if (after_context == 0) after_context = both_context;
1844 if (before_context == 0) before_context = both_context;
1845 }
1846
1847 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1848 LC_ALL environment variable is set, and if so, use it. */
1849
1850 if (locale == NULL)
1851 {
1852 locale = getenv("LC_ALL");
1853 locale_from = "LCC_ALL";
1854 }
1855
1856 if (locale == NULL)
1857 {
1858 locale = getenv("LC_CTYPE");
1859 locale_from = "LC_CTYPE";
1860 }
1861
1862 /* If a locale has been provided, set it, and generate the tables the PCRE
1863 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1864
1865 if (locale != NULL)
1866 {
1867 if (setlocale(LC_CTYPE, locale) == NULL)
1868 {
1869 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1870 locale, locale_from);
1871 return 2;
1872 }
1873 pcretables = pcre_maketables();
1874 }
1875
1876 /* Sort out colouring */
1877
1878 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1879 {
1880 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1881 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1882 else
1883 {
1884 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1885 colour_option);
1886 return 2;
1887 }
1888 if (do_colour)
1889 {
1890 char *cs = getenv("PCREGREP_COLOUR");
1891 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1892 if (cs != NULL) colour_string = cs;
1893 }
1894 }
1895
1896 /* Interpret the newline type; the default settings are Unix-like. */
1897
1898 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1899 {
1900 pcre_options |= PCRE_NEWLINE_CR;
1901 endlinetype = EL_CR;
1902 }
1903 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1904 {
1905 pcre_options |= PCRE_NEWLINE_LF;
1906 endlinetype = EL_LF;
1907 }
1908 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1909 {
1910 pcre_options |= PCRE_NEWLINE_CRLF;
1911 endlinetype = EL_CRLF;
1912 }
1913 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1914 {
1915 pcre_options |= PCRE_NEWLINE_ANY;
1916 endlinetype = EL_ANY;
1917 }
1918 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1919 {
1920 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1921 endlinetype = EL_ANYCRLF;
1922 }
1923 else
1924 {
1925 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1926 return 2;
1927 }
1928
1929 /* Interpret the text values for -d and -D */
1930
1931 if (dee_option != NULL)
1932 {
1933 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1934 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1935 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1936 else
1937 {
1938 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1939 return 2;
1940 }
1941 }
1942
1943 if (DEE_option != NULL)
1944 {
1945 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1946 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1947 else
1948 {
1949 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1950 return 2;
1951 }
1952 }
1953
1954 /* Check the values for Jeffrey Friedl's debugging options. */
1955
1956 #ifdef JFRIEDL_DEBUG
1957 if (S_arg > 9)
1958 {
1959 fprintf(stderr, "pcregrep: bad value for -S option\n");
1960 return 2;
1961 }
1962 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1963 {
1964 if (jfriedl_XT == 0) jfriedl_XT = 1;
1965 if (jfriedl_XR == 0) jfriedl_XR = 1;
1966 }
1967 #endif
1968
1969 /* Get memory to store the pattern and hints lists. */
1970
1971 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1972 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1973
1974 if (pattern_list == NULL || hints_list == NULL)
1975 {
1976 fprintf(stderr, "pcregrep: malloc failed\n");
1977 goto EXIT2;
1978 }
1979
1980 /* If no patterns were provided by -e, and there is no file provided by -f,
1981 the first argument is the one and only pattern, and it must exist. */
1982
1983 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1984 {
1985 if (i >= argc) return usage(2);
1986 patterns[cmd_pattern_count++] = argv[i++];
1987 }
1988
1989 /* Compile the patterns that were provided on the command line, either by
1990 multiple uses of -e or as a single unkeyed pattern. */
1991
1992 for (j = 0; j < cmd_pattern_count; j++)
1993 {
1994 if (!compile_pattern(patterns[j], pcre_options, NULL,
1995 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1996 goto EXIT2;
1997 }
1998
1999 /* Compile the regular expressions that are provided in a file. */
2000
2001 if (pattern_filename != NULL)
2002 {
2003 int linenumber = 0;
2004 FILE *f;
2005 char *filename;
2006 char buffer[MBUFTHIRD];
2007
2008 if (strcmp(pattern_filename, "-") == 0)
2009 {
2010 f = stdin;
2011 filename = stdin_name;
2012 }
2013 else
2014 {
2015 f = fopen(pattern_filename, "r");
2016 if (f == NULL)
2017 {
2018 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2019 strerror(errno));
2020 goto EXIT2;
2021 }
2022 filename = pattern_filename;
2023 }
2024
2025 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2026 {
2027 char *s = buffer + (int)strlen(buffer);
2028 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2029 *s = 0;
2030 linenumber++;
2031 if (buffer[0] == 0) continue; /* Skip blank lines */
2032 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2033 goto EXIT2;
2034 }
2035
2036 if (f != stdin) fclose(f);
2037 }
2038
2039 /* Study the regular expressions, as we will be running them many times */
2040
2041 for (j = 0; j < pattern_count; j++)
2042 {
2043 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2044 if (error != NULL)
2045 {
2046 char s[16];
2047 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2048 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2049 goto EXIT2;
2050 }
2051 hint_count++;
2052 }
2053
2054 /* If there are include or exclude patterns, compile them. */
2055
2056 if (exclude_pattern != NULL)
2057 {
2058 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2059 pcretables);
2060 if (exclude_compiled == NULL)
2061 {
2062 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2063 errptr, error);
2064 goto EXIT2;
2065 }
2066 }
2067
2068 if (include_pattern != NULL)
2069 {
2070 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2071 pcretables);
2072 if (include_compiled == NULL)
2073 {
2074 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2075 errptr, error);
2076 goto EXIT2;
2077 }
2078 }
2079
2080 /* If there are no further arguments, do the business on stdin and exit. */
2081
2082 if (i >= argc)
2083 {
2084 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2085 goto EXIT;
2086 }
2087
2088 /* Otherwise, work through the remaining arguments as files or directories.
2089 Pass in the fact that there is only one argument at top level - this suppresses
2090 the file name if the argument is not a directory and filenames are not
2091 otherwise forced. */
2092
2093 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2094
2095 for (; i < argc; i++)
2096 {
2097 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2098 only_one_at_top);
2099 if (frc > 1) rc = frc;
2100 else if (frc == 0 && rc == 1) rc = 0;
2101 }
2102
2103 EXIT:
2104 if (pattern_list != NULL)
2105 {
2106 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2107 free(pattern_list);
2108 }
2109 if (hints_list != NULL)
2110 {
2111 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2112 free(hints_list);
2113 }
2114 return rc;
2115
2116 EXIT2:
2117 rc = 2;
2118 goto EXIT;
2119 }
2120
2121 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12