/[pcre]/code/tags/pcre-7.1/pcregrep.c
ViewVC logotype

Contents of /code/tags/pcre-7.1/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 142 - (show annotations) (download)
Fri Mar 30 15:55:18 2007 UTC (7 years, 3 months ago) by ph10
Original Path: code/trunk/pcregrep.c
File MIME type: text/plain
File size: 57516 byte(s)
Trailing spaces.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 # include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #ifdef HAVE_UNISTD_H
54 # include <unistd.h>
55 #endif
56
57 #include <pcre.h>
58
/* Boolean type and its two values, as used throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* NOTE(review): no use of MAX_PATTERN_COUNT is visible in this part of the
file; presumably it caps the number of patterns that can be held in
pattern_list/hints_list - confirm against the pattern-compiling code. */

#define MAX_PATTERN_COUNT 100

/* One third of the size of the input buffer: pcregrep() declares its buffer as
3*MBUFTHIRD bytes. The value is BUFSIZ when that is larger than 8192, and 8192
otherwise. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif
71
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits). The combined value is
used as an index into the prefix[] and suffix[] tables. */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types; the current one is held in "endlinetype". */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
92
93
94
95 /*************************************************
96 * Global variables *
97 *************************************************/
98
99 /* Jeffrey Friedl has some debugging requirements that are not part of the
100 regular code. */
101
102 #ifdef JFRIEDL_DEBUG
103 static int S_arg = -1;
104 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106 static const char *jfriedl_prefix = "";
107 static const char *jfriedl_postfix = "";
108 #endif
109
110 static int endlinetype;
111
112 static char *colour_string = (char *)"1;31";
113 static char *colour_option = NULL;
114 static char *dee_option = NULL;
115 static char *DEE_option = NULL;
116 static char *newline = NULL;
117 static char *pattern_filename = NULL;
118 static char *stdin_name = (char *)"(standard input)";
119 static char *locale = NULL;
120
121 static const unsigned char *pcretables = NULL;
122
123 static int pattern_count = 0;
124 static pcre **pattern_list = NULL;
125 static pcre_extra **hints_list = NULL;
126
127 static char *include_pattern = NULL;
128 static char *exclude_pattern = NULL;
129
130 static pcre *include_compiled = NULL;
131 static pcre *exclude_compiled = NULL;
132
133 static int after_context = 0;
134 static int before_context = 0;
135 static int both_context = 0;
136 static int dee_action = dee_READ;
137 static int DEE_action = DEE_READ;
138 static int error_count = 0;
139 static int filenames = FN_DEFAULT;
140 static int process_options = 0;
141
142 static BOOL count_only = FALSE;
143 static BOOL do_colour = FALSE;
144 static BOOL hyphenpending = FALSE;
145 static BOOL invert = FALSE;
146 static BOOL multiline = FALSE;
147 static BOOL number = FALSE;
148 static BOOL only_matching = FALSE;
149 static BOOL quiet = FALSE;
150 static BOOL silent = FALSE;
151 static BOOL utf8 = FALSE;
152
/* Structure for options and list of them. The OP_ values say what kind of
data, if any, an option takes. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* One of the OP_ values above */
  int one_char;           /* Single-letter form, or a negative N_ code */
  void *dataptr;          /* Variable that receives the option's data, or NULL */
  const char *long_name;  /* Long option name, as shown in the help output */
  const char *help_text;  /* One-line description */
} option_item;
165
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR    (-1)
#define N_EXCLUDE   (-2)
#define N_HELP      (-3)
#define N_INCLUDE   (-4)
#define N_LABEL     (-5)
#define N_LOCALE    (-6)
#define N_NULL      (-7)
176
177 static option_item optionlist[] = {
178 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
179 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
180 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
181 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
182 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
183 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
184 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
185 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
186 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
187 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
188 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
189 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
190 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
191 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
192 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
193 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
194 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
195 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
196 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
197 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
198 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
199 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
200 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
201 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
202 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
203 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
204 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
205 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
206 #ifdef JFRIEDL_DEBUG
207 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
208 #endif
209 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
210 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
211 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
212 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
213 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
214 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
215 { OP_NODATA, 0, NULL, NULL, NULL }
216 };
217
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, so
each table has eight entries, indexed by the combined flag value. Note that the
combination of -w and -x has the same effect as -x on its own, so we can treat
them as the same: entries 2 and 3 are identical, as are entries 6 and 7. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
228
/* UTF-8 tables - used by end_of_line() and previous_line() only when the line
ending type is EL_ANY and UTF-8 mode is on.

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask for the data bits in the first byte. utf8_table4 is indexed by
the bottom six bits of a first byte that is 0xc0 or greater, and gives the
number of additional bytes that follow it. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238
239
240
241 /*************************************************
242 * OS-specific functions *
243 *************************************************/
244
245 /* These functions are defined so that they can be made system specific,
246 although at present the only ones are for Unix, Win32, and for "no support". */
247
248
249 /************* Directory scanning in Unix ***********/
250
251 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252 #include <sys/types.h>
253 #include <sys/stat.h>
254 #include <dirent.h>
255
typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory; 0 otherwise,
            including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;        /* In the expectation that opening as a file will fail */

  /* S_ISDIR() is the POSIX test for a directory; it is equivalent to comparing
  (st_mode & S_IFMT) with S_IFDIR, but does not need the S_IFMT mask, which
  some strict compilation modes do not make visible. */

  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
266
267 static directory_type *
268 opendirectory(char *filename)
269 {
270 return opendir(filename);
271 }
272
273 static char *
274 readdirectory(directory_type *dir)
275 {
276 for (;;)
277 {
278 struct dirent *dent = readdir(dir);
279 if (dent == NULL) return NULL;
280 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
281 return dent->d_name;
282 }
283 return NULL; /* Keep compiler happy; never executed */
284 }
285
286 static void
287 closedirectory(directory_type *dir)
288 {
289 closedir(dir);
290 }
291
292
293 /************* Test for regular file in Unix **********/
294
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, or if stat() fails;
            zero for anything else that stat() can see
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;        /* In the expectation that opening as a file will fail */

  /* S_ISREG() is the POSIX test for a regular file; it is equivalent to
  comparing (st_mode & S_IFMT) with S_IFREG. */

  return S_ISREG(statbuf.st_mode) ? 1 : 0;
}
303
304
305 /************* Test stdout for being a terminal in Unix **********/
306
307 static BOOL
308 is_stdout_tty(void)
309 {
310 return isatty(fileno(stdout));
311 }
312
313
314 /************* Directory scanning in Win32 ***********/
315
316 /* I (Philip Hazel) have no means of testing this code. It was contributed by
317 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318 when it did not exist. */
319
320
321 #elif HAVE_WINDOWS_H
322
323 #ifndef STRICT
324 # define STRICT
325 #endif
326 #ifndef WIN32_LEAN_AND_MEAN
327 # define WIN32_LEAN_AND_MEAN
328 #endif
329 #ifndef INVALID_FILE_ATTRIBUTES
330 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331 #endif
332
333 #include <windows.h>
334
335 typedef struct directory_type
336 {
337 HANDLE handle;
338 BOOL first;
339 WIN32_FIND_DATA data;
340 } directory_type;
341
342 int
343 isdirectory(char *filename)
344 {
345 DWORD attr = GetFileAttributes(filename);
346 if (attr == INVALID_FILE_ATTRIBUTES)
347 return 0;
348 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
349 }
350
351 directory_type *
352 opendirectory(char *filename)
353 {
354 size_t len;
355 char *pattern;
356 directory_type *dir;
357 DWORD err;
358 len = strlen(filename);
359 pattern = (char *) malloc(len + 3);
360 dir = (directory_type *) malloc(sizeof(*dir));
361 if ((pattern == NULL) || (dir == NULL))
362 {
363 fprintf(stderr, "pcregrep: malloc failed\n");
364 exit(2);
365 }
366 memcpy(pattern, filename, len);
367 memcpy(&(pattern[len]), "\\*", 3);
368 dir->handle = FindFirstFile(pattern, &(dir->data));
369 if (dir->handle != INVALID_HANDLE_VALUE)
370 {
371 free(pattern);
372 dir->first = TRUE;
373 return dir;
374 }
375 err = GetLastError();
376 free(pattern);
377 free(dir);
378 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
379 return NULL;
380 }
381
382 char *
383 readdirectory(directory_type *dir)
384 {
385 for (;;)
386 {
387 if (!dir->first)
388 {
389 if (!FindNextFile(dir->handle, &(dir->data)))
390 return NULL;
391 }
392 else
393 {
394 dir->first = FALSE;
395 }
396 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
397 return dir->data.cFileName;
398 }
399 #ifndef _MSC_VER
400 return NULL; /* Keep compiler happy; never executed */
401 #endif
402 }
403
404 void
405 closedirectory(directory_type *dir)
406 {
407 FindClose(dir->handle);
408 free(dir);
409 }
410
411
412 /************* Test for regular file in Win32 **********/
413
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:   filename   the path to test
Returns:    non-zero if isdirectory() says the path is not a directory
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
421
422
423 /************* Test stdout for being a terminal in Win32 **********/
424
425 /* I don't know how to do this; assume never */
426
427 static BOOL
428 is_stdout_tty(void)
429 {
430 FALSE;
431 }
432
433
434 /************* Directory scanning when we can't do it ***********/
435
436 /* The type is void, and apart from isdirectory(), the functions do nothing. */
437
438 #else
439
typedef void directory_type;

/* Stubs for systems with no directory support: nothing is ever reported as a
directory, no directory can be opened, and there is nothing to read or close. */

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type *opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

void closedirectory(directory_type *dir)
{
  (void)dir;
}
446
447
448 /************* Test for regular when we can't do it **********/
449
/* Assume all files are regular.

Argument:   filename   the path to test (not examined)
Returns:    1, always
*/

int isregfile(char *filename)
{
  (void)filename;
  return 1;
}
453
454
455 /************* Test stdout for being a terminal when we can't do it **********/
456
457 static BOOL
458 is_stdout_tty(void)
459 {
460 return FALSE;
461 }
462
463
464 #endif
465
466
467
468 #ifndef HAVE_STRERROR
469 /*************************************************
470 * Provide strerror() for non-ANSI libraries *
471 *************************************************/
472
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the message up in the system's error table.

Argument:   n   an error number
Returns:    the text for error n, or a fixed "unknown" text if n is outside
            the range 0 to sys_nerr-1
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
486 #endif /* HAVE_STRERROR */
487
488
489
490 /*************************************************
491 * Find end of line *
492 *************************************************/
493
494 /* The length of the endline sequence that is found is set via lenptr. This may
495 be zero at the very end of the file if there is no line-ending sequence there.
496
497 Arguments:
498 p current position in line
499 endptr end of available data
500 lenptr where to put the length of the eol sequence
501
502 Returns: pointer to the last byte of the line
503 */
504
505 static char *
506 end_of_line(char *p, char *endptr, int *lenptr)
507 {
508 switch(endlinetype)
509 {
510 default: /* Just in case */
511 case EL_LF:
512 while (p < endptr && *p != '\n') p++;
513 if (p < endptr)
514 {
515 *lenptr = 1;
516 return p + 1;
517 }
518 *lenptr = 0;
519 return endptr;
520
521 case EL_CR:
522 while (p < endptr && *p != '\r') p++;
523 if (p < endptr)
524 {
525 *lenptr = 1;
526 return p + 1;
527 }
528 *lenptr = 0;
529 return endptr;
530
531 case EL_CRLF:
532 for (;;)
533 {
534 while (p < endptr && *p != '\r') p++;
535 if (++p >= endptr)
536 {
537 *lenptr = 0;
538 return endptr;
539 }
540 if (*p == '\n')
541 {
542 *lenptr = 2;
543 return p + 1;
544 }
545 }
546 break;
547
548 case EL_ANY:
549 while (p < endptr)
550 {
551 int extra = 0;
552 register int c = *((unsigned char *)p);
553
554 if (utf8 && c >= 0xc0)
555 {
556 int gcii, gcss;
557 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
558 gcss = 6*extra;
559 c = (c & utf8_table3[extra]) << gcss;
560 for (gcii = 1; gcii <= extra; gcii++)
561 {
562 gcss -= 6;
563 c |= (p[gcii] & 0x3f) << gcss;
564 }
565 }
566
567 p += 1 + extra;
568
569 switch (c)
570 {
571 case 0x0a: /* LF */
572 case 0x0b: /* VT */
573 case 0x0c: /* FF */
574 *lenptr = 1;
575 return p;
576
577 case 0x0d: /* CR */
578 if (p < endptr && *p == 0x0a)
579 {
580 *lenptr = 2;
581 p++;
582 }
583 else *lenptr = 1;
584 return p;
585
586 case 0x85: /* NEL */
587 *lenptr = utf8? 2 : 1;
588 return p;
589
590 case 0x2028: /* LS */
591 case 0x2029: /* PS */
592 *lenptr = 3;
593 return p;
594
595 default:
596 break;
597 }
598 } /* End of loop for ANY case */
599
600 *lenptr = 0; /* Must have hit the end */
601 return endptr;
602 } /* End of overall switch */
603 }
604
605
606
607 /*************************************************
608 * Find start of previous line *
609 *************************************************/
610
611 /* This is called when looking back for before lines to print.
612
613 Arguments:
614 p start of the subsequent line
615 startptr start of available data
616
617 Returns: pointer to the start of the previous line
618 */
619
620 static char *
621 previous_line(char *p, char *startptr)
622 {
623 switch(endlinetype)
624 {
625 default: /* Just in case */
626 case EL_LF:
627 p--;
628 while (p > startptr && p[-1] != '\n') p--;
629 return p;
630
631 case EL_CR:
632 p--;
633 while (p > startptr && p[-1] != '\n') p--;
634 return p;
635
636 case EL_CRLF:
637 for (;;)
638 {
639 p -= 2;
640 while (p > startptr && p[-1] != '\n') p--;
641 if (p <= startptr + 1 || p[-2] == '\r') return p;
642 }
643 return p; /* But control should never get here */
644
645 case EL_ANY:
646 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
647 if (utf8) while ((*p & 0xc0) == 0x80) p--;
648
649 while (p > startptr)
650 {
651 register int c;
652 char *pp = p - 1;
653
654 if (utf8)
655 {
656 int extra = 0;
657 while ((*pp & 0xc0) == 0x80) pp--;
658 c = *((unsigned char *)pp);
659 if (c >= 0xc0)
660 {
661 int gcii, gcss;
662 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
663 gcss = 6*extra;
664 c = (c & utf8_table3[extra]) << gcss;
665 for (gcii = 1; gcii <= extra; gcii++)
666 {
667 gcss -= 6;
668 c |= (pp[gcii] & 0x3f) << gcss;
669 }
670 }
671 }
672 else c = *((unsigned char *)pp);
673
674 switch (c)
675 {
676 case 0x0a: /* LF */
677 case 0x0b: /* VT */
678 case 0x0c: /* FF */
679 case 0x0d: /* CR */
680 case 0x85: /* NEL */
681 case 0x2028: /* LS */
682 case 0x2029: /* PS */
683 return p;
684
685 default:
686 break;
687 }
688
689 p = pp; /* Back one character */
690 } /* End of loop for ANY case */
691
692 return startptr; /* Hit start of data */
693 } /* End of overall switch */
694 }
695
696
697
698
699
700 /*************************************************
701 * Print the previous "after" lines *
702 *************************************************/
703
704 /* This is called if we are about to lose said lines because of buffer filling,
705 and at the end of the file. The data in the line is written using fwrite() so
706 that a binary zero does not terminate it.
707
708 Arguments:
709 lastmatchnumber the number of the last matching line, plus one
710 lastmatchrestart where we restarted after the last match
711 endptr end of available data
712 printname filename for printing
713
714 Returns: nothing
715 */
716
717 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
718 char *endptr, char *printname)
719 {
720 if (after_context > 0 && lastmatchnumber > 0)
721 {
722 int count = 0;
723 while (lastmatchrestart < endptr && count++ < after_context)
724 {
725 int ellength;
726 char *pp = lastmatchrestart;
727 if (printname != NULL) fprintf(stdout, "%s-", printname);
728 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
729 pp = end_of_line(pp, endptr, &ellength);
730 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
731 lastmatchrestart = pp;
732 }
733 hyphenpending = TRUE;
734 }
735 }
736
737
738
739 /*************************************************
740 * Grep an individual file *
741 *************************************************/
742
743 /* This is called from grep_or_recurse() below. It uses a buffer that is three
744 times the value of MBUFTHIRD. The matching point is never allowed to stray into
745 the top third of the buffer, thus keeping more of the file available for
746 context printing or for multiline scanning. For large files, the pointer will
747 be in the middle third most of the time, so the bottom third is available for
748 "before" context printing.
749
750 Arguments:
751 in the fopened FILE stream
752 printname the file name if it is to be printed for each match
753 or NULL if the file name is not to be printed
754 it cannot be NULL if filenames[_nomatch]_only is set
755
756 Returns: 0 if there was at least one match
757 1 otherwise (no matches)
758 */
759
760 static int
761 pcregrep(FILE *in, char *printname)
762 {
763 int rc = 1;
764 int linenumber = 1;
765 int lastmatchnumber = 0;
766 int count = 0;
767 int offsets[99];
768 char *lastmatchrestart = NULL;
769 char buffer[3*MBUFTHIRD];
770 char *ptr = buffer;
771 char *endptr;
772 size_t bufflength;
773 BOOL endhyphenpending = FALSE;
774
775 /* Do the first read into the start of the buffer and set up the pointer to
776 end of what we have. */
777
778 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
779 endptr = buffer + bufflength;
780
781 /* Loop while the current pointer is not at the end of the file. For large
782 files, endptr will be at the end of the buffer when we are in the middle of the
783 file, but ptr will never get there, because as soon as it gets over 2/3 of the
784 way, the buffer is shifted left and re-filled. */
785
786 while (ptr < endptr)
787 {
788 int i, endlinelength;
789 int mrc = 0;
790 BOOL match = FALSE;
791 char *t = ptr;
792 size_t length, linelength;
793
794 /* At this point, ptr is at the start of a line. We need to find the length
795 of the subject string to pass to pcre_exec(). In multiline mode, it is the
796 length remainder of the data in the buffer. Otherwise, it is the length of
797 the next line. After matching, we always advance by the length of the next
798 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
799 that any match is constrained to be in the first line. */
800
801 t = end_of_line(t, endptr, &endlinelength);
802 linelength = t - ptr - endlinelength;
803 length = multiline? endptr - ptr : linelength;
804
805 /* Extra processing for Jeffrey Friedl's debugging. */
806
807 #ifdef JFRIEDL_DEBUG
808 if (jfriedl_XT || jfriedl_XR)
809 {
810 #include <sys/time.h>
811 #include <time.h>
812 struct timeval start_time, end_time;
813 struct timezone dummy;
814
815 if (jfriedl_XT)
816 {
817 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
818 const char *orig = ptr;
819 ptr = malloc(newlen + 1);
820 if (!ptr) {
821 printf("out of memory");
822 exit(2);
823 }
824 endptr = ptr;
825 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
826 for (i = 0; i < jfriedl_XT; i++) {
827 strncpy(endptr, orig, length);
828 endptr += length;
829 }
830 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
831 length = newlen;
832 }
833
834 if (gettimeofday(&start_time, &dummy) != 0)
835 perror("bad gettimeofday");
836
837
838 for (i = 0; i < jfriedl_XR; i++)
839 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
840
841 if (gettimeofday(&end_time, &dummy) != 0)
842 perror("bad gettimeofday");
843
844 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
845 -
846 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
847
848 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
849 return 0;
850 }
851 #endif
852
853
854 /* Run through all the patterns until one matches. Note that we don't include
855 the final newline in the subject string. */
856
857 for (i = 0; i < pattern_count; i++)
858 {
859 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
860 offsets, 99);
861 if (mrc >= 0) { match = TRUE; break; }
862 if (mrc != PCRE_ERROR_NOMATCH)
863 {
864 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
865 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
866 fprintf(stderr, "this line:\n");
867 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
868 fprintf(stderr, "\n");
869 if (error_count == 0 &&
870 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
871 {
872 fprintf(stderr, "pcregrep: error %d means that a resource limit "
873 "was exceeded\n", mrc);
874 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
875 }
876 if (error_count++ > 20)
877 {
878 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
879 exit(2);
880 }
881 match = invert; /* No more matching; don't show the line again */
882 break;
883 }
884 }
885
886 /* If it's a match or a not-match (as required), do what's wanted. */
887
888 if (match != invert)
889 {
890 BOOL hyphenprinted = FALSE;
891
892 /* We've failed if we want a file that doesn't have any matches. */
893
894 if (filenames == FN_NOMATCH_ONLY) return 1;
895
896 /* Just count if just counting is wanted. */
897
898 if (count_only) count++;
899
900 /* If all we want is a file name, there is no need to scan any more lines
901 in the file. */
902
903 else if (filenames == FN_ONLY)
904 {
905 fprintf(stdout, "%s\n", printname);
906 return 0;
907 }
908
909 /* Likewise, if all we want is a yes/no answer. */
910
911 else if (quiet) return 0;
912
913 /* The --only-matching option prints just the substring that matched, and
914 does not print any context. */
915
916 else if (only_matching)
917 {
918 if (printname != NULL) fprintf(stdout, "%s:", printname);
919 if (number) fprintf(stdout, "%d:", linenumber);
920 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
921 fprintf(stdout, "\n");
922 }
923
924 /* This is the default case when none of the above options is set. We print
925 the matching lines(s), possibly preceded and/or followed by other lines of
926 context. */
927
928 else
929 {
930 /* See if there is a requirement to print some "after" lines from a
931 previous match. We never print any overlaps. */
932
933 if (after_context > 0 && lastmatchnumber > 0)
934 {
935 int ellength;
936 int linecount = 0;
937 char *p = lastmatchrestart;
938
939 while (p < ptr && linecount < after_context)
940 {
941 p = end_of_line(p, ptr, &ellength);
942 linecount++;
943 }
944
945 /* It is important to advance lastmatchrestart during this printing so
946 that it interacts correctly with any "before" printing below. Print
947 each line's data using fwrite() in case there are binary zeroes. */
948
949 while (lastmatchrestart < p)
950 {
951 char *pp = lastmatchrestart;
952 if (printname != NULL) fprintf(stdout, "%s-", printname);
953 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
954 pp = end_of_line(pp, endptr, &ellength);
955 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
956 lastmatchrestart = pp;
957 }
958 if (lastmatchrestart != ptr) hyphenpending = TRUE;
959 }
960
961 /* If there were non-contiguous lines printed above, insert hyphens. */
962
963 if (hyphenpending)
964 {
965 fprintf(stdout, "--\n");
966 hyphenpending = FALSE;
967 hyphenprinted = TRUE;
968 }
969
970 /* See if there is a requirement to print some "before" lines for this
971 match. Again, don't print overlaps. */
972
973 if (before_context > 0)
974 {
975 int linecount = 0;
976 char *p = ptr;
977
978 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
979 linecount < before_context)
980 {
981 linecount++;
982 p = previous_line(p, buffer);
983 }
984
985 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
986 fprintf(stdout, "--\n");
987
988 while (p < ptr)
989 {
990 int ellength;
991 char *pp = p;
992 if (printname != NULL) fprintf(stdout, "%s-", printname);
993 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
994 pp = end_of_line(pp, endptr, &ellength);
995 fwrite(p, 1, pp - p, stdout);
996 p = pp;
997 }
998 }
999
1000 /* Now print the matching line(s); ensure we set hyphenpending at the end
1001 of the file if any context lines are being output. */
1002
1003 if (after_context > 0 || before_context > 0)
1004 endhyphenpending = TRUE;
1005
1006 if (printname != NULL) fprintf(stdout, "%s:", printname);
1007 if (number) fprintf(stdout, "%d:", linenumber);
1008
1009 /* In multiline mode, we want to print to the end of the line in which
1010 the end of the matched string is found, so we adjust linelength and the
1011 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1012 start of the match will always be before the first newline sequence. */
1013
1014 if (multiline)
1015 {
1016 int ellength;
1017 char *endmatch = ptr + offsets[1];
1018 t = ptr;
1019 while (t < endmatch)
1020 {
1021 t = end_of_line(t, endptr, &ellength);
1022 if (t <= endmatch) linenumber++; else break;
1023 }
1024 endmatch = end_of_line(endmatch, endptr, &ellength);
1025 linelength = endmatch - ptr - ellength;
1026 }
1027
1028 /*** NOTE: Use only fwrite() to output the data line, so that binary
1029 zeroes are treated as just another data character. */
1030
1031 /* This extra option, for Jeffrey Friedl's debugging requirements,
1032 replaces the matched string, or a specific captured string if it exists,
1033 with X. When this happens, colouring is ignored. */
1034
1035 #ifdef JFRIEDL_DEBUG
1036 if (S_arg >= 0 && S_arg < mrc)
1037 {
1038 int first = S_arg * 2;
1039 int last = first + 1;
1040 fwrite(ptr, 1, offsets[first], stdout);
1041 fprintf(stdout, "X");
1042 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1043 }
1044 else
1045 #endif
1046
1047 /* We have to split the line(s) up if colouring. */
1048
1049 if (do_colour)
1050 {
1051 fwrite(ptr, 1, offsets[0], stdout);
1052 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1053 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1054 fprintf(stdout, "%c[00m", 0x1b);
1055 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1056 }
1057 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1058 }
1059
1060 /* End of doing what has to be done for a match */
1061
1062 rc = 0; /* Had some success */
1063
1064 /* Remember where the last match happened for after_context. We remember
1065 where we are about to restart, and that line's number. */
1066
1067 lastmatchrestart = ptr + linelength + endlinelength;
1068 lastmatchnumber = linenumber + 1;
1069 }
1070
1071 /* Advance to after the newline and increment the line number. */
1072
1073 ptr += linelength + endlinelength;
1074 linenumber++;
1075
1076 /* If we haven't yet reached the end of the file (the buffer is full), and
1077 the current point is in the top 1/3 of the buffer, slide the buffer down by
1078 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1079 about to be lost, print them. */
1080
1081 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1082 {
1083 if (after_context > 0 &&
1084 lastmatchnumber > 0 &&
1085 lastmatchrestart < buffer + MBUFTHIRD)
1086 {
1087 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1088 lastmatchnumber = 0;
1089 }
1090
1091 /* Now do the shuffle */
1092
1093 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1094 ptr -= MBUFTHIRD;
1095 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1096 endptr = buffer + bufflength;
1097
1098 /* Adjust any last match point */
1099
1100 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1101 }
1102 } /* Loop through the whole file */
1103
1104 /* End of file; print final "after" lines if wanted; do_after_lines sets
1105 hyphenpending if it prints something. */
1106
1107 if (!only_matching && !count_only)
1108 {
1109 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1110 hyphenpending |= endhyphenpending;
1111 }
1112
1113 /* Print the file name if we are looking for those without matches and there
1114 were none. If we found a match, we won't have got this far. */
1115
1116 if (filenames == FN_NOMATCH_ONLY)
1117 {
1118 fprintf(stdout, "%s\n", printname);
1119 return 0;
1120 }
1121
1122 /* Print the match count if wanted */
1123
1124 if (count_only)
1125 {
1126 if (printname != NULL) fprintf(stdout, "%s:", printname);
1127 fprintf(stdout, "%d\n", count);
1128 }
1129
1130 return rc;
1131 }
1132
1133
1134
1135 /*************************************************
1136 * Grep a file or recurse into a directory *
1137 *************************************************/
1138
1139 /* Given a path name, if it's a directory, scan all the files if we are
1140 recursing; if it's a file, grep it.
1141
1142 Arguments:
1143 pathname the path to investigate
1144 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1145 only_one_at_top TRUE if the path is the only one at toplevel
1146
1147 Returns: 0 if there was at least one match
1148 1 if there were no matches
1149 2 there was some kind of error
1150
1151 However, file opening failures are suppressed if "silent" is set.
1152 */
1153
1154 static int
1155 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1156 {
1157 int rc = 1;
1158 int sep;
1159 FILE *in;
1160
1161 /* If the file name is "-" we scan stdin */
1162
1163 if (strcmp(pathname, "-") == 0)
1164 {
1165 return pcregrep(stdin,
1166 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1167 stdin_name : NULL);
1168 }
1169
1170
1171 /* If the file is a directory, skip if skipping or if we are recursing, scan
1172 each file within it, subject to any include or exclude patterns that were set.
1173 The scanning code is localized so it can be made system-specific. */
1174
1175 if ((sep = isdirectory(pathname)) != 0)
1176 {
1177 if (dee_action == dee_SKIP) return 1;
1178 if (dee_action == dee_RECURSE)
1179 {
1180 char buffer[1024];
1181 char *nextfile;
1182 directory_type *dir = opendirectory(pathname);
1183
1184 if (dir == NULL)
1185 {
1186 if (!silent)
1187 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1188 strerror(errno));
1189 return 2;
1190 }
1191
1192 while ((nextfile = readdirectory(dir)) != NULL)
1193 {
1194 int frc, blen;
1195 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1196 blen = strlen(buffer);
1197
1198 if (exclude_compiled != NULL &&
1199 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1200 continue;
1201
1202 if (include_compiled != NULL &&
1203 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1204 continue;
1205
1206 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1207 if (frc > 1) rc = frc;
1208 else if (frc == 0 && rc == 1) rc = 0;
1209 }
1210
1211 closedirectory(dir);
1212 return rc;
1213 }
1214 }
1215
1216 /* If the file is not a directory and not a regular file, skip it if that's
1217 been requested. */
1218
1219 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1220
1221 /* Control reaches here if we have a regular file, or if we have a directory
1222 and recursion or skipping was not requested, or if we have anything else and
1223 skipping was not requested. The scan proceeds. If this is the first and only
1224 argument at top level, we don't show the file name, unless we are only showing
1225 the file name, or the filename was forced (-H). */
1226
1227 in = fopen(pathname, "r");
1228 if (in == NULL)
1229 {
1230 if (!silent)
1231 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1232 strerror(errno));
1233 return 2;
1234 }
1235
1236 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1237 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1238
1239 fclose(in);
1240 return rc;
1241 }
1242
1243
1244
1245
1246 /*************************************************
1247 * Usage function *
1248 *************************************************/
1249
1250 static int
1251 usage(int rc)
1252 {
1253 option_item *op;
1254 fprintf(stderr, "Usage: pcregrep [-");
1255 for (op = optionlist; op->one_char != 0; op++)
1256 {
1257 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1258 }
1259 fprintf(stderr, "] [long options] [pattern] [files]\n");
1260 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1261 return rc;
1262 }
1263
1264
1265
1266
1267 /*************************************************
1268 * Help function *
1269 *************************************************/
1270
1271 static void
1272 help(void)
1273 {
1274 option_item *op;
1275
1276 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1277 printf("Search for PATTERN in each FILE or standard input.\n");
1278 printf("PATTERN must be present if neither -e nor -f is used.\n");
1279 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1280 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1281
1282 printf("Options:\n");
1283
1284 for (op = optionlist; op->one_char != 0; op++)
1285 {
1286 int n;
1287 char s[4];
1288 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1289 printf(" %s --%s%n", s, op->long_name, &n);
1290 n = 30 - n;
1291 if (n < 1) n = 1;
1292 printf("%.*s%s\n", n, " ", op->help_text);
1293 }
1294
1295 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1296 printf("trailing white space is removed and blank lines are ignored.\n");
1297 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1298
1299 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1300 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1301 }
1302
1303
1304
1305
1306 /*************************************************
1307 * Handle a single-letter, no data option *
1308 *************************************************/
1309
1310 static int
1311 handle_option(int letter, int options)
1312 {
1313 switch(letter)
1314 {
1315 case N_HELP: help(); exit(0);
1316 case 'c': count_only = TRUE; break;
1317 case 'F': process_options |= PO_FIXED_STRINGS; break;
1318 case 'H': filenames = FN_FORCE; break;
1319 case 'h': filenames = FN_NONE; break;
1320 case 'i': options |= PCRE_CASELESS; break;
1321 case 'l': filenames = FN_ONLY; break;
1322 case 'L': filenames = FN_NOMATCH_ONLY; break;
1323 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1324 case 'n': number = TRUE; break;
1325 case 'o': only_matching = TRUE; break;
1326 case 'q': quiet = TRUE; break;
1327 case 'r': dee_action = dee_RECURSE; break;
1328 case 's': silent = TRUE; break;
1329 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1330 case 'v': invert = TRUE; break;
1331 case 'w': process_options |= PO_WORD_MATCH; break;
1332 case 'x': process_options |= PO_LINE_MATCH; break;
1333
1334 case 'V':
1335 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1336 exit(0);
1337 break;
1338
1339 default:
1340 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1341 exit(usage(2));
1342 }
1343
1344 return options;
1345 }
1346
1347
1348
1349
1350 /*************************************************
1351 * Construct printed ordinal *
1352 *************************************************/
1353
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12 or 13 take "th" ("11th",
"112th"), and so does any value that is not positive. The result is held in a
static buffer, so it is overwritten by the next call. */

static char *
ordin(int n)
{
  static char buffer[16];      /* room for any int plus suffix and the zero */
  const char *ending = "th";
  int last_two = n % 100;

  /* The teens are irregular: 11th, 12th, 13th rather than 11st, 12nd, 13rd. */

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1372
1373
1374
1375 /*************************************************
1376 * Compile a single pattern *
1377 *************************************************/
1378
1379 /* When the -F option has been used, this is called for each substring.
1380 Otherwise it's called for each supplied pattern.
1381
1382 Arguments:
1383 pattern the pattern string
1384 options the PCRE options
1385 filename the file name, or NULL for a command-line pattern
1386 count 0 if this is the only command line pattern, or
1387 number of the command line pattern, or
1388 linenumber for a pattern from a file
1389
1390 Returns: TRUE on success, FALSE after an error
1391 */
1392
1393 static BOOL
1394 compile_single_pattern(char *pattern, int options, char *filename, int count)
1395 {
1396 char buffer[MBUFTHIRD + 16];
1397 const char *error;
1398 int errptr;
1399
1400 if (pattern_count >= MAX_PATTERN_COUNT)
1401 {
1402 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1403 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1404 return FALSE;
1405 }
1406
1407 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1408 suffix[process_options]);
1409 pattern_list[pattern_count] =
1410 pcre_compile(buffer, options, &error, &errptr, pcretables);
1411 if (pattern_list[pattern_count] != NULL)
1412 {
1413 pattern_count++;
1414 return TRUE;
1415 }
1416
1417 /* Handle compile errors */
1418
1419 errptr -= (int)strlen(prefix[process_options]);
1420 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1421
1422 if (filename == NULL)
1423 {
1424 if (count == 0)
1425 fprintf(stderr, "pcregrep: Error in command-line regex "
1426 "at offset %d: %s\n", errptr, error);
1427 else
1428 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1429 "at offset %d: %s\n", ordin(count), errptr, error);
1430 }
1431 else
1432 {
1433 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1434 "at offset %d: %s\n", count, filename, errptr, error);
1435 }
1436
1437 return FALSE;
1438 }
1439
1440
1441
1442 /*************************************************
1443 * Compile one supplied pattern *
1444 *************************************************/
1445
1446 /* When the -F option has been used, each string may be a list of strings,
1447 separated by line breaks. They will be matched literally.
1448
1449 Arguments:
1450 pattern the pattern string
1451 options the PCRE options
1452 filename the file name, or NULL for a command-line pattern
1453 count 0 if this is the only command line pattern, or
1454 number of the command line pattern, or
1455 linenumber for a pattern from a file
1456
1457 Returns: TRUE on success, FALSE after an error
1458 */
1459
1460 static BOOL
1461 compile_pattern(char *pattern, int options, char *filename, int count)
1462 {
1463 if ((process_options & PO_FIXED_STRINGS) != 0)
1464 {
1465 char *eop = pattern + strlen(pattern);
1466 char buffer[MBUFTHIRD];
1467 for(;;)
1468 {
1469 int ellength;
1470 char *p = end_of_line(pattern, eop, &ellength);
1471 if (ellength == 0)
1472 return compile_single_pattern(pattern, options, filename, count);
1473 sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1474 pattern = p;
1475 if (!compile_single_pattern(buffer, options, filename, count))
1476 return FALSE;
1477 }
1478 }
1479 else return compile_single_pattern(pattern, options, filename, count);
1480 }
1481
1482
1483
1484 /*************************************************
1485 * Main program *
1486 *************************************************/
1487
1488 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1489
1490 int
1491 main(int argc, char **argv)
1492 {
1493 int i, j;
1494 int rc = 1;
1495 int pcre_options = 0;
1496 int cmd_pattern_count = 0;
1497 int hint_count = 0;
1498 int errptr;
1499 BOOL only_one_at_top;
1500 char *patterns[MAX_PATTERN_COUNT];
1501 const char *locale_from = "--locale";
1502 const char *error;
1503
1504 /* Set the default line ending value from the default in the PCRE library;
1505 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1506 */
1507
1508 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1509 switch(i)
1510 {
1511 default: newline = (char *)"lf"; break;
1512 case '\r': newline = (char *)"cr"; break;
1513 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1514 case -1: newline = (char *)"any"; break;
1515 }
1516
1517 /* Process the options */
1518
1519 for (i = 1; i < argc; i++)
1520 {
1521 option_item *op = NULL;
1522 char *option_data = (char *)""; /* default to keep compiler happy */
1523 BOOL longop;
1524 BOOL longopwasequals = FALSE;
1525
1526 if (argv[i][0] != '-') break;
1527
1528 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1529 but only if we have previously had -e or -f to define the patterns. */
1530
1531 if (argv[i][1] == 0)
1532 {
1533 if (pattern_filename != NULL || pattern_count > 0) break;
1534 else exit(usage(2));
1535 }
1536
1537 /* Handle a long name option, or -- to terminate the options */
1538
1539 if (argv[i][1] == '-')
1540 {
1541 char *arg = argv[i] + 2;
1542 char *argequals = strchr(arg, '=');
1543
1544 if (*arg == 0) /* -- terminates options */
1545 {
1546 i++;
1547 break; /* out of the options-handling loop */
1548 }
1549
1550 longop = TRUE;
1551
1552 /* Some long options have data that follows after =, for example file=name.
1553 Some options have variations in the long name spelling: specifically, we
1554 allow "regexp" because GNU grep allows it, though I personally go along
1555 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1556 These options are entered in the table as "regex(p)". No option is in both
1557 these categories, fortunately. */
1558
1559 for (op = optionlist; op->one_char != 0; op++)
1560 {
1561 char *opbra = strchr(op->long_name, '(');
1562 char *equals = strchr(op->long_name, '=');
1563 if (opbra == NULL) /* Not a (p) case */
1564 {
1565 if (equals == NULL) /* Not thing=data case */
1566 {
1567 if (strcmp(arg, op->long_name) == 0) break;
1568 }
1569 else /* Special case xxx=data */
1570 {
1571 int oplen = equals - op->long_name;
1572 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1573 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1574 {
1575 option_data = arg + arglen;
1576 if (*option_data == '=')
1577 {
1578 option_data++;
1579 longopwasequals = TRUE;
1580 }
1581 break;
1582 }
1583 }
1584 }
1585 else /* Special case xxxx(p) */
1586 {
1587 char buff1[24];
1588 char buff2[24];
1589 int baselen = opbra - op->long_name;
1590 sprintf(buff1, "%.*s", baselen, op->long_name);
1591 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1592 opbra + 1);
1593 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1594 break;
1595 }
1596 }
1597
1598 if (op->one_char == 0)
1599 {
1600 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1601 exit(usage(2));
1602 }
1603 }
1604
1605
1606 /* Jeffrey Friedl's debugging harness uses these additional options which
1607 are not in the right form for putting in the option table because they use
1608 only one hyphen, yet are more than one character long. By putting them
1609 separately here, they will not get displayed as part of the help() output,
1610 but I don't think Jeffrey will care about that. */
1611
1612 #ifdef JFRIEDL_DEBUG
1613 else if (strcmp(argv[i], "-pre") == 0) {
1614 jfriedl_prefix = argv[++i];
1615 continue;
1616 } else if (strcmp(argv[i], "-post") == 0) {
1617 jfriedl_postfix = argv[++i];
1618 continue;
1619 } else if (strcmp(argv[i], "-XT") == 0) {
1620 sscanf(argv[++i], "%d", &jfriedl_XT);
1621 continue;
1622 } else if (strcmp(argv[i], "-XR") == 0) {
1623 sscanf(argv[++i], "%d", &jfriedl_XR);
1624 continue;
1625 }
1626 #endif
1627
1628
1629 /* One-char options; many that have no data may be in a single argument; we
1630 continue till we hit the last one or one that needs data. */
1631
1632 else
1633 {
1634 char *s = argv[i] + 1;
1635 longop = FALSE;
1636 while (*s != 0)
1637 {
1638 for (op = optionlist; op->one_char != 0; op++)
1639 { if (*s == op->one_char) break; }
1640 if (op->one_char == 0)
1641 {
1642 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1643 *s, argv[i]);
1644 exit(usage(2));
1645 }
1646 if (op->type != OP_NODATA || s[1] == 0)
1647 {
1648 option_data = s+1;
1649 break;
1650 }
1651 pcre_options = handle_option(*s++, pcre_options);
1652 }
1653 }
1654
1655 /* At this point we should have op pointing to a matched option. If the type
1656 is NO_DATA, it means that there is no data, and the option might set
1657 something in the PCRE options. */
1658
1659 if (op->type == OP_NODATA)
1660 {
1661 pcre_options = handle_option(op->one_char, pcre_options);
1662 continue;
1663 }
1664
1665 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1666 either has a value or defaults to something. It cannot have data in a
1667 separate item. At the moment, the only such options are "colo(u)r" and
1668 Jeffrey Friedl's special -S debugging option. */
1669
1670 if (*option_data == 0 &&
1671 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1672 {
1673 switch (op->one_char)
1674 {
1675 case N_COLOUR:
1676 colour_option = (char *)"auto";
1677 break;
1678 #ifdef JFRIEDL_DEBUG
1679 case 'S':
1680 S_arg = 0;
1681 break;
1682 #endif
1683 }
1684 continue;
1685 }
1686
1687 /* Otherwise, find the data string for the option. */
1688
1689 if (*option_data == 0)
1690 {
1691 if (i >= argc - 1 || longopwasequals)
1692 {
1693 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1694 exit(usage(2));
1695 }
1696 option_data = argv[++i];
1697 }
1698
1699 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1700 multiple times to create a list of patterns. */
1701
1702 if (op->type == OP_PATLIST)
1703 {
1704 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1705 {
1706 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1707 MAX_PATTERN_COUNT);
1708 return 2;
1709 }
1710 patterns[cmd_pattern_count++] = option_data;
1711 }
1712
1713 /* Otherwise, deal with single string or numeric data values. */
1714
1715 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1716 {
1717 *((char **)op->dataptr) = option_data;
1718 }
1719 else
1720 {
1721 char *endptr;
1722 int n = strtoul(option_data, &endptr, 10);
1723 if (*endptr != 0)
1724 {
1725 if (longop)
1726 {
1727 char *equals = strchr(op->long_name, '=');
1728 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1729 equals - op->long_name;
1730 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1731 option_data, nlen, op->long_name);
1732 }
1733 else
1734 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1735 option_data, op->one_char);
1736 exit(usage(2));
1737 }
1738 *((int *)op->dataptr) = n;
1739 }
1740 }
1741
1742 /* Options have been decoded. If -C was used, its value is used as a default
1743 for -A and -B. */
1744
1745 if (both_context > 0)
1746 {
1747 if (after_context == 0) after_context = both_context;
1748 if (before_context == 0) before_context = both_context;
1749 }
1750
1751 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1752 LC_ALL environment variable is set, and if so, use it. */
1753
1754 if (locale == NULL)
1755 {
1756 locale = getenv("LC_ALL");
1757 locale_from = "LCC_ALL";
1758 }
1759
1760 if (locale == NULL)
1761 {
1762 locale = getenv("LC_CTYPE");
1763 locale_from = "LC_CTYPE";
1764 }
1765
1766 /* If a locale has been provided, set it, and generate the tables the PCRE
1767 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1768
1769 if (locale != NULL)
1770 {
1771 if (setlocale(LC_CTYPE, locale) == NULL)
1772 {
1773 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1774 locale, locale_from);
1775 return 2;
1776 }
1777 pcretables = pcre_maketables();
1778 }
1779
1780 /* Sort out colouring */
1781
1782 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1783 {
1784 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1785 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1786 else
1787 {
1788 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1789 colour_option);
1790 return 2;
1791 }
1792 if (do_colour)
1793 {
1794 char *cs = getenv("PCREGREP_COLOUR");
1795 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1796 if (cs != NULL) colour_string = cs;
1797 }
1798 }
1799
1800 /* Interpret the newline type; the default settings are Unix-like. */
1801
1802 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1803 {
1804 pcre_options |= PCRE_NEWLINE_CR;
1805 endlinetype = EL_CR;
1806 }
1807 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1808 {
1809 pcre_options |= PCRE_NEWLINE_LF;
1810 endlinetype = EL_LF;
1811 }
1812 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1813 {
1814 pcre_options |= PCRE_NEWLINE_CRLF;
1815 endlinetype = EL_CRLF;
1816 }
1817 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1818 {
1819 pcre_options |= PCRE_NEWLINE_ANY;
1820 endlinetype = EL_ANY;
1821 }
1822 else
1823 {
1824 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1825 return 2;
1826 }
1827
1828 /* Interpret the text values for -d and -D */
1829
1830 if (dee_option != NULL)
1831 {
1832 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1833 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1834 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1835 else
1836 {
1837 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1838 return 2;
1839 }
1840 }
1841
1842 if (DEE_option != NULL)
1843 {
1844 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1845 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1846 else
1847 {
1848 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1849 return 2;
1850 }
1851 }
1852
1853 /* Check the values for Jeffrey Friedl's debugging options. */
1854
1855 #ifdef JFRIEDL_DEBUG
1856 if (S_arg > 9)
1857 {
1858 fprintf(stderr, "pcregrep: bad value for -S option\n");
1859 return 2;
1860 }
1861 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1862 {
1863 if (jfriedl_XT == 0) jfriedl_XT = 1;
1864 if (jfriedl_XR == 0) jfriedl_XR = 1;
1865 }
1866 #endif
1867
1868 /* Get memory to store the pattern and hints lists. */
1869
1870 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1871 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1872
1873 if (pattern_list == NULL || hints_list == NULL)
1874 {
1875 fprintf(stderr, "pcregrep: malloc failed\n");
1876 goto EXIT2;
1877 }
1878
1879 /* If no patterns were provided by -e, and there is no file provided by -f,
1880 the first argument is the one and only pattern, and it must exist. */
1881
1882 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1883 {
1884 if (i >= argc) return usage(2);
1885 patterns[cmd_pattern_count++] = argv[i++];
1886 }
1887
1888 /* Compile the patterns that were provided on the command line, either by
1889 multiple uses of -e or as a single unkeyed pattern. */
1890
1891 for (j = 0; j < cmd_pattern_count; j++)
1892 {
1893 if (!compile_pattern(patterns[j], pcre_options, NULL,
1894 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1895 goto EXIT2;
1896 }
1897
1898 /* Compile the regular expressions that are provided in a file. */
1899
1900 if (pattern_filename != NULL)
1901 {
1902 int linenumber = 0;
1903 FILE *f;
1904 char *filename;
1905 char buffer[MBUFTHIRD];
1906
1907 if (strcmp(pattern_filename, "-") == 0)
1908 {
1909 f = stdin;
1910 filename = stdin_name;
1911 }
1912 else
1913 {
1914 f = fopen(pattern_filename, "r");
1915 if (f == NULL)
1916 {
1917 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1918 strerror(errno));
1919 goto EXIT2;
1920 }
1921 filename = pattern_filename;
1922 }
1923
1924 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1925 {
1926 char *s = buffer + (int)strlen(buffer);
1927 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1928 *s = 0;
1929 linenumber++;
1930 if (buffer[0] == 0) continue; /* Skip blank lines */
1931 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1932 goto EXIT2;
1933 }
1934
1935 if (f != stdin) fclose(f);
1936 }
1937
1938 /* Study the regular expressions, as we will be running them many times */
1939
1940 for (j = 0; j < pattern_count; j++)
1941 {
1942 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1943 if (error != NULL)
1944 {
1945 char s[16];
1946 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1947 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1948 goto EXIT2;
1949 }
1950 hint_count++;
1951 }
1952
1953 /* If there are include or exclude patterns, compile them. */
1954
1955 if (exclude_pattern != NULL)
1956 {
1957 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1958 pcretables);
1959 if (exclude_compiled == NULL)
1960 {
1961 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1962 errptr, error);
1963 goto EXIT2;
1964 }
1965 }
1966
1967 if (include_pattern != NULL)
1968 {
1969 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1970 pcretables);
1971 if (include_compiled == NULL)
1972 {
1973 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1974 errptr, error);
1975 goto EXIT2;
1976 }
1977 }
1978
1979 /* If there are no further arguments, do the business on stdin and exit. */
1980
1981 if (i >= argc)
1982 {
1983 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1984 goto EXIT;
1985 }
1986
1987 /* Otherwise, work through the remaining arguments as files or directories.
1988 Pass in the fact that there is only one argument at top level - this suppresses
1989 the file name if the argument is not a directory and filenames are not
1990 otherwise forced. */
1991
1992 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1993
1994 for (; i < argc; i++)
1995 {
1996 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1997 only_one_at_top);
1998 if (frc > 1) rc = frc;
1999 else if (frc == 0 && rc == 1) rc = 0;
2000 }
2001
2002 EXIT:
2003 if (pattern_list != NULL)
2004 {
2005 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2006 free(pattern_list);
2007 }
2008 if (hints_list != NULL)
2009 {
2010 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2011 free(hints_list);
2012 }
2013 return rc;
2014
2015 EXIT2:
2016 rc = 2;
2017 goto EXIT;
2018 }
2019
2020 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12