/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 137 - (show annotations) (download)
Thu Mar 29 13:56:00 2007 UTC (7 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 57456 byte(s)
Daniel's patches.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 # include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53 #ifdef HAVE_UNISTD_H
54 # include <unistd.h>
55 #endif
56
57 #include <pcre.h>
58
59 #define FALSE 0
60 #define TRUE 1
61
62 typedef int BOOL;
63
64 #define MAX_PATTERN_COUNT 100
65
66 #if BUFSIZ > 8192
67 #define MBUFTHIRD BUFSIZ
68 #else
69 #define MBUFTHIRD 8192
70 #endif
71
72 /* Values for the "filenames" variable, which specifies options for file name
73 output. The order is important; it is assumed that a file name is wanted for
74 all values greater than FN_DEFAULT. */
75
76 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
77
78 /* Actions for the -d and -D options */
79
80 enum { dee_READ, dee_SKIP, dee_RECURSE };
81 enum { DEE_READ, DEE_SKIP };
82
83 /* Actions for special processing options (flag bits) */
84
85 #define PO_WORD_MATCH 0x0001
86 #define PO_LINE_MATCH 0x0002
87 #define PO_FIXED_STRINGS 0x0004
88
89 /* Line ending types */
90
91 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
92
93
94
95 /*************************************************
96 * Global variables *
97 *************************************************/
98
99 /* Jeffrey Friedl has some debugging requirements that are not part of the
100 regular code. */
101
102 #ifdef JFRIEDL_DEBUG
103 static int S_arg = -1;
104 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106 static const char *jfriedl_prefix = "";
107 static const char *jfriedl_postfix = "";
108 #endif
109
110 static int endlinetype;
111
112 static char *colour_string = (char *)"1;31";
113 static char *colour_option = NULL;
114 static char *dee_option = NULL;
115 static char *DEE_option = NULL;
116 static char *newline = NULL;
117 static char *pattern_filename = NULL;
118 static char *stdin_name = (char *)"(standard input)";
119 static char *locale = NULL;
120
121 static const unsigned char *pcretables = NULL;
122
123 static int pattern_count = 0;
124 static pcre **pattern_list = NULL;
125 static pcre_extra **hints_list = NULL;
126
127 static char *include_pattern = NULL;
128 static char *exclude_pattern = NULL;
129
130 static pcre *include_compiled = NULL;
131 static pcre *exclude_compiled = NULL;
132
133 static int after_context = 0;
134 static int before_context = 0;
135 static int both_context = 0;
136 static int dee_action = dee_READ;
137 static int DEE_action = DEE_READ;
138 static int error_count = 0;
139 static int filenames = FN_DEFAULT;
140 static int process_options = 0;
141
142 static BOOL count_only = FALSE;
143 static BOOL do_colour = FALSE;
144 static BOOL hyphenpending = FALSE;
145 static BOOL invert = FALSE;
146 static BOOL multiline = FALSE;
147 static BOOL number = FALSE;
148 static BOOL only_matching = FALSE;
149 static BOOL quiet = FALSE;
150 static BOOL silent = FALSE;
151 static BOOL utf8 = FALSE;
152
153 /* Structure for options and list of them */
154
155 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
156 OP_PATLIST };
157
158 typedef struct option_item {
159 int type;
160 int one_char;
161 void *dataptr;
162 const char *long_name;
163 const char *help_text;
164 } option_item;
165
166 /* Options without a single-letter equivalent get a negative value. This can be
167 used to identify them. */
168
169 #define N_COLOUR (-1)
170 #define N_EXCLUDE (-2)
171 #define N_HELP (-3)
172 #define N_INCLUDE (-4)
173 #define N_LABEL (-5)
174 #define N_LOCALE (-6)
175 #define N_NULL (-7)
176
177 static option_item optionlist[] = {
178 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
179 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
180 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
181 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
182 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
183 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
184 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
185 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
186 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
187 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
188 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
189 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
190 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
191 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
192 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
193 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
194 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
195 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
196 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
197 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
198 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
199 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
200 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
201 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
202 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
203 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
204 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
205 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
206 #ifdef JFRIEDL_DEBUG
207 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
208 #endif
209 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
210 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
211 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
212 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
213 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
214 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
215 { OP_NODATA, 0, NULL, NULL, NULL }
216 };
217
218 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
219 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
220 that the combination of -w and -x has the same effect as -x on its own, so we
221 can treat them as the same. */
222
223 static const char *prefix[] = {
224 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
225
226 static const char *suffix[] = {
227 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
228
229 /* UTF-8 tables - used only when the newline setting is "all". */
230
/* Mask that selects the data bits in the first byte of a multibyte UTF-8
character, indexed by the number of additional bytes that follow that byte. */
231 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232
/* Number of additional bytes in a multibyte UTF-8 character, indexed by the
low six bits of its first byte. It is consulted only for first bytes whose
value is 0xc0 or more. */
233 const char utf8_table4[] = {
234 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238
239
240
241 /*************************************************
242 * OS-specific functions *
243 *************************************************/
244
245 /* These functions are defined so that they can be made system specific,
246 although at present the only ones are for Unix, Win32, and for "no support". */
247
248
249 /************* Directory scanning in Unix ***********/
250
251 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252 #include <sys/types.h>
253 #include <sys/stat.h>
254 #include <dirent.h>
255
256 typedef DIR directory_type;
257
/* Test whether a path names a directory.

Arguments:
  filename   the path to examine

Returns:     '/' if the path is a directory, 0 if it is not or if it cannot
             be examined
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as "not a directory", in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';

  return 0;
}
266
267 static directory_type *
268 opendirectory(char *filename)
269 {
270 return opendir(filename);
271 }
272
273 static char *
274 readdirectory(directory_type *dir)
275 {
276 for (;;)
277 {
278 struct dirent *dent = readdir(dir);
279 if (dent == NULL) return NULL;
280 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
281 return dent->d_name;
282 }
283 return NULL; /* Keep compiler happy; never executed */
284 }
285
286 static void
287 closedirectory(directory_type *dir)
288 {
289 closedir(dir);
290 }
291
292
293 /************* Test for regular file in Unix **********/
294
/* Test whether a path names a regular file.

Arguments:
  filename   the path to examine

Returns:     1 if the path is a regular file or cannot be examined,
             0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as regular, in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &info) < 0) return 1;

  return S_ISREG(info.st_mode)? 1 : 0;
}
303
304
305 /************* Test stdout for being a terminal in Unix **********/
306
307 static BOOL
308 is_stdout_tty(void)
309 {
310 return isatty(fileno(stdout));
311 }
312
313
314 /************* Directory scanning in Win32 ***********/
315
316 /* I (Philip Hazel) have no means of testing this code. It was contributed by
317 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318 when it did not exist. */
319
320
321 #elif HAVE_WINDOWS_H
322
323 #ifndef STRICT
324 # define STRICT
325 #endif
326 #ifndef WIN32_LEAN_AND_MEAN
327 # define WIN32_LEAN_AND_MEAN
328 #endif
329 #ifndef INVALID_FILE_ATTRIBUTES
330 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331 #endif
332
333 #include <windows.h>
334
/* State for one directory scan in Win32: the search handle, a flag that is
TRUE until the entry found when the scan was opened has been handed out, and
the data for the most recently found entry. */
335 typedef struct directory_type
336 {
337 HANDLE handle;
338 BOOL first;
339 WIN32_FIND_DATA data;
340 } directory_type;
341
/* Test whether a path names a directory.

Arguments:
  filename   the path to examine

Returns:     '/' if the path has the directory attribute, 0 if it has not or
             if its attributes cannot be obtained
*/

int
isdirectory(char *filename)
{
  DWORD attr = GetFileAttributes(filename);

  if (attr != INVALID_FILE_ATTRIBUTES && (attr & FILE_ATTRIBUTE_DIRECTORY) != 0)
    return '/';

  return 0;
}
350
/* Start scanning a directory by searching for "<filename>\*".

Arguments:
  filename   the path of the directory

Returns:     a newly allocated scan state, to be released with
             closedirectory(), or NULL with errno set to EACCES or ENOENT
             if the search could not be started

The program exits with code 2 if memory cannot be obtained.
*/

directory_type *
opendirectory(char *filename)
{
  size_t namelen = strlen(filename);
  char *pattern = (char *) malloc(namelen + 3);     /* Room for "\*" and zero */
  directory_type *dir = (directory_type *) malloc(sizeof(*dir));
  DWORD err;

  if (pattern == NULL || dir == NULL)
  {
    fprintf(stderr, "pcregrep: malloc failed\n");
    exit(2);
  }

  memcpy(pattern, filename, namelen);
  memcpy(pattern + namelen, "\\*", 3);

  dir->handle = FindFirstFile(pattern, &(dir->data));

  if (dir->handle == INVALID_HANDLE_VALUE)
  {
    /* Pick up the error code before anything else can disturb it */
    err = GetLastError();
    free(pattern);
    free(dir);
    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
    return NULL;
  }

  free(pattern);
  dir->first = TRUE;    /* The first entry is already in dir->data */
  return dir;
}
381
/* Fetch the name of the next entry in a directory that is being scanned,
passing over the "." and ".." entries.

Arguments:
  dir        the scan state from opendirectory()

Returns:     the entry's name, or NULL when FindNextFile() fails
*/

char *
readdirectory(directory_type *dir)
{
  for (;;)
  {
    /* The entry found when the scan was opened is still waiting in
    dir->data the first time through; after that a new one is fetched. */

    if (dir->first)
      dir->first = FALSE;
    else if (!FindNextFile(dir->handle, &(dir->data)))
      return NULL;

    if (strcmp(dir->data.cFileName, ".") == 0 ||
        strcmp(dir->data.cFileName, "..") == 0)
      continue;

    return dir->data.cFileName;
  }
#ifndef _MSC_VER
  return NULL;   /* Keep compiler happy; never executed */
#endif
}
403
/* Finish scanning a directory: close the search handle and release the scan
state that opendirectory() allocated.

Arguments:
  dir        the scan state from opendirectory()
*/

void
closedirectory(directory_type *dir)
{
  HANDLE search = dir->handle;
  FindClose(search);
  free(dir);
}
410
411
412 /************* Test for regular file in Win32 **********/
413
414 /* I don't know how to do this, or if it can be done; assume all paths are
415 regular if they are not directories. */
416
/* Test whether a path names a regular file. Every path that isdirectory()
does not report as a directory is taken to be regular.

Arguments:
  filename   the path to examine

Returns:     1 if the path is not a directory, 0 if it is
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);   /* The terminating semicolon was missing */
}
421
422
423 /************* Test stdout for being a terminal in Win32 **********/
424
425 /* I don't know how to do this; assume never */
426
427 static BOOL
428 is_stdout_tty(void)
429 {
430 FALSE;
431 }
432
433
434 /************* Directory scanning when we can't do it ***********/
435
436 /* The type is void, and apart from isdirectory(), the functions do nothing. */
437
438 #else
439
typedef void directory_type;

/* No path is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory scan can never be started. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing was opened, so there is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
446
447
448 /************* Test for regular when we can't do it **********/
449
450 /* Assume all files are regular. */
451
/* Every path is reported as a regular file.

Arguments:
  filename   the path (not examined)

Returns:     1
*/

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
453
454
455 /************* Test stdout for being a terminal when we can't do it **********/
456
457 static BOOL
458 is_stdout_tty(void)
459 {
460 return FALSE;
461 }
462
463
464 #endif
465
466
467
468 #ifndef HAVE_STRERROR
469 /*************************************************
470 * Provide strerror() for non-ANSI libraries *
471 *************************************************/
472
473 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
474 in their libraries, but can provide the same facility by this simple
475 alternative function. */
476
477 extern int sys_nerr;
478 extern char *sys_errlist[];
479
480 char *
481 strerror(int n)
482 {
483 if (n < 0 || n >= sys_nerr) return "unknown error number";
484 return sys_errlist[n];
485 }
486 #endif /* HAVE_STRERROR */
487
488
489
490 /*************************************************
491 * Find end of line *
492 *************************************************/
493
494 /* The length of the endline sequence that is found is set via lenptr. This may
495 be zero at the very end of the file if there is no line-ending sequence there.
496
497 Arguments:
498 p current position in line
499 endptr end of available data
500 lenptr where to put the length of the eol sequence
501
502 Returns: pointer to the byte after the end of the line, including its ending sequence
503 */
504
505 static char *
506 end_of_line(char *p, char *endptr, int *lenptr)
507 {
508 switch(endlinetype)
509 {
510 default: /* Just in case */
511 case EL_LF:
512 while (p < endptr && *p != '\n') p++;
513 if (p < endptr)
514 {
515 *lenptr = 1;
516 return p + 1;
517 }
518 *lenptr = 0;
519 return endptr;
520
521 case EL_CR:
522 while (p < endptr && *p != '\r') p++;
523 if (p < endptr)
524 {
525 *lenptr = 1;
526 return p + 1;
527 }
528 *lenptr = 0;
529 return endptr;
530
531 case EL_CRLF:
532 for (;;)
533 {
534 while (p < endptr && *p != '\r') p++;
535 if (++p >= endptr)
536 {
537 *lenptr = 0;
538 return endptr;
539 }
540 if (*p == '\n')
541 {
542 *lenptr = 2;
543 return p + 1;
544 }
545 }
546 break;
547
548 case EL_ANY:
549 while (p < endptr)
550 {
551 int extra = 0;
552 register int c = *((unsigned char *)p);
553
554 if (utf8 && c >= 0xc0)
555 {
556 int gcii, gcss;
557 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
558 gcss = 6*extra;
559 c = (c & utf8_table3[extra]) << gcss;
560 for (gcii = 1; gcii <= extra; gcii++)
561 {
562 gcss -= 6;
563 c |= (p[gcii] & 0x3f) << gcss;
564 }
565 }
566
567 p += 1 + extra;
568
569 switch (c)
570 {
571 case 0x0a: /* LF */
572 case 0x0b: /* VT */
573 case 0x0c: /* FF */
574 *lenptr = 1;
575 return p;
576
577 case 0x0d: /* CR */
578 if (p < endptr && *p == 0x0a)
579 {
580 *lenptr = 2;
581 p++;
582 }
583 else *lenptr = 1;
584 return p;
585
586 case 0x85: /* NEL */
587 *lenptr = utf8? 2 : 1;
588 return p;
589
590 case 0x2028: /* LS */
591 case 0x2029: /* PS */
592 *lenptr = 3;
593 return p;
594
595 default:
596 break;
597 }
598 } /* End of loop for ANY case */
599
600 *lenptr = 0; /* Must have hit the end */
601 return endptr;
602 } /* End of overall switch */
603 }
604
605
606
607 /*************************************************
608 * Find start of previous line *
609 *************************************************/
610
611 /* This is called when looking back for before lines to print.
612
613 Arguments:
614 p start of the subsequent line
615 startptr start of available data
616
617 Returns: pointer to the start of the previous line
618 */
619
620 static char *
621 previous_line(char *p, char *startptr)
622 {
623 switch(endlinetype)
624 {
625 default: /* Just in case */
626 case EL_LF:
627 p--;
628 while (p > startptr && p[-1] != '\n') p--;
629 return p;
630
631 case EL_CR:
632 p--;
633 while (p > startptr && p[-1] != '\n') p--;
634 return p;
635
636 case EL_CRLF:
637 for (;;)
638 {
639 p -= 2;
640 while (p > startptr && p[-1] != '\n') p--;
641 if (p <= startptr + 1 || p[-2] == '\r') return p;
642 }
643 return p; /* But control should never get here */
644
645 case EL_ANY:
646 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
647 if (utf8) while ((*p & 0xc0) == 0x80) p--;
648
649 while (p > startptr)
650 {
651 register int c;
652 char *pp = p - 1;
653
654 if (utf8)
655 {
656 int extra = 0;
657 while ((*pp & 0xc0) == 0x80) pp--;
658 c = *((unsigned char *)pp);
659 if (c >= 0xc0)
660 {
661 int gcii, gcss;
662 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
663 gcss = 6*extra;
664 c = (c & utf8_table3[extra]) << gcss;
665 for (gcii = 1; gcii <= extra; gcii++)
666 {
667 gcss -= 6;
668 c |= (pp[gcii] & 0x3f) << gcss;
669 }
670 }
671 }
672 else c = *((unsigned char *)pp);
673
674 switch (c)
675 {
676 case 0x0a: /* LF */
677 case 0x0b: /* VT */
678 case 0x0c: /* FF */
679 case 0x0d: /* CR */
680 case 0x85: /* NEL */
681 case 0x2028: /* LS */
682 case 0x2029: /* PS */
683 return p;
684
685 default:
686 break;
687 }
688
689 p = pp; /* Back one character */
690 } /* End of loop for ANY case */
691
692 return startptr; /* Hit start of data */
693 } /* End of overall switch */
694 }
695
696
697
698
699
700 /*************************************************
701 * Print the previous "after" lines *
702 *************************************************/
703
704 /* This is called if we are about to lose said lines because of buffer filling,
705 and at the end of the file. The data in the line is written using fwrite() so
706 that a binary zero does not terminate it.
707
708 Arguments:
709 lastmatchnumber the number of the last matching line, plus one
710 lastmatchrestart where we restarted after the last match
711 endptr end of available data
712 printname filename for printing
713
714 Returns: nothing
715 */
716
717 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
718 char *endptr, char *printname)
719 {
720 if (after_context > 0 && lastmatchnumber > 0)
721 {
722 int count = 0;
723 while (lastmatchrestart < endptr && count++ < after_context)
724 {
725 int ellength;
726 char *pp = lastmatchrestart;
727 if (printname != NULL) fprintf(stdout, "%s-", printname);
728 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
729 pp = end_of_line(pp, endptr, &ellength);
730 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
731 lastmatchrestart = pp;
732 }
733 hyphenpending = TRUE;
734 }
735 }
736
737
738
739 /*************************************************
740 * Grep an individual file *
741 *************************************************/
742
743 /* This is called from grep_or_recurse() below. It uses a buffer that is three
744 times the value of MBUFTHIRD. The matching point is never allowed to stray into
745 the top third of the buffer, thus keeping more of the file available for
746 context printing or for multiline scanning. For large files, the pointer will
747 be in the middle third most of the time, so the bottom third is available for
748 "before" context printing.
749
750 Arguments:
751 in the fopened FILE stream
752 printname the file name if it is to be printed for each match
753 or NULL if the file name is not to be printed
754 it cannot be NULL if filenames[_nomatch]_only is set
755
756 Returns: 0 if there was at least one match
757 1 otherwise (no matches)
758 */
759
760 static int
761 pcregrep(FILE *in, char *printname)
762 {
763 int rc = 1;
764 int linenumber = 1;
765 int lastmatchnumber = 0;
766 int count = 0;
767 int offsets[99];
768 char *lastmatchrestart = NULL;
769 char buffer[3*MBUFTHIRD];
770 char *ptr = buffer;
771 char *endptr;
772 size_t bufflength;
773 BOOL endhyphenpending = FALSE;
774
775 /* Do the first read into the start of the buffer and set up the pointer to
776 end of what we have. */
777
778 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
779 endptr = buffer + bufflength;
780
781 /* Loop while the current pointer is not at the end of the file. For large
782 files, endptr will be at the end of the buffer when we are in the middle of the
783 file, but ptr will never get there, because as soon as it gets over 2/3 of the
784 way, the buffer is shifted left and re-filled. */
785
786 while (ptr < endptr)
787 {
788 int i, endlinelength;
789 int mrc = 0;
790 BOOL match = FALSE;
791 char *t = ptr;
792 size_t length, linelength;
793
794 /* At this point, ptr is at the start of a line. We need to find the length
795 of the subject string to pass to pcre_exec(). In multiline mode, it is the
796 length remainder of the data in the buffer. Otherwise, it is the length of
797 the next line. After matching, we always advance by the length of the next
798 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
799 that any match is constrained to be in the first line. */
800
801 t = end_of_line(t, endptr, &endlinelength);
802 linelength = t - ptr - endlinelength;
803 length = multiline? endptr - ptr : linelength;
804
805 /* Extra processing for Jeffrey Friedl's debugging. */
806
807 #ifdef JFRIEDL_DEBUG
808 if (jfriedl_XT || jfriedl_XR)
809 {
810 #include <sys/time.h>
811 #include <time.h>
812 struct timeval start_time, end_time;
813 struct timezone dummy;
814
815 if (jfriedl_XT)
816 {
817 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
818 const char *orig = ptr;
819 ptr = malloc(newlen + 1);
820 if (!ptr) {
821 printf("out of memory");
822 exit(2);
823 }
824 endptr = ptr;
825 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
826 for (i = 0; i < jfriedl_XT; i++) {
827 strncpy(endptr, orig, length);
828 endptr += length;
829 }
830 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
831 length = newlen;
832 }
833
834 if (gettimeofday(&start_time, &dummy) != 0)
835 perror("bad gettimeofday");
836
837
838 for (i = 0; i < jfriedl_XR; i++)
839 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
840
841 if (gettimeofday(&end_time, &dummy) != 0)
842 perror("bad gettimeofday");
843
844 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
845 -
846 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
847
848 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
849 return 0;
850 }
851 #endif
852
853
854 /* Run through all the patterns until one matches. Note that we don't include
855 the final newline in the subject string. */
856
857 for (i = 0; i < pattern_count; i++)
858 {
859 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
860 offsets, 99);
861 if (mrc >= 0) { match = TRUE; break; }
862 if (mrc != PCRE_ERROR_NOMATCH)
863 {
864 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
865 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
866 fprintf(stderr, "this line:\n");
867 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
868 fprintf(stderr, "\n");
869 if (error_count == 0 &&
870 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
871 {
872 fprintf(stderr, "pcregrep: error %d means that a resource limit "
873 "was exceeded\n", mrc);
874 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
875 }
876 if (error_count++ > 20)
877 {
878 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
879 exit(2);
880 }
881 match = invert; /* No more matching; don't show the line again */
882 break;
883 }
884 }
885
886 /* If it's a match or a not-match (as required), do what's wanted. */
887
888 if (match != invert)
889 {
890 BOOL hyphenprinted = FALSE;
891
892 /* We've failed if we want a file that doesn't have any matches. */
893
894 if (filenames == FN_NOMATCH_ONLY) return 1;
895
896 /* Just count if just counting is wanted. */
897
898 if (count_only) count++;
899
900 /* If all we want is a file name, there is no need to scan any more lines
901 in the file. */
902
903 else if (filenames == FN_ONLY)
904 {
905 fprintf(stdout, "%s\n", printname);
906 return 0;
907 }
908
909 /* Likewise, if all we want is a yes/no answer. */
910
911 else if (quiet) return 0;
912
913 /* The --only-matching option prints just the substring that matched, and
914 does not print any context. */
915
916 else if (only_matching)
917 {
918 if (printname != NULL) fprintf(stdout, "%s:", printname);
919 if (number) fprintf(stdout, "%d:", linenumber);
920 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
921 fprintf(stdout, "\n");
922 }
923
924 /* This is the default case when none of the above options is set. We print
925 the matching lines(s), possibly preceded and/or followed by other lines of
926 context. */
927
928 else
929 {
930 /* See if there is a requirement to print some "after" lines from a
931 previous match. We never print any overlaps. */
932
933 if (after_context > 0 && lastmatchnumber > 0)
934 {
935 int ellength;
936 int linecount = 0;
937 char *p = lastmatchrestart;
938
939 while (p < ptr && linecount < after_context)
940 {
941 p = end_of_line(p, ptr, &ellength);
942 linecount++;
943 }
944
945 /* It is important to advance lastmatchrestart during this printing so
946 that it interacts correctly with any "before" printing below. Print
947 each line's data using fwrite() in case there are binary zeroes. */
948
949 while (lastmatchrestart < p)
950 {
951 char *pp = lastmatchrestart;
952 if (printname != NULL) fprintf(stdout, "%s-", printname);
953 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
954 pp = end_of_line(pp, endptr, &ellength);
955 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
956 lastmatchrestart = pp;
957 }
958 if (lastmatchrestart != ptr) hyphenpending = TRUE;
959 }
960
961 /* If there were non-contiguous lines printed above, insert hyphens. */
962
963 if (hyphenpending)
964 {
965 fprintf(stdout, "--\n");
966 hyphenpending = FALSE;
967 hyphenprinted = TRUE;
968 }
969
970 /* See if there is a requirement to print some "before" lines for this
971 match. Again, don't print overlaps. */
972
973 if (before_context > 0)
974 {
975 int linecount = 0;
976 char *p = ptr;
977
978 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
979 linecount < before_context)
980 {
981 linecount++;
982 p = previous_line(p, buffer);
983 }
984
985 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
986 fprintf(stdout, "--\n");
987
988 while (p < ptr)
989 {
990 int ellength;
991 char *pp = p;
992 if (printname != NULL) fprintf(stdout, "%s-", printname);
993 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
994 pp = end_of_line(pp, endptr, &ellength);
995 fwrite(p, 1, pp - p, stdout);
996 p = pp;
997 }
998 }
999
1000 /* Now print the matching line(s); ensure we set hyphenpending at the end
1001 of the file if any context lines are being output. */
1002
1003 if (after_context > 0 || before_context > 0)
1004 endhyphenpending = TRUE;
1005
1006 if (printname != NULL) fprintf(stdout, "%s:", printname);
1007 if (number) fprintf(stdout, "%d:", linenumber);
1008
1009 /* In multiline mode, we want to print to the end of the line in which
1010 the end of the matched string is found, so we adjust linelength and the
1011 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1012 start of the match will always be before the first newline sequence. */
1013
1014 if (multiline)
1015 {
1016 int ellength;
1017 char *endmatch = ptr + offsets[1];
1018 t = ptr;
1019 while (t < endmatch)
1020 {
1021 t = end_of_line(t, endptr, &ellength);
1022 if (t <= endmatch) linenumber++; else break;
1023 }
1024 endmatch = end_of_line(endmatch, endptr, &ellength);
1025 linelength = endmatch - ptr - ellength;
1026 }
1027
1028 /*** NOTE: Use only fwrite() to output the data line, so that binary
1029 zeroes are treated as just another data character. */
1030
1031 /* This extra option, for Jeffrey Friedl's debugging requirements,
1032 replaces the matched string, or a specific captured string if it exists,
1033 with X. When this happens, colouring is ignored. */
1034
1035 #ifdef JFRIEDL_DEBUG
1036 if (S_arg >= 0 && S_arg < mrc)
1037 {
1038 int first = S_arg * 2;
1039 int last = first + 1;
1040 fwrite(ptr, 1, offsets[first], stdout);
1041 fprintf(stdout, "X");
1042 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1043 }
1044 else
1045 #endif
1046
1047 /* We have to split the line(s) up if colouring. */
1048
1049 if (do_colour)
1050 {
1051 fwrite(ptr, 1, offsets[0], stdout);
1052 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1053 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1054 fprintf(stdout, "%c[00m", 0x1b);
1055 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1056 }
1057 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1058 }
1059
1060 /* End of doing what has to be done for a match */
1061
1062 rc = 0; /* Had some success */
1063
1064 /* Remember where the last match happened for after_context. We remember
1065 where we are about to restart, and that line's number. */
1066
1067 lastmatchrestart = ptr + linelength + endlinelength;
1068 lastmatchnumber = linenumber + 1;
1069 }
1070
1071 /* Advance to after the newline and increment the line number. */
1072
1073 ptr += linelength + endlinelength;
1074 linenumber++;
1075
1076 /* If we haven't yet reached the end of the file (the buffer is full), and
1077 the current point is in the top 1/3 of the buffer, slide the buffer down by
1078 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1079 about to be lost, print them. */
1080
1081 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1082 {
1083 if (after_context > 0 &&
1084 lastmatchnumber > 0 &&
1085 lastmatchrestart < buffer + MBUFTHIRD)
1086 {
1087 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1088 lastmatchnumber = 0;
1089 }
1090
1091 /* Now do the shuffle */
1092
1093 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1094 ptr -= MBUFTHIRD;
1095 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1096 endptr = buffer + bufflength;
1097
1098 /* Adjust any last match point */
1099
1100 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1101 }
1102 } /* Loop through the whole file */
1103
1104 /* End of file; print final "after" lines if wanted; do_after_lines sets
1105 hyphenpending if it prints something. */
1106
1107 if (!only_matching && !count_only)
1108 {
1109 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1110 hyphenpending |= endhyphenpending;
1111 }
1112
1113 /* Print the file name if we are looking for those without matches and there
1114 were none. If we found a match, we won't have got this far. */
1115
1116 if (filenames == FN_NOMATCH_ONLY)
1117 {
1118 fprintf(stdout, "%s\n", printname);
1119 return 0;
1120 }
1121
1122 /* Print the match count if wanted */
1123
1124 if (count_only)
1125 {
1126 if (printname != NULL) fprintf(stdout, "%s:", printname);
1127 fprintf(stdout, "%d\n", count);
1128 }
1129
1130 return rc;
1131 }
1132
1133
1134
1135 /*************************************************
1136 * Grep a file or recurse into a directory *
1137 *************************************************/
1138
1139 /* Given a path name, if it's a directory, scan all the files if we are
1140 recursing; if it's a file, grep it.
1141
1142 Arguments:
1143 pathname the path to investigate
1144 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1145 only_one_at_top TRUE if the path is the only one at toplevel
1146
1147 Returns: 0 if there was at least one match
1148 1 if there were no matches
1149 2 there was some kind of error
1150
1151 However, file opening failures are suppressed if "silent" is set.
1152 */
1153
1154 static int
1155 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1156 {
1157 int rc = 1;
1158 int sep;
1159 FILE *in;
1160
1161 /* If the file name is "-" we scan stdin */
1162
1163 if (strcmp(pathname, "-") == 0)
1164 {
1165 return pcregrep(stdin,
1166 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1167 stdin_name : NULL);
1168 }
1169
1170
1171 /* If the file is a directory, skip if skipping or if we are recursing, scan
1172 each file within it, subject to any include or exclude patterns that were set.
1173 The scanning code is localized so it can be made system-specific. */
1174
1175 if ((sep = isdirectory(pathname)) != 0)
1176 {
1177 if (dee_action == dee_SKIP) return 1;
1178 if (dee_action == dee_RECURSE)
1179 {
1180 char buffer[1024];
1181 char *nextfile;
1182 directory_type *dir = opendirectory(pathname);
1183
1184 if (dir == NULL)
1185 {
1186 if (!silent)
1187 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1188 strerror(errno));
1189 return 2;
1190 }
1191
1192 while ((nextfile = readdirectory(dir)) != NULL)
1193 {
1194 int frc, blen;
1195 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1196 blen = strlen(buffer);
1197
1198 if (exclude_compiled != NULL &&
1199 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1200 continue;
1201
1202 if (include_compiled != NULL &&
1203 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1204 continue;
1205
1206 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1207 if (frc > 1) rc = frc;
1208 else if (frc == 0 && rc == 1) rc = 0;
1209 }
1210
1211 closedirectory(dir);
1212 return rc;
1213 }
1214 }
1215
1216 /* If the file is not a directory and not a regular file, skip it if that's
1217 been requested. */
1218
1219 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1220
1221 /* Control reaches here if we have a regular file, or if we have a directory
1222 and recursion or skipping was not requested, or if we have anything else and
1223 skipping was not requested. The scan proceeds. If this is the first and only
1224 argument at top level, we don't show the file name, unless we are only showing
1225 the file name, or the filename was forced (-H). */
1226
1227 in = fopen(pathname, "r");
1228 if (in == NULL)
1229 {
1230 if (!silent)
1231 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1232 strerror(errno));
1233 return 2;
1234 }
1235
1236 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1237 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1238
1239 fclose(in);
1240 return rc;
1241 }
1242
1243
1244
1245
1246 /*************************************************
1247 * Usage function *
1248 *************************************************/
1249
1250 static int
1251 usage(int rc)
1252 {
1253 option_item *op;
1254 fprintf(stderr, "Usage: pcregrep [-");
1255 for (op = optionlist; op->one_char != 0; op++)
1256 {
1257 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1258 }
1259 fprintf(stderr, "] [long options] [pattern] [files]\n");
1260 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1261 return rc;
1262 }
1263
1264
1265
1266
1267 /*************************************************
1268 * Help function *
1269 *************************************************/
1270
1271 static void
1272 help(void)
1273 {
1274 option_item *op;
1275
1276 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1277 printf("Search for PATTERN in each FILE or standard input.\n");
1278 printf("PATTERN must be present if neither -e nor -f is used.\n");
1279 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1280 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1281
1282 printf("Options:\n");
1283
1284 for (op = optionlist; op->one_char != 0; op++)
1285 {
1286 int n;
1287 char s[4];
1288 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1289 printf(" %s --%s%n", s, op->long_name, &n);
1290 n = 30 - n;
1291 if (n < 1) n = 1;
1292 printf("%.*s%s\n", n, " ", op->help_text);
1293 }
1294
1295 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1296 printf("trailing white space is removed and blank lines are ignored.\n");
1297 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1298
1299 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1300 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1301 }
1302
1303
1304
1305
1306 /*************************************************
1307 * Handle a single-letter, no data option *
1308 *************************************************/
1309
1310 static int
1311 handle_option(int letter, int options)
1312 {
1313 switch(letter)
1314 {
1315 case N_HELP: help(); exit(0);
1316 case 'c': count_only = TRUE; break;
1317 case 'F': process_options |= PO_FIXED_STRINGS; break;
1318 case 'H': filenames = FN_FORCE; break;
1319 case 'h': filenames = FN_NONE; break;
1320 case 'i': options |= PCRE_CASELESS; break;
1321 case 'l': filenames = FN_ONLY; break;
1322 case 'L': filenames = FN_NOMATCH_ONLY; break;
1323 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1324 case 'n': number = TRUE; break;
1325 case 'o': only_matching = TRUE; break;
1326 case 'q': quiet = TRUE; break;
1327 case 'r': dee_action = dee_RECURSE; break;
1328 case 's': silent = TRUE; break;
1329 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1330 case 'v': invert = TRUE; break;
1331 case 'w': process_options |= PO_WORD_MATCH; break;
1332 case 'x': process_options |= PO_LINE_MATCH; break;
1333
1334 case 'V':
1335 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1336 exit(0);
1337 break;
1338
1339 default:
1340 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1341 exit(usage(2));
1342 }
1343
1344 return options;
1345 }
1346
1347
1348
1349
1350 /*************************************************
1351 * Construct printed ordinal *
1352 *************************************************/
1353
/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n    the number to format

Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so the result must be used (or
            copied) before ordin() is called again

Numbers whose last two digits are 11, 12, or 13 take "th" (11th, 112th), not
the ending that the final digit alone would suggest. The buffer is large
enough for any 32-bit int, including a minus sign.
*/

static char *
ordin(int n)
{
  static char buffer[16];
  const char *ending = "th";
  int last_two = n % 100;

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1372
1373
1374
1375 /*************************************************
1376 * Compile a single pattern *
1377 *************************************************/
1378
1379 /* When the -F option has been used, this is called for each substring.
1380 Otherwise it's called for each supplied pattern.
1381
1382 Arguments:
1383 pattern the pattern string
1384 options the PCRE options
1385 filename the file name, or NULL for a command-line pattern
1386 count 0 if this is the only command line pattern, or
1387 number of the command line pattern, or
1388 linenumber for a pattern from a file
1389
1390 Returns: TRUE on success, FALSE after an error
1391 */
1392
1393 static BOOL
1394 compile_single_pattern(char *pattern, int options, char *filename, int count)
1395 {
1396 char buffer[MBUFTHIRD + 16];
1397 const char *error;
1398 int errptr;
1399
1400 if (pattern_count >= MAX_PATTERN_COUNT)
1401 {
1402 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1403 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1404 return FALSE;
1405 }
1406
1407 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1408 suffix[process_options]);
1409 pattern_list[pattern_count] =
1410 pcre_compile(buffer, options, &error, &errptr, pcretables);
1411 if (pattern_list[pattern_count++] != NULL) return TRUE;
1412
1413 /* Handle compile errors */
1414
1415 errptr -= (int)strlen(prefix[process_options]);
1416 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1417
1418 if (filename == NULL)
1419 {
1420 if (count == 0)
1421 fprintf(stderr, "pcregrep: Error in command-line regex "
1422 "at offset %d: %s\n", errptr, error);
1423 else
1424 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1425 "at offset %d: %s\n", ordin(count), errptr, error);
1426 }
1427 else
1428 {
1429 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1430 "at offset %d: %s\n", count, filename, errptr, error);
1431 }
1432
1433 return FALSE;
1434 }
1435
1436
1437
1438 /*************************************************
1439 * Compile one supplied pattern *
1440 *************************************************/
1441
1442 /* When the -F option has been used, each string may be a list of strings,
1443 separated by line breaks. They will be matched literally.
1444
1445 Arguments:
1446 pattern the pattern string
1447 options the PCRE options
1448 filename the file name, or NULL for a command-line pattern
1449 count 0 if this is the only command line pattern, or
1450 number of the command line pattern, or
1451 linenumber for a pattern from a file
1452
1453 Returns: TRUE on success, FALSE after an error
1454 */
1455
1456 static BOOL
1457 compile_pattern(char *pattern, int options, char *filename, int count)
1458 {
1459 if ((process_options & PO_FIXED_STRINGS) != 0)
1460 {
1461 char *eop = pattern + strlen(pattern);
1462 char buffer[MBUFTHIRD];
1463 for(;;)
1464 {
1465 int ellength;
1466 char *p = end_of_line(pattern, eop, &ellength);
1467 if (ellength == 0)
1468 return compile_single_pattern(pattern, options, filename, count);
1469 sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1470 pattern = p;
1471 if (!compile_single_pattern(buffer, options, filename, count))
1472 return FALSE;
1473 }
1474 }
1475 else return compile_single_pattern(pattern, options, filename, count);
1476 }
1477
1478
1479
1480 /*************************************************
1481 * Main program *
1482 *************************************************/
1483
1484 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1485
1486 int
1487 main(int argc, char **argv)
1488 {
1489 int i, j;
1490 int rc = 1;
1491 int pcre_options = 0;
1492 int cmd_pattern_count = 0;
1493 int errptr;
1494 BOOL only_one_at_top;
1495 char *patterns[MAX_PATTERN_COUNT];
1496 const char *locale_from = "--locale";
1497 const char *error;
1498
1499 /* Set the default line ending value from the default in the PCRE library;
1500 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1501 */
1502
1503 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1504 switch(i)
1505 {
1506 default: newline = (char *)"lf"; break;
1507 case '\r': newline = (char *)"cr"; break;
1508 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1509 case -1: newline = (char *)"any"; break;
1510 }
1511
1512 /* Process the options */
1513
1514 for (i = 1; i < argc; i++)
1515 {
1516 option_item *op = NULL;
1517 char *option_data = (char *)""; /* default to keep compiler happy */
1518 BOOL longop;
1519 BOOL longopwasequals = FALSE;
1520
1521 if (argv[i][0] != '-') break;
1522
1523 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1524 but only if we have previously had -e or -f to define the patterns. */
1525
1526 if (argv[i][1] == 0)
1527 {
1528 if (pattern_filename != NULL || pattern_count > 0) break;
1529 else exit(usage(2));
1530 }
1531
1532 /* Handle a long name option, or -- to terminate the options */
1533
1534 if (argv[i][1] == '-')
1535 {
1536 char *arg = argv[i] + 2;
1537 char *argequals = strchr(arg, '=');
1538
1539 if (*arg == 0) /* -- terminates options */
1540 {
1541 i++;
1542 break; /* out of the options-handling loop */
1543 }
1544
1545 longop = TRUE;
1546
1547 /* Some long options have data that follows after =, for example file=name.
1548 Some options have variations in the long name spelling: specifically, we
1549 allow "regexp" because GNU grep allows it, though I personally go along
1550 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1551 These options are entered in the table as "regex(p)". No option is in both
1552 these categories, fortunately. */
1553
1554 for (op = optionlist; op->one_char != 0; op++)
1555 {
1556 char *opbra = strchr(op->long_name, '(');
1557 char *equals = strchr(op->long_name, '=');
1558 if (opbra == NULL) /* Not a (p) case */
1559 {
1560 if (equals == NULL) /* Not thing=data case */
1561 {
1562 if (strcmp(arg, op->long_name) == 0) break;
1563 }
1564 else /* Special case xxx=data */
1565 {
1566 int oplen = equals - op->long_name;
1567 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1568 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1569 {
1570 option_data = arg + arglen;
1571 if (*option_data == '=')
1572 {
1573 option_data++;
1574 longopwasequals = TRUE;
1575 }
1576 break;
1577 }
1578 }
1579 }
1580 else /* Special case xxxx(p) */
1581 {
1582 char buff1[24];
1583 char buff2[24];
1584 int baselen = opbra - op->long_name;
1585 sprintf(buff1, "%.*s", baselen, op->long_name);
1586 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1587 opbra + 1);
1588 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1589 break;
1590 }
1591 }
1592
1593 if (op->one_char == 0)
1594 {
1595 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1596 exit(usage(2));
1597 }
1598 }
1599
1600
1601 /* Jeffrey Friedl's debugging harness uses these additional options which
1602 are not in the right form for putting in the option table because they use
1603 only one hyphen, yet are more than one character long. By putting them
1604 separately here, they will not get displayed as part of the help() output,
1605 but I don't think Jeffrey will care about that. */
1606
1607 #ifdef JFRIEDL_DEBUG
1608 else if (strcmp(argv[i], "-pre") == 0) {
1609 jfriedl_prefix = argv[++i];
1610 continue;
1611 } else if (strcmp(argv[i], "-post") == 0) {
1612 jfriedl_postfix = argv[++i];
1613 continue;
1614 } else if (strcmp(argv[i], "-XT") == 0) {
1615 sscanf(argv[++i], "%d", &jfriedl_XT);
1616 continue;
1617 } else if (strcmp(argv[i], "-XR") == 0) {
1618 sscanf(argv[++i], "%d", &jfriedl_XR);
1619 continue;
1620 }
1621 #endif
1622
1623
1624 /* One-char options; many that have no data may be in a single argument; we
1625 continue till we hit the last one or one that needs data. */
1626
1627 else
1628 {
1629 char *s = argv[i] + 1;
1630 longop = FALSE;
1631 while (*s != 0)
1632 {
1633 for (op = optionlist; op->one_char != 0; op++)
1634 { if (*s == op->one_char) break; }
1635 if (op->one_char == 0)
1636 {
1637 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1638 *s, argv[i]);
1639 exit(usage(2));
1640 }
1641 if (op->type != OP_NODATA || s[1] == 0)
1642 {
1643 option_data = s+1;
1644 break;
1645 }
1646 pcre_options = handle_option(*s++, pcre_options);
1647 }
1648 }
1649
1650 /* At this point we should have op pointing to a matched option. If the type
1651 is NO_DATA, it means that there is no data, and the option might set
1652 something in the PCRE options. */
1653
1654 if (op->type == OP_NODATA)
1655 {
1656 pcre_options = handle_option(op->one_char, pcre_options);
1657 continue;
1658 }
1659
1660 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1661 either has a value or defaults to something. It cannot have data in a
1662 separate item. At the moment, the only such options are "colo(u)r" and
1663 Jeffrey Friedl's special -S debugging option. */
1664
1665 if (*option_data == 0 &&
1666 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1667 {
1668 switch (op->one_char)
1669 {
1670 case N_COLOUR:
1671 colour_option = (char *)"auto";
1672 break;
1673 #ifdef JFRIEDL_DEBUG
1674 case 'S':
1675 S_arg = 0;
1676 break;
1677 #endif
1678 }
1679 continue;
1680 }
1681
1682 /* Otherwise, find the data string for the option. */
1683
1684 if (*option_data == 0)
1685 {
1686 if (i >= argc - 1 || longopwasequals)
1687 {
1688 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1689 exit(usage(2));
1690 }
1691 option_data = argv[++i];
1692 }
1693
1694 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1695 multiple times to create a list of patterns. */
1696
1697 if (op->type == OP_PATLIST)
1698 {
1699 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1700 {
1701 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1702 MAX_PATTERN_COUNT);
1703 return 2;
1704 }
1705 patterns[cmd_pattern_count++] = option_data;
1706 }
1707
1708 /* Otherwise, deal with single string or numeric data values. */
1709
1710 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1711 {
1712 *((char **)op->dataptr) = option_data;
1713 }
1714 else
1715 {
1716 char *endptr;
1717 int n = strtoul(option_data, &endptr, 10);
1718 if (*endptr != 0)
1719 {
1720 if (longop)
1721 {
1722 char *equals = strchr(op->long_name, '=');
1723 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1724 equals - op->long_name;
1725 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1726 option_data, nlen, op->long_name);
1727 }
1728 else
1729 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1730 option_data, op->one_char);
1731 exit(usage(2));
1732 }
1733 *((int *)op->dataptr) = n;
1734 }
1735 }
1736
1737 /* Options have been decoded. If -C was used, its value is used as a default
1738 for -A and -B. */
1739
1740 if (both_context > 0)
1741 {
1742 if (after_context == 0) after_context = both_context;
1743 if (before_context == 0) before_context = both_context;
1744 }
1745
1746 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1747 LC_ALL environment variable is set, and if so, use it. */
1748
1749 if (locale == NULL)
1750 {
1751 locale = getenv("LC_ALL");
1752 locale_from = "LCC_ALL";
1753 }
1754
1755 if (locale == NULL)
1756 {
1757 locale = getenv("LC_CTYPE");
1758 locale_from = "LC_CTYPE";
1759 }
1760
1761 /* If a locale has been provided, set it, and generate the tables the PCRE
1762 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1763
1764 if (locale != NULL)
1765 {
1766 if (setlocale(LC_CTYPE, locale) == NULL)
1767 {
1768 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1769 locale, locale_from);
1770 return 2;
1771 }
1772 pcretables = pcre_maketables();
1773 }
1774
1775 /* Sort out colouring */
1776
1777 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1778 {
1779 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1780 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1781 else
1782 {
1783 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1784 colour_option);
1785 return 2;
1786 }
1787 if (do_colour)
1788 {
1789 char *cs = getenv("PCREGREP_COLOUR");
1790 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1791 if (cs != NULL) colour_string = cs;
1792 }
1793 }
1794
1795 /* Interpret the newline type; the default settings are Unix-like. */
1796
1797 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1798 {
1799 pcre_options |= PCRE_NEWLINE_CR;
1800 endlinetype = EL_CR;
1801 }
1802 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1803 {
1804 pcre_options |= PCRE_NEWLINE_LF;
1805 endlinetype = EL_LF;
1806 }
1807 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1808 {
1809 pcre_options |= PCRE_NEWLINE_CRLF;
1810 endlinetype = EL_CRLF;
1811 }
1812 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1813 {
1814 pcre_options |= PCRE_NEWLINE_ANY;
1815 endlinetype = EL_ANY;
1816 }
1817 else
1818 {
1819 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1820 return 2;
1821 }
1822
1823 /* Interpret the text values for -d and -D */
1824
1825 if (dee_option != NULL)
1826 {
1827 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1828 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1829 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1830 else
1831 {
1832 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1833 return 2;
1834 }
1835 }
1836
1837 if (DEE_option != NULL)
1838 {
1839 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1840 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1841 else
1842 {
1843 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1844 return 2;
1845 }
1846 }
1847
1848 /* Check the values for Jeffrey Friedl's debugging options. */
1849
1850 #ifdef JFRIEDL_DEBUG
1851 if (S_arg > 9)
1852 {
1853 fprintf(stderr, "pcregrep: bad value for -S option\n");
1854 return 2;
1855 }
1856 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1857 {
1858 if (jfriedl_XT == 0) jfriedl_XT = 1;
1859 if (jfriedl_XR == 0) jfriedl_XR = 1;
1860 }
1861 #endif
1862
1863 /* Get memory to store the pattern and hints lists. */
1864
1865 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1866 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1867
1868 if (pattern_list == NULL || hints_list == NULL)
1869 {
1870 fprintf(stderr, "pcregrep: malloc failed\n");
1871 goto EXIT2;
1872 }
1873
1874 /* If no patterns were provided by -e, and there is no file provided by -f,
1875 the first argument is the one and only pattern, and it must exist. */
1876
1877 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1878 {
1879 if (i >= argc) return usage(2);
1880 patterns[cmd_pattern_count++] = argv[i++];
1881 }
1882
1883 /* Compile the patterns that were provided on the command line, either by
1884 multiple uses of -e or as a single unkeyed pattern. */
1885
1886 for (j = 0; j < cmd_pattern_count; j++)
1887 {
1888 if (!compile_pattern(patterns[j], pcre_options, NULL,
1889 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1890 goto EXIT2;
1891 }
1892
1893 /* Compile the regular expressions that are provided in a file. */
1894
1895 if (pattern_filename != NULL)
1896 {
1897 int linenumber = 0;
1898 FILE *f;
1899 char *filename;
1900 char buffer[MBUFTHIRD];
1901
1902 if (strcmp(pattern_filename, "-") == 0)
1903 {
1904 f = stdin;
1905 filename = stdin_name;
1906 }
1907 else
1908 {
1909 f = fopen(pattern_filename, "r");
1910 if (f == NULL)
1911 {
1912 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1913 strerror(errno));
1914 goto EXIT2;
1915 }
1916 filename = pattern_filename;
1917 }
1918
1919 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1920 {
1921 char *s = buffer + (int)strlen(buffer);
1922 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1923 *s = 0;
1924 linenumber++;
1925 if (buffer[0] == 0) continue; /* Skip blank lines */
1926 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1927 goto EXIT2;
1928 }
1929
1930 if (f != stdin) fclose(f);
1931 }
1932
1933 /* Study the regular expressions, as we will be running them many times */
1934
1935 for (j = 0; j < pattern_count; j++)
1936 {
1937 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1938 if (error != NULL)
1939 {
1940 char s[16];
1941 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1942 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1943 goto EXIT2;
1944 }
1945 }
1946
1947 /* If there are include or exclude patterns, compile them. */
1948
1949 if (exclude_pattern != NULL)
1950 {
1951 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1952 pcretables);
1953 if (exclude_compiled == NULL)
1954 {
1955 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1956 errptr, error);
1957 goto EXIT2;
1958 }
1959 }
1960
1961 if (include_pattern != NULL)
1962 {
1963 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1964 pcretables);
1965 if (include_compiled == NULL)
1966 {
1967 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1968 errptr, error);
1969 goto EXIT2;
1970 }
1971 }
1972
1973 /* If there are no further arguments, do the business on stdin and exit. */
1974
1975 if (i >= argc)
1976 {
1977 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1978 goto EXIT;
1979 }
1980
1981 /* Otherwise, work through the remaining arguments as files or directories.
1982 Pass in the fact that there is only one argument at top level - this suppresses
1983 the file name if the argument is not a directory and filenames are not
1984 otherwise forced. */
1985
1986 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1987
1988 for (; i < argc; i++)
1989 {
1990 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1991 only_one_at_top);
1992 if (frc > 1) rc = frc;
1993 else if (frc == 0 && rc == 1) rc = 0;
1994 }
1995
1996 EXIT:
1997 if (pattern_list != NULL)
1998 {
1999 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2000 free(pattern_list);
2001 }
2002 if (hints_list != NULL)
2003 {
2004 for (i = 0; i < pattern_count; i++) free(hints_list[i]);
2005 free(hints_list);
2006 }
2007 return rc;
2008
2009 EXIT2:
2010 rc = 2;
2011 goto EXIT;
2012 }
2013
2014 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12