/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 96 - (show annotations) (download)
Fri Mar 2 13:10:43 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: text/plain
File size: 57059 byte(s)
 r6896@hex:  nm | 2007-03-02 13:09:14 +0000
 Added EOL and keyword properties throughout

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <errno.h>
46
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include "config.h"
52 #include "pcre.h"
53
54 #define FALSE 0
55 #define TRUE 1
56
57 typedef int BOOL;
58
59 #define VERSION "4.4 29-Nov-2006"
60 #define MAX_PATTERN_COUNT 100
61
62 #if BUFSIZ > 8192
63 #define MBUFTHIRD BUFSIZ
64 #else
65 #define MBUFTHIRD 8192
66 #endif
67
68 /* Values for the "filenames" variable, which specifies options for file name
69 output. The order is important; it is assumed that a file name is wanted for
70 all values greater than FN_DEFAULT. */
71
72 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
73
74 /* Actions for the -d and -D options */
75
76 enum { dee_READ, dee_SKIP, dee_RECURSE };
77 enum { DEE_READ, DEE_SKIP };
78
79 /* Actions for special processing options (flag bits) */
80
81 #define PO_WORD_MATCH 0x0001
82 #define PO_LINE_MATCH 0x0002
83 #define PO_FIXED_STRINGS 0x0004
84
85 /* Line ending types */
86
87 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
88
89
90
91 /*************************************************
92 * Global variables *
93 *************************************************/
94
95 /* Jeffrey Friedl has some debugging requirements that are not part of the
96 regular code. */
97
98 #ifdef JFRIEDL_DEBUG
99 static int S_arg = -1;
100 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
101 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
102 static const char *jfriedl_prefix = "";
103 static const char *jfriedl_postfix = "";
104 #endif
105
106 static int endlinetype;
107
108 static char *colour_string = (char *)"1;31";
109 static char *colour_option = NULL;
110 static char *dee_option = NULL;
111 static char *DEE_option = NULL;
112 static char *newline = NULL;
113 static char *pattern_filename = NULL;
114 static char *stdin_name = (char *)"(standard input)";
115 static char *locale = NULL;
116
117 static const unsigned char *pcretables = NULL;
118
119 static int pattern_count = 0;
120 static pcre **pattern_list;
121 static pcre_extra **hints_list;
122
123 static char *include_pattern = NULL;
124 static char *exclude_pattern = NULL;
125
126 static pcre *include_compiled = NULL;
127 static pcre *exclude_compiled = NULL;
128
129 static int after_context = 0;
130 static int before_context = 0;
131 static int both_context = 0;
132 static int dee_action = dee_READ;
133 static int DEE_action = DEE_READ;
134 static int error_count = 0;
135 static int filenames = FN_DEFAULT;
136 static int process_options = 0;
137
138 static BOOL count_only = FALSE;
139 static BOOL do_colour = FALSE;
140 static BOOL hyphenpending = FALSE;
141 static BOOL invert = FALSE;
142 static BOOL multiline = FALSE;
143 static BOOL number = FALSE;
144 static BOOL only_matching = FALSE;
145 static BOOL quiet = FALSE;
146 static BOOL silent = FALSE;
147 static BOOL utf8 = FALSE;
148
149 /* Structure for options and list of them */
150
151 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
152 OP_PATLIST };
153
/* One entry in the command-line option table (optionlist). */
typedef struct option_item {
int type;               /* one of the OP_xxx values above: what kind of data, if any, the option takes */
int one_char;           /* single-letter form of the option, or a negative N_xxx value if there is none */
void *dataptr;          /* address of the variable associated with the option, or NULL */
const char *long_name;  /* long option name; in the table it may carry "=xxx" or "(p)" decorations */
const char *help_text;  /* one-line description of the option */
} option_item;
161
162 /* Options without a single-letter equivalent get a negative value. This can be
163 used to identify them. */
164
165 #define N_COLOUR (-1)
166 #define N_EXCLUDE (-2)
167 #define N_HELP (-3)
168 #define N_INCLUDE (-4)
169 #define N_LABEL (-5)
170 #define N_LOCALE (-6)
171 #define N_NULL (-7)
172
173 static option_item optionlist[] = {
174 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
175 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
176 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
177 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
178 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
179 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
180 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
181 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
182 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
183 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
184 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
185 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
186 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
187 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
188 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
189 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
190 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
191 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
192 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
193 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
194 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
195 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
196 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
197 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
198 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
199 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
200 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
201 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
202 #ifdef JFRIEDL_DEBUG
203 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
204 #endif
205 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
206 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
207 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
208 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
209 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
210 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
211 { OP_NODATA, 0, NULL, NULL, NULL }
212 };
213
214 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
215 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
216 that the combination of -w and -x has the same effect as -x on its own, so we
217 can treat them as the same. */
218
/* Both tables have eight entries, one for each combination of the three
PO_xxx flag bits (PO_WORD_MATCH = 1, PO_LINE_MATCH = 2, PO_FIXED_STRINGS = 4).
Entries 2 and 3 are identical, as are 6 and 7, because -w adds nothing when -x
is also set. */

static const char *prefix[] = {
"", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
224
225 /* UTF-8 tables - used only when the newline setting is "all". */
226
/* utf8_table3 is indexed by the number of additional bytes in a UTF-8
character; the entry is ANDed with the first byte to extract its data bits. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* utf8_table4 is indexed by the low 6 bits of a first byte that is >= 0xc0; the
entry is the number of additional bytes that follow in the character. */

const char utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
234
235
236
237 /*************************************************
238 * OS-specific functions *
239 *************************************************/
240
241 /* These functions are defined so that they can be made system specific,
242 although at present the only ones are for Unix, Win32, and for "no support". */
243
244
245 /************* Directory scanning in Unix ***********/
246
247 #if IS_UNIX
248 #include <sys/types.h>
249 #include <sys/stat.h>
250 #include <dirent.h>
251
252 typedef DIR directory_type;
253
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if the path is a directory, 0 otherwise (including when
            stat() fails, in the expectation that a later attempt to open
            the path as a file will fail and report the error)
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
262
263 static directory_type *
264 opendirectory(char *filename)
265 {
266 return opendir(filename);
267 }
268
269 static char *
270 readdirectory(directory_type *dir)
271 {
272 for (;;)
273 {
274 struct dirent *dent = readdir(dir);
275 if (dent == NULL) return NULL;
276 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
277 return dent->d_name;
278 }
279 return NULL; /* Keep compiler happy; never executed */
280 }
281
282 static void
283 closedirectory(directory_type *dir)
284 {
285 closedir(dir);
286 }
287
288
289 /************* Test for regular file in Unix **********/
290
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if the path is a regular file, or if stat() fails (in the
            expectation that opening it as a file will then fail);
            0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;
  return S_ISREG(info.st_mode) ? 1 : 0;
}
299
300
301 /************* Test stdout for being a terminal in Unix **********/
302
303 static BOOL
304 is_stdout_tty(void)
305 {
306 return isatty(fileno(stdout));
307 }
308
309
310 /************* Directory scanning in Win32 ***********/
311
312 /* I (Philip Hazel) have no means of testing this code. It was contributed by
313 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
314 when it did not exist. */
315
316
317 #elif HAVE_WIN32API
318
319 #ifndef STRICT
320 # define STRICT
321 #endif
322 #ifndef WIN32_LEAN_AND_MEAN
323 # define WIN32_LEAN_AND_MEAN
324 #endif
325 #ifndef INVALID_FILE_ATTRIBUTES
326 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
327 #endif
328
329 #include <windows.h>
330
/* State for one directory scan under Win32. */
typedef struct directory_type
{
HANDLE handle;          /* search handle returned by FindFirstFile() */
BOOL first;             /* TRUE until the entry found by FindFirstFile() has been consumed */
WIN32_FIND_DATA data;   /* the most recently found entry */
} directory_type;
337
338 int
339 isdirectory(char *filename)
340 {
341 DWORD attr = GetFileAttributes(filename);
342 if (attr == INVALID_FILE_ATTRIBUTES)
343 return 0;
344 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
345 }
346
347 directory_type *
348 opendirectory(char *filename)
349 {
350 size_t len;
351 char *pattern;
352 directory_type *dir;
353 DWORD err;
354 len = strlen(filename);
355 pattern = (char *) malloc(len + 3);
356 dir = (directory_type *) malloc(sizeof(*dir));
357 if ((pattern == NULL) || (dir == NULL))
358 {
359 fprintf(stderr, "pcregrep: malloc failed\n");
360 exit(2);
361 }
362 memcpy(pattern, filename, len);
363 memcpy(&(pattern[len]), "\\*", 3);
364 dir->handle = FindFirstFile(pattern, &(dir->data));
365 if (dir->handle != INVALID_HANDLE_VALUE)
366 {
367 free(pattern);
368 dir->first = TRUE;
369 return dir;
370 }
371 err = GetLastError();
372 free(pattern);
373 free(dir);
374 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
375 return NULL;
376 }
377
378 char *
379 readdirectory(directory_type *dir)
380 {
381 for (;;)
382 {
383 if (!dir->first)
384 {
385 if (!FindNextFile(dir->handle, &(dir->data)))
386 return NULL;
387 }
388 else
389 {
390 dir->first = FALSE;
391 }
392 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
393 return dir->data.cFileName;
394 }
395 #ifndef _MSC_VER
396 return NULL; /* Keep compiler happy; never executed */
397 #endif
398 }
399
400 void
401 closedirectory(directory_type *dir)
402 {
403 FindClose(dir->handle);
404 free(dir);
405 }
406
407
408 /************* Test for regular file in Win32 **********/
409
410 /* I don't know how to do this, or if it can be done; assume all paths are
411 regular if they are not directories. */
412
/* Test for a regular file (Win32 version). Anything that is not a directory is
assumed to be regular.

Argument:   filename   the path to test
Returns:    non-zero if isdirectory() says the path is not a directory
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);   /* the terminating semicolon was missing */
}
417
418
419 /************* Test stdout for being a terminal in Win32 **********/
420
421 /* I don't know how to do this; assume never */
422
423 static BOOL
424 is_stdout_tty(void)
425 {
426 FALSE;
427 }
428
429
430 /************* Directory scanning when we can't do it ***********/
431
432 /* The type is void, and apart from isdirectory(), the functions do nothing. */
433
434 #else
435
/* Stand-ins used when directory scanning is not supported: nothing is ever a
directory, opening or reading one yields NULL, and closing does nothing. The
non-void functions must return a value explicitly; falling off the end of them
leaves the result undefined if a caller looks at it. */

typedef void directory_type;

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
442
443
444 /************* Test for regular when we can't do it **********/
445
446 /* Assume all files are regular. */
447
/* Without a way to test, every path is treated as a regular file. */
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
449
450
451 /************* Test stdout for being a terminal when we can't do it **********/
452
453 static BOOL
454 is_stdout_tty(void)
455 {
456 return FALSE;
457 }
458
459
460 #endif
461
462
463
464 #if ! HAVE_STRERROR
465 /*************************************************
466 * Provide strerror() for non-ANSI libraries *
467 *************************************************/
468
469 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
470 in their libraries, but can provide the same facility by this simple
471 alternative function. */
472
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist[].

Argument:   n   an errno value
Returns:    the corresponding message, or a fixed "unknown" message when n is
            outside the range 0 .. sys_nerr-1
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
482 #endif /* HAVE_STRERROR */
483
484
485
486 /*************************************************
487 * Find end of line *
488 *************************************************/
489
490 /* The length of the endline sequence that is found is set via lenptr. This may
491 be zero at the very end of the file if there is no line-ending sequence there.
492
493 Arguments:
494 p current position in line
495 endptr end of available data
496 lenptr where to put the length of the eol sequence
497
498 Returns: pointer to the last byte of the line
499 */
500
501 static char *
502 end_of_line(char *p, char *endptr, int *lenptr)
503 {
504 switch(endlinetype)
505 {
506 default: /* Just in case */
507 case EL_LF:
508 while (p < endptr && *p != '\n') p++;
509 if (p < endptr)
510 {
511 *lenptr = 1;
512 return p + 1;
513 }
514 *lenptr = 0;
515 return endptr;
516
517 case EL_CR:
518 while (p < endptr && *p != '\r') p++;
519 if (p < endptr)
520 {
521 *lenptr = 1;
522 return p + 1;
523 }
524 *lenptr = 0;
525 return endptr;
526
527 case EL_CRLF:
528 for (;;)
529 {
530 while (p < endptr && *p != '\r') p++;
531 if (++p >= endptr)
532 {
533 *lenptr = 0;
534 return endptr;
535 }
536 if (*p == '\n')
537 {
538 *lenptr = 2;
539 return p + 1;
540 }
541 }
542 break;
543
544 case EL_ANY:
545 while (p < endptr)
546 {
547 int extra = 0;
548 register int c = *((unsigned char *)p);
549
550 if (utf8 && c >= 0xc0)
551 {
552 int gcii, gcss;
553 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
554 gcss = 6*extra;
555 c = (c & utf8_table3[extra]) << gcss;
556 for (gcii = 1; gcii <= extra; gcii++)
557 {
558 gcss -= 6;
559 c |= (p[gcii] & 0x3f) << gcss;
560 }
561 }
562
563 p += 1 + extra;
564
565 switch (c)
566 {
567 case 0x0a: /* LF */
568 case 0x0b: /* VT */
569 case 0x0c: /* FF */
570 *lenptr = 1;
571 return p;
572
573 case 0x0d: /* CR */
574 if (p < endptr && *p == 0x0a)
575 {
576 *lenptr = 2;
577 p++;
578 }
579 else *lenptr = 1;
580 return p;
581
582 case 0x85: /* NEL */
583 *lenptr = utf8? 2 : 1;
584 return p;
585
586 case 0x2028: /* LS */
587 case 0x2029: /* PS */
588 *lenptr = 3;
589 return p;
590
591 default:
592 break;
593 }
594 } /* End of loop for ANY case */
595
596 *lenptr = 0; /* Must have hit the end */
597 return endptr;
598 } /* End of overall switch */
599 }
600
601
602
603 /*************************************************
604 * Find start of previous line *
605 *************************************************/
606
607 /* This is called when looking back for before lines to print.
608
609 Arguments:
610 p start of the subsequent line
611 startptr start of available data
612
613 Returns: pointer to the start of the previous line
614 */
615
616 static char *
617 previous_line(char *p, char *startptr)
618 {
619 switch(endlinetype)
620 {
621 default: /* Just in case */
622 case EL_LF:
623 p--;
624 while (p > startptr && p[-1] != '\n') p--;
625 return p;
626
627 case EL_CR:
628 p--;
629 while (p > startptr && p[-1] != '\n') p--;
630 return p;
631
632 case EL_CRLF:
633 for (;;)
634 {
635 p -= 2;
636 while (p > startptr && p[-1] != '\n') p--;
637 if (p <= startptr + 1 || p[-2] == '\r') return p;
638 }
639 return p; /* But control should never get here */
640
641 case EL_ANY:
642 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
643 if (utf8) while ((*p & 0xc0) == 0x80) p--;
644
645 while (p > startptr)
646 {
647 register int c;
648 char *pp = p - 1;
649
650 if (utf8)
651 {
652 int extra = 0;
653 while ((*pp & 0xc0) == 0x80) pp--;
654 c = *((unsigned char *)pp);
655 if (c >= 0xc0)
656 {
657 int gcii, gcss;
658 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
659 gcss = 6*extra;
660 c = (c & utf8_table3[extra]) << gcss;
661 for (gcii = 1; gcii <= extra; gcii++)
662 {
663 gcss -= 6;
664 c |= (pp[gcii] & 0x3f) << gcss;
665 }
666 }
667 }
668 else c = *((unsigned char *)pp);
669
670 switch (c)
671 {
672 case 0x0a: /* LF */
673 case 0x0b: /* VT */
674 case 0x0c: /* FF */
675 case 0x0d: /* CR */
676 case 0x85: /* NEL */
677 case 0x2028: /* LS */
678 case 0x2029: /* PS */
679 return p;
680
681 default:
682 break;
683 }
684
685 p = pp; /* Back one character */
686 } /* End of loop for ANY case */
687
688 return startptr; /* Hit start of data */
689 } /* End of overall switch */
690 }
691
692
693
694
695
696 /*************************************************
697 * Print the previous "after" lines *
698 *************************************************/
699
700 /* This is called if we are about to lose said lines because of buffer filling,
701 and at the end of the file. The data in the line is written using fwrite() so
702 that a binary zero does not terminate it.
703
704 Arguments:
705 lastmatchnumber the number of the last matching line, plus one
706 lastmatchrestart where we restarted after the last match
707 endptr end of available data
708 printname filename for printing
709
710 Returns: nothing
711 */
712
713 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
714 char *endptr, char *printname)
715 {
716 if (after_context > 0 && lastmatchnumber > 0)
717 {
718 int count = 0;
719 while (lastmatchrestart < endptr && count++ < after_context)
720 {
721 int ellength;
722 char *pp = lastmatchrestart;
723 if (printname != NULL) fprintf(stdout, "%s-", printname);
724 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
725 pp = end_of_line(pp, endptr, &ellength);
726 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
727 lastmatchrestart = pp;
728 }
729 hyphenpending = TRUE;
730 }
731 }
732
733
734
735 /*************************************************
736 * Grep an individual file *
737 *************************************************/
738
739 /* This is called from grep_or_recurse() below. It uses a buffer that is three
740 times the value of MBUFTHIRD. The matching point is never allowed to stray into
741 the top third of the buffer, thus keeping more of the file available for
742 context printing or for multiline scanning. For large files, the pointer will
743 be in the middle third most of the time, so the bottom third is available for
744 "before" context printing.
745
746 Arguments:
747 in the fopened FILE stream
748 printname the file name if it is to be printed for each match
749 or NULL if the file name is not to be printed
750 it cannot be NULL if filenames[_nomatch]_only is set
751
752 Returns: 0 if there was at least one match
753 1 otherwise (no matches)
754 */
755
756 static int
757 pcregrep(FILE *in, char *printname)
758 {
759 int rc = 1;
760 int linenumber = 1;
761 int lastmatchnumber = 0;
762 int count = 0;
763 int offsets[99];
764 char *lastmatchrestart = NULL;
765 char buffer[3*MBUFTHIRD];
766 char *ptr = buffer;
767 char *endptr;
768 size_t bufflength;
769 BOOL endhyphenpending = FALSE;
770
771 /* Do the first read into the start of the buffer and set up the pointer to
772 end of what we have. */
773
774 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
775 endptr = buffer + bufflength;
776
777 /* Loop while the current pointer is not at the end of the file. For large
778 files, endptr will be at the end of the buffer when we are in the middle of the
779 file, but ptr will never get there, because as soon as it gets over 2/3 of the
780 way, the buffer is shifted left and re-filled. */
781
782 while (ptr < endptr)
783 {
784 int i, endlinelength;
785 int mrc = 0;
786 BOOL match = FALSE;
787 char *t = ptr;
788 size_t length, linelength;
789
790 /* At this point, ptr is at the start of a line. We need to find the length
791 of the subject string to pass to pcre_exec(). In multiline mode, it is the
792 length remainder of the data in the buffer. Otherwise, it is the length of
793 the next line. After matching, we always advance by the length of the next
794 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
795 that any match is constrained to be in the first line. */
796
797 t = end_of_line(t, endptr, &endlinelength);
798 linelength = t - ptr - endlinelength;
799 length = multiline? endptr - ptr : linelength;
800
801 /* Extra processing for Jeffrey Friedl's debugging. */
802
803 #ifdef JFRIEDL_DEBUG
804 if (jfriedl_XT || jfriedl_XR)
805 {
806 #include <sys/time.h>
807 #include <time.h>
808 struct timeval start_time, end_time;
809 struct timezone dummy;
810
811 if (jfriedl_XT)
812 {
813 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
814 const char *orig = ptr;
815 ptr = malloc(newlen + 1);
816 if (!ptr) {
817 printf("out of memory");
818 exit(2);
819 }
820 endptr = ptr;
821 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
822 for (i = 0; i < jfriedl_XT; i++) {
823 strncpy(endptr, orig, length);
824 endptr += length;
825 }
826 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
827 length = newlen;
828 }
829
830 if (gettimeofday(&start_time, &dummy) != 0)
831 perror("bad gettimeofday");
832
833
834 for (i = 0; i < jfriedl_XR; i++)
835 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
836
837 if (gettimeofday(&end_time, &dummy) != 0)
838 perror("bad gettimeofday");
839
840 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
841 -
842 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
843
844 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
845 return 0;
846 }
847 #endif
848
849
850 /* Run through all the patterns until one matches. Note that we don't include
851 the final newline in the subject string. */
852
853 for (i = 0; i < pattern_count; i++)
854 {
855 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
856 offsets, 99);
857 if (mrc >= 0) { match = TRUE; break; }
858 if (mrc != PCRE_ERROR_NOMATCH)
859 {
860 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
861 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
862 fprintf(stderr, "this line:\n");
863 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
864 fprintf(stderr, "\n");
865 if (error_count == 0 &&
866 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
867 {
868 fprintf(stderr, "pcregrep: error %d means that a resource limit "
869 "was exceeded\n", mrc);
870 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
871 }
872 if (error_count++ > 20)
873 {
874 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
875 exit(2);
876 }
877 match = invert; /* No more matching; don't show the line again */
878 break;
879 }
880 }
881
882 /* If it's a match or a not-match (as required), do what's wanted. */
883
884 if (match != invert)
885 {
886 BOOL hyphenprinted = FALSE;
887
888 /* We've failed if we want a file that doesn't have any matches. */
889
890 if (filenames == FN_NOMATCH_ONLY) return 1;
891
892 /* Just count if just counting is wanted. */
893
894 if (count_only) count++;
895
896 /* If all we want is a file name, there is no need to scan any more lines
897 in the file. */
898
899 else if (filenames == FN_ONLY)
900 {
901 fprintf(stdout, "%s\n", printname);
902 return 0;
903 }
904
905 /* Likewise, if all we want is a yes/no answer. */
906
907 else if (quiet) return 0;
908
909 /* The --only-matching option prints just the substring that matched, and
910 does not pring any context. */
911
912 else if (only_matching)
913 {
914 if (printname != NULL) fprintf(stdout, "%s:", printname);
915 if (number) fprintf(stdout, "%d:", linenumber);
916 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
917 fprintf(stdout, "\n");
918 }
919
920 /* This is the default case when none of the above options is set. We print
921 the matching lines(s), possibly preceded and/or followed by other lines of
922 context. */
923
924 else
925 {
926 /* See if there is a requirement to print some "after" lines from a
927 previous match. We never print any overlaps. */
928
929 if (after_context > 0 && lastmatchnumber > 0)
930 {
931 int ellength;
932 int linecount = 0;
933 char *p = lastmatchrestart;
934
935 while (p < ptr && linecount < after_context)
936 {
937 p = end_of_line(p, ptr, &ellength);
938 linecount++;
939 }
940
941 /* It is important to advance lastmatchrestart during this printing so
942 that it interacts correctly with any "before" printing below. Print
943 each line's data using fwrite() in case there are binary zeroes. */
944
945 while (lastmatchrestart < p)
946 {
947 char *pp = lastmatchrestart;
948 if (printname != NULL) fprintf(stdout, "%s-", printname);
949 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950 pp = end_of_line(pp, endptr, &ellength);
951 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952 lastmatchrestart = pp;
953 }
954 if (lastmatchrestart != ptr) hyphenpending = TRUE;
955 }
956
957 /* If there were non-contiguous lines printed above, insert hyphens. */
958
959 if (hyphenpending)
960 {
961 fprintf(stdout, "--\n");
962 hyphenpending = FALSE;
963 hyphenprinted = TRUE;
964 }
965
966 /* See if there is a requirement to print some "before" lines for this
967 match. Again, don't print overlaps. */
968
969 if (before_context > 0)
970 {
971 int linecount = 0;
972 char *p = ptr;
973
974 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
975 linecount < before_context)
976 {
977 linecount++;
978 p = previous_line(p, buffer);
979 }
980
981 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
982 fprintf(stdout, "--\n");
983
984 while (p < ptr)
985 {
986 int ellength;
987 char *pp = p;
988 if (printname != NULL) fprintf(stdout, "%s-", printname);
989 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
990 pp = end_of_line(pp, endptr, &ellength);
991 fwrite(p, 1, pp - p, stdout);
992 p = pp;
993 }
994 }
995
996 /* Now print the matching line(s); ensure we set hyphenpending at the end
997 of the file if any context lines are being output. */
998
999 if (after_context > 0 || before_context > 0)
1000 endhyphenpending = TRUE;
1001
1002 if (printname != NULL) fprintf(stdout, "%s:", printname);
1003 if (number) fprintf(stdout, "%d:", linenumber);
1004
1005 /* In multiline mode, we want to print to the end of the line in which
1006 the end of the matched string is found, so we adjust linelength and the
1007 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1008 start of the match will always be before the first newline sequence. */
1009
1010 if (multiline)
1011 {
1012 int ellength;
1013 char *endmatch = ptr + offsets[1];
1014 t = ptr;
1015 while (t < endmatch)
1016 {
1017 t = end_of_line(t, endptr, &ellength);
1018 if (t <= endmatch) linenumber++; else break;
1019 }
1020 endmatch = end_of_line(endmatch, endptr, &ellength);
1021 linelength = endmatch - ptr - ellength;
1022 }
1023
1024 /*** NOTE: Use only fwrite() to output the data line, so that binary
1025 zeroes are treated as just another data character. */
1026
1027 /* This extra option, for Jeffrey Friedl's debugging requirements,
1028 replaces the matched string, or a specific captured string if it exists,
1029 with X. When this happens, colouring is ignored. */
1030
1031 #ifdef JFRIEDL_DEBUG
1032 if (S_arg >= 0 && S_arg < mrc)
1033 {
1034 int first = S_arg * 2;
1035 int last = first + 1;
1036 fwrite(ptr, 1, offsets[first], stdout);
1037 fprintf(stdout, "X");
1038 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1039 }
1040 else
1041 #endif
1042
1043 /* We have to split the line(s) up if colouring. */
1044
1045 if (do_colour)
1046 {
1047 fwrite(ptr, 1, offsets[0], stdout);
1048 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1049 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1050 fprintf(stdout, "%c[00m", 0x1b);
1051 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1052 }
1053 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1054 }
1055
1056 /* End of doing what has to be done for a match */
1057
1058 rc = 0; /* Had some success */
1059
1060 /* Remember where the last match happened for after_context. We remember
1061 where we are about to restart, and that line's number. */
1062
1063 lastmatchrestart = ptr + linelength + endlinelength;
1064 lastmatchnumber = linenumber + 1;
1065 }
1066
1067 /* Advance to after the newline and increment the line number. */
1068
1069 ptr += linelength + endlinelength;
1070 linenumber++;
1071
1072 /* If we haven't yet reached the end of the file (the buffer is full), and
1073 the current point is in the top 1/3 of the buffer, slide the buffer down by
1074 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1075 about to be lost, print them. */
1076
1077 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1078 {
1079 if (after_context > 0 &&
1080 lastmatchnumber > 0 &&
1081 lastmatchrestart < buffer + MBUFTHIRD)
1082 {
1083 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1084 lastmatchnumber = 0;
1085 }
1086
1087 /* Now do the shuffle */
1088
1089 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1090 ptr -= MBUFTHIRD;
1091 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1092 endptr = buffer + bufflength;
1093
1094 /* Adjust any last match point */
1095
1096 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1097 }
1098 } /* Loop through the whole file */
1099
1100 /* End of file; print final "after" lines if wanted; do_after_lines sets
1101 hyphenpending if it prints something. */
1102
1103 if (!only_matching && !count_only)
1104 {
1105 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1106 hyphenpending |= endhyphenpending;
1107 }
1108
1109 /* Print the file name if we are looking for those without matches and there
1110 were none. If we found a match, we won't have got this far. */
1111
1112 if (filenames == FN_NOMATCH_ONLY)
1113 {
1114 fprintf(stdout, "%s\n", printname);
1115 return 0;
1116 }
1117
1118 /* Print the match count if wanted */
1119
1120 if (count_only)
1121 {
1122 if (printname != NULL) fprintf(stdout, "%s:", printname);
1123 fprintf(stdout, "%d\n", count);
1124 }
1125
1126 return rc;
1127 }
1128
1129
1130
1131 /*************************************************
1132 * Grep a file or recurse into a directory *
1133 *************************************************/
1134
1135 /* Given a path name, if it's a directory, scan all the files if we are
1136 recursing; if it's a file, grep it.
1137
1138 Arguments:
1139 pathname the path to investigate
1140 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1141 only_one_at_top TRUE if the path is the only one at toplevel
1142
1143 Returns: 0 if there was at least one match
1144 1 if there were no matches
1145 2 there was some kind of error
1146
1147 However, file opening failures are suppressed if "silent" is set.
1148 */
1149
1150 static int
1151 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1152 {
1153 int rc = 1;
1154 int sep;
1155 FILE *in;
1156
1157 /* If the file name is "-" we scan stdin */
1158
1159 if (strcmp(pathname, "-") == 0)
1160 {
1161 return pcregrep(stdin,
1162 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1163 stdin_name : NULL);
1164 }
1165
1166
1167 /* If the file is a directory, skip if skipping or if we are recursing, scan
1168 each file within it, subject to any include or exclude patterns that were set.
1169 The scanning code is localized so it can be made system-specific. */
1170
1171 if ((sep = isdirectory(pathname)) != 0)
1172 {
1173 if (dee_action == dee_SKIP) return 1;
1174 if (dee_action == dee_RECURSE)
1175 {
1176 char buffer[1024];
1177 char *nextfile;
1178 directory_type *dir = opendirectory(pathname);
1179
1180 if (dir == NULL)
1181 {
1182 if (!silent)
1183 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1184 strerror(errno));
1185 return 2;
1186 }
1187
1188 while ((nextfile = readdirectory(dir)) != NULL)
1189 {
1190 int frc, blen;
1191 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1192 blen = strlen(buffer);
1193
1194 if (exclude_compiled != NULL &&
1195 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1196 continue;
1197
1198 if (include_compiled != NULL &&
1199 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1200 continue;
1201
1202 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1203 if (frc > 1) rc = frc;
1204 else if (frc == 0 && rc == 1) rc = 0;
1205 }
1206
1207 closedirectory(dir);
1208 return rc;
1209 }
1210 }
1211
1212 /* If the file is not a directory and not a regular file, skip it if that's
1213 been requested. */
1214
1215 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1216
1217 /* Control reaches here if we have a regular file, or if we have a directory
1218 and recursion or skipping was not requested, or if we have anything else and
1219 skipping was not requested. The scan proceeds. If this is the first and only
1220 argument at top level, we don't show the file name, unless we are only showing
1221 the file name, or the filename was forced (-H). */
1222
1223 in = fopen(pathname, "r");
1224 if (in == NULL)
1225 {
1226 if (!silent)
1227 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1228 strerror(errno));
1229 return 2;
1230 }
1231
1232 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1233 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1234
1235 fclose(in);
1236 return rc;
1237 }
1238
1239
1240
1241
1242 /*************************************************
1243 * Usage function *
1244 *************************************************/
1245
1246 static int
1247 usage(int rc)
1248 {
1249 option_item *op;
1250 fprintf(stderr, "Usage: pcregrep [-");
1251 for (op = optionlist; op->one_char != 0; op++)
1252 {
1253 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1254 }
1255 fprintf(stderr, "] [long options] [pattern] [files]\n");
1256 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1257 return rc;
1258 }
1259
1260
1261
1262
1263 /*************************************************
1264 * Help function *
1265 *************************************************/
1266
1267 static void
1268 help(void)
1269 {
1270 option_item *op;
1271
1272 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1273 printf("Search for PATTERN in each FILE or standard input.\n");
1274 printf("PATTERN must be present if neither -e nor -f is used.\n");
1275 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1276 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1277
1278 printf("Options:\n");
1279
1280 for (op = optionlist; op->one_char != 0; op++)
1281 {
1282 int n;
1283 char s[4];
1284 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1285 printf(" %s --%s%n", s, op->long_name, &n);
1286 n = 30 - n;
1287 if (n < 1) n = 1;
1288 printf("%.*s%s\n", n, " ", op->help_text);
1289 }
1290
1291 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1292 printf("trailing white space is removed and blank lines are ignored.\n");
1293 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1294
1295 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1296 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1297 }
1298
1299
1300
1301
1302 /*************************************************
1303 * Handle a single-letter, no data option *
1304 *************************************************/
1305
1306 static int
1307 handle_option(int letter, int options)
1308 {
1309 switch(letter)
1310 {
1311 case N_HELP: help(); exit(0);
1312 case 'c': count_only = TRUE; break;
1313 case 'F': process_options |= PO_FIXED_STRINGS; break;
1314 case 'H': filenames = FN_FORCE; break;
1315 case 'h': filenames = FN_NONE; break;
1316 case 'i': options |= PCRE_CASELESS; break;
1317 case 'l': filenames = FN_ONLY; break;
1318 case 'L': filenames = FN_NOMATCH_ONLY; break;
1319 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1320 case 'n': number = TRUE; break;
1321 case 'o': only_matching = TRUE; break;
1322 case 'q': quiet = TRUE; break;
1323 case 'r': dee_action = dee_RECURSE; break;
1324 case 's': silent = TRUE; break;
1325 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1326 case 'v': invert = TRUE; break;
1327 case 'w': process_options |= PO_WORD_MATCH; break;
1328 case 'x': process_options |= PO_LINE_MATCH; break;
1329
1330 case 'V':
1331 fprintf(stderr, "pcregrep version %s using ", VERSION);
1332 fprintf(stderr, "PCRE version %s\n", pcre_version());
1333 exit(0);
1334 break;
1335
1336 default:
1337 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1338 exit(usage(2));
1339 }
1340
1341 return options;
1342 }
1343
1344
1345
1346
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n    the number
Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call

The endings "st", "nd" and "rd" are used only for positive numbers, and not
for those whose last two digits are 11, 12 or 13. Everything else gets "th".
*/

static char *
ordin(int n)
{
  /* Room for the longest 32-bit int ("-2147483648"), a two-letter ending,
  and the terminating zero. */

  static char buffer[16];
  const char *ending = "th";

  if (n > 0)
  {
    int last_two = n % 100;
    if (last_two < 11 || last_two > 13)
    {
      switch (last_two % 10)
      {
        case 1: ending = "st"; break;
        case 2: ending = "nd"; break;
        case 3: ending = "rd"; break;
        default: break;
      }
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1369
1370
1371
1372 /*************************************************
1373 * Compile a single pattern *
1374 *************************************************/
1375
1376 /* When the -F option has been used, this is called for each substring.
1377 Otherwise it's called for each supplied pattern.
1378
1379 Arguments:
1380 pattern the pattern string
1381 options the PCRE options
1382 filename the file name, or NULL for a command-line pattern
1383 count 0 if this is the only command line pattern, or
1384 number of the command line pattern, or
1385 linenumber for a pattern from a file
1386
1387 Returns: TRUE on success, FALSE after an error
1388 */
1389
1390 static BOOL
1391 compile_single_pattern(char *pattern, int options, char *filename, int count)
1392 {
1393 char buffer[MBUFTHIRD + 16];
1394 const char *error;
1395 int errptr;
1396
1397 if (pattern_count >= MAX_PATTERN_COUNT)
1398 {
1399 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1400 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1401 return FALSE;
1402 }
1403
1404 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1405 suffix[process_options]);
1406 pattern_list[pattern_count] =
1407 pcre_compile(buffer, options, &error, &errptr, pcretables);
1408 if (pattern_list[pattern_count++] != NULL) return TRUE;
1409
1410 /* Handle compile errors */
1411
1412 errptr -= (int)strlen(prefix[process_options]);
1413 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1414
1415 if (filename == NULL)
1416 {
1417 if (count == 0)
1418 fprintf(stderr, "pcregrep: Error in command-line regex "
1419 "at offset %d: %s\n", errptr, error);
1420 else
1421 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1422 "at offset %d: %s\n", ordin(count), errptr, error);
1423 }
1424 else
1425 {
1426 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1427 "at offset %d: %s\n", count, filename, errptr, error);
1428 }
1429
1430 return FALSE;
1431 }
1432
1433
1434
1435 /*************************************************
1436 * Compile one supplied pattern *
1437 *************************************************/
1438
1439 /* When the -F option has been used, each string may be a list of strings,
1440 separated by line breaks. They will be matched literally.
1441
1442 Arguments:
1443 pattern the pattern string
1444 options the PCRE options
1445 filename the file name, or NULL for a command-line pattern
1446 count 0 if this is the only command line pattern, or
1447 number of the command line pattern, or
1448 linenumber for a pattern from a file
1449
1450 Returns: TRUE on success, FALSE after an error
1451 */
1452
1453 static BOOL
1454 compile_pattern(char *pattern, int options, char *filename, int count)
1455 {
1456 if ((process_options & PO_FIXED_STRINGS) != 0)
1457 {
1458 char *eop = pattern + strlen(pattern);
1459 char buffer[MBUFTHIRD];
1460 for(;;)
1461 {
1462 int ellength;
1463 char *p = end_of_line(pattern, eop, &ellength);
1464 if (ellength == 0)
1465 return compile_single_pattern(pattern, options, filename, count);
1466 sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1467 pattern = p;
1468 if (!compile_single_pattern(buffer, options, filename, count))
1469 return FALSE;
1470 }
1471 }
1472 else return compile_single_pattern(pattern, options, filename, count);
1473 }
1474
1475
1476
1477 /*************************************************
1478 * Main program *
1479 *************************************************/
1480
1481 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1482
1483 int
1484 main(int argc, char **argv)
1485 {
1486 int i, j;
1487 int rc = 1;
1488 int pcre_options = 0;
1489 int cmd_pattern_count = 0;
1490 int errptr;
1491 BOOL only_one_at_top;
1492 char *patterns[MAX_PATTERN_COUNT];
1493 const char *locale_from = "--locale";
1494 const char *error;
1495
1496 /* Set the default line ending value from the default in the PCRE library;
1497 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1498 */
1499
1500 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1501 switch(i)
1502 {
1503 default: newline = (char *)"lf"; break;
1504 case '\r': newline = (char *)"cr"; break;
1505 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1506 case -1: newline = (char *)"any"; break;
1507 }
1508
1509 /* Process the options */
1510
1511 for (i = 1; i < argc; i++)
1512 {
1513 option_item *op = NULL;
1514 char *option_data = (char *)""; /* default to keep compiler happy */
1515 BOOL longop;
1516 BOOL longopwasequals = FALSE;
1517
1518 if (argv[i][0] != '-') break;
1519
1520 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1521 but only if we have previously had -e or -f to define the patterns. */
1522
1523 if (argv[i][1] == 0)
1524 {
1525 if (pattern_filename != NULL || pattern_count > 0) break;
1526 else exit(usage(2));
1527 }
1528
1529 /* Handle a long name option, or -- to terminate the options */
1530
1531 if (argv[i][1] == '-')
1532 {
1533 char *arg = argv[i] + 2;
1534 char *argequals = strchr(arg, '=');
1535
1536 if (*arg == 0) /* -- terminates options */
1537 {
1538 i++;
1539 break; /* out of the options-handling loop */
1540 }
1541
1542 longop = TRUE;
1543
1544 /* Some long options have data that follows after =, for example file=name.
1545 Some options have variations in the long name spelling: specifically, we
1546 allow "regexp" because GNU grep allows it, though I personally go along
1547 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1548 These options are entered in the table as "regex(p)". No option is in both
1549 these categories, fortunately. */
1550
1551 for (op = optionlist; op->one_char != 0; op++)
1552 {
1553 char *opbra = strchr(op->long_name, '(');
1554 char *equals = strchr(op->long_name, '=');
1555 if (opbra == NULL) /* Not a (p) case */
1556 {
1557 if (equals == NULL) /* Not thing=data case */
1558 {
1559 if (strcmp(arg, op->long_name) == 0) break;
1560 }
1561 else /* Special case xxx=data */
1562 {
1563 int oplen = equals - op->long_name;
1564 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1565 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1566 {
1567 option_data = arg + arglen;
1568 if (*option_data == '=')
1569 {
1570 option_data++;
1571 longopwasequals = TRUE;
1572 }
1573 break;
1574 }
1575 }
1576 }
1577 else /* Special case xxxx(p) */
1578 {
1579 char buff1[24];
1580 char buff2[24];
1581 int baselen = opbra - op->long_name;
1582 sprintf(buff1, "%.*s", baselen, op->long_name);
1583 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1584 opbra + 1);
1585 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1586 break;
1587 }
1588 }
1589
1590 if (op->one_char == 0)
1591 {
1592 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1593 exit(usage(2));
1594 }
1595 }
1596
1597
1598 /* Jeffrey Friedl's debugging harness uses these additional options which
1599 are not in the right form for putting in the option table because they use
1600 only one hyphen, yet are more than one character long. By putting them
1601 separately here, they will not get displayed as part of the help() output,
1602 but I don't think Jeffrey will care about that. */
1603
1604 #ifdef JFRIEDL_DEBUG
1605 else if (strcmp(argv[i], "-pre") == 0) {
1606 jfriedl_prefix = argv[++i];
1607 continue;
1608 } else if (strcmp(argv[i], "-post") == 0) {
1609 jfriedl_postfix = argv[++i];
1610 continue;
1611 } else if (strcmp(argv[i], "-XT") == 0) {
1612 sscanf(argv[++i], "%d", &jfriedl_XT);
1613 continue;
1614 } else if (strcmp(argv[i], "-XR") == 0) {
1615 sscanf(argv[++i], "%d", &jfriedl_XR);
1616 continue;
1617 }
1618 #endif
1619
1620
1621 /* One-char options; many that have no data may be in a single argument; we
1622 continue till we hit the last one or one that needs data. */
1623
1624 else
1625 {
1626 char *s = argv[i] + 1;
1627 longop = FALSE;
1628 while (*s != 0)
1629 {
1630 for (op = optionlist; op->one_char != 0; op++)
1631 { if (*s == op->one_char) break; }
1632 if (op->one_char == 0)
1633 {
1634 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1635 *s, argv[i]);
1636 exit(usage(2));
1637 }
1638 if (op->type != OP_NODATA || s[1] == 0)
1639 {
1640 option_data = s+1;
1641 break;
1642 }
1643 pcre_options = handle_option(*s++, pcre_options);
1644 }
1645 }
1646
1647 /* At this point we should have op pointing to a matched option. If the type
1648 is NO_DATA, it means that there is no data, and the option might set
1649 something in the PCRE options. */
1650
1651 if (op->type == OP_NODATA)
1652 {
1653 pcre_options = handle_option(op->one_char, pcre_options);
1654 continue;
1655 }
1656
1657 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1658 either has a value or defaults to something. It cannot have data in a
1659 separate item. At the moment, the only such options are "colo(u)r" and
1660 Jeffrey Friedl's special -S debugging option. */
1661
1662 if (*option_data == 0 &&
1663 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1664 {
1665 switch (op->one_char)
1666 {
1667 case N_COLOUR:
1668 colour_option = (char *)"auto";
1669 break;
1670 #ifdef JFRIEDL_DEBUG
1671 case 'S':
1672 S_arg = 0;
1673 break;
1674 #endif
1675 }
1676 continue;
1677 }
1678
1679 /* Otherwise, find the data string for the option. */
1680
1681 if (*option_data == 0)
1682 {
1683 if (i >= argc - 1 || longopwasequals)
1684 {
1685 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1686 exit(usage(2));
1687 }
1688 option_data = argv[++i];
1689 }
1690
1691 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1692 multiple times to create a list of patterns. */
1693
1694 if (op->type == OP_PATLIST)
1695 {
1696 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1697 {
1698 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1699 MAX_PATTERN_COUNT);
1700 return 2;
1701 }
1702 patterns[cmd_pattern_count++] = option_data;
1703 }
1704
1705 /* Otherwise, deal with single string or numeric data values. */
1706
1707 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1708 {
1709 *((char **)op->dataptr) = option_data;
1710 }
1711 else
1712 {
1713 char *endptr;
1714 int n = strtoul(option_data, &endptr, 10);
1715 if (*endptr != 0)
1716 {
1717 if (longop)
1718 {
1719 char *equals = strchr(op->long_name, '=');
1720 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1721 equals - op->long_name;
1722 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1723 option_data, nlen, op->long_name);
1724 }
1725 else
1726 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1727 option_data, op->one_char);
1728 exit(usage(2));
1729 }
1730 *((int *)op->dataptr) = n;
1731 }
1732 }
1733
1734 /* Options have been decoded. If -C was used, its value is used as a default
1735 for -A and -B. */
1736
1737 if (both_context > 0)
1738 {
1739 if (after_context == 0) after_context = both_context;
1740 if (before_context == 0) before_context = both_context;
1741 }
1742
1743 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1744 LC_ALL environment variable is set, and if so, use it. */
1745
1746 if (locale == NULL)
1747 {
1748 locale = getenv("LC_ALL");
1749 locale_from = "LCC_ALL";
1750 }
1751
1752 if (locale == NULL)
1753 {
1754 locale = getenv("LC_CTYPE");
1755 locale_from = "LC_CTYPE";
1756 }
1757
1758 /* If a locale has been provided, set it, and generate the tables the PCRE
1759 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1760
1761 if (locale != NULL)
1762 {
1763 if (setlocale(LC_CTYPE, locale) == NULL)
1764 {
1765 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1766 locale, locale_from);
1767 return 2;
1768 }
1769 pcretables = pcre_maketables();
1770 }
1771
1772 /* Sort out colouring */
1773
1774 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1775 {
1776 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1777 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1778 else
1779 {
1780 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1781 colour_option);
1782 return 2;
1783 }
1784 if (do_colour)
1785 {
1786 char *cs = getenv("PCREGREP_COLOUR");
1787 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1788 if (cs != NULL) colour_string = cs;
1789 }
1790 }
1791
1792 /* Interpret the newline type; the default settings are Unix-like. */
1793
1794 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1795 {
1796 pcre_options |= PCRE_NEWLINE_CR;
1797 endlinetype = EL_CR;
1798 }
1799 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1800 {
1801 pcre_options |= PCRE_NEWLINE_LF;
1802 endlinetype = EL_LF;
1803 }
1804 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1805 {
1806 pcre_options |= PCRE_NEWLINE_CRLF;
1807 endlinetype = EL_CRLF;
1808 }
1809 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1810 {
1811 pcre_options |= PCRE_NEWLINE_ANY;
1812 endlinetype = EL_ANY;
1813 }
1814 else
1815 {
1816 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1817 return 2;
1818 }
1819
1820 /* Interpret the text values for -d and -D */
1821
1822 if (dee_option != NULL)
1823 {
1824 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1825 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1826 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1827 else
1828 {
1829 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1830 return 2;
1831 }
1832 }
1833
1834 if (DEE_option != NULL)
1835 {
1836 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1837 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1838 else
1839 {
1840 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1841 return 2;
1842 }
1843 }
1844
1845 /* Check the values for Jeffrey Friedl's debugging options. */
1846
1847 #ifdef JFRIEDL_DEBUG
1848 if (S_arg > 9)
1849 {
1850 fprintf(stderr, "pcregrep: bad value for -S option\n");
1851 return 2;
1852 }
1853 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1854 {
1855 if (jfriedl_XT == 0) jfriedl_XT = 1;
1856 if (jfriedl_XR == 0) jfriedl_XR = 1;
1857 }
1858 #endif
1859
1860 /* Get memory to store the pattern and hints lists. */
1861
1862 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1863 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1864
1865 if (pattern_list == NULL || hints_list == NULL)
1866 {
1867 fprintf(stderr, "pcregrep: malloc failed\n");
1868 return 2;
1869 }
1870
1871 /* If no patterns were provided by -e, and there is no file provided by -f,
1872 the first argument is the one and only pattern, and it must exist. */
1873
1874 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1875 {
1876 if (i >= argc) return usage(2);
1877 patterns[cmd_pattern_count++] = argv[i++];
1878 }
1879
1880 /* Compile the patterns that were provided on the command line, either by
1881 multiple uses of -e or as a single unkeyed pattern. */
1882
1883 for (j = 0; j < cmd_pattern_count; j++)
1884 {
1885 if (!compile_pattern(patterns[j], pcre_options, NULL,
1886 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1887 return 2;
1888 }
1889
1890 /* Compile the regular expressions that are provided in a file. */
1891
1892 if (pattern_filename != NULL)
1893 {
1894 int linenumber = 0;
1895 FILE *f;
1896 char *filename;
1897 char buffer[MBUFTHIRD];
1898
1899 if (strcmp(pattern_filename, "-") == 0)
1900 {
1901 f = stdin;
1902 filename = stdin_name;
1903 }
1904 else
1905 {
1906 f = fopen(pattern_filename, "r");
1907 if (f == NULL)
1908 {
1909 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1910 strerror(errno));
1911 return 2;
1912 }
1913 filename = pattern_filename;
1914 }
1915
1916 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1917 {
1918 char *s = buffer + (int)strlen(buffer);
1919 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1920 *s = 0;
1921 linenumber++;
1922 if (buffer[0] == 0) continue; /* Skip blank lines */
1923 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1924 return 2;
1925 }
1926
1927 if (f != stdin) fclose(f);
1928 }
1929
1930 /* Study the regular expressions, as we will be running them many times */
1931
1932 for (j = 0; j < pattern_count; j++)
1933 {
1934 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1935 if (error != NULL)
1936 {
1937 char s[16];
1938 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1939 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1940 return 2;
1941 }
1942 }
1943
1944 /* If there are include or exclude patterns, compile them. */
1945
1946 if (exclude_pattern != NULL)
1947 {
1948 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1949 pcretables);
1950 if (exclude_compiled == NULL)
1951 {
1952 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1953 errptr, error);
1954 return 2;
1955 }
1956 }
1957
1958 if (include_pattern != NULL)
1959 {
1960 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1961 pcretables);
1962 if (include_compiled == NULL)
1963 {
1964 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1965 errptr, error);
1966 return 2;
1967 }
1968 }
1969
1970 /* If there are no further arguments, do the business on stdin and exit. */
1971
1972 if (i >= argc)
1973 return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1974
1975 /* Otherwise, work through the remaining arguments as files or directories.
1976 Pass in the fact that there is only one argument at top level - this suppresses
1977 the file name if the argument is not a directory and filenames are not
1978 otherwise forced. */
1979
1980 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1981
1982 for (; i < argc; i++)
1983 {
1984 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1985 only_one_at_top);
1986 if (frc > 1) rc = frc;
1987 else if (frc == 0 && rc == 1) rc = 0;
1988 }
1989
1990 return rc;
1991 }
1992
1993 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12