/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (show annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 48071 byte(s)
Load pcre-6.5 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <errno.h>
46
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include "config.h"
52 #include "pcre.h"
53
/* Local boolean type and its two truth values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Program version string and the maximum number of patterns accepted. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.2 09-Jan-2006"

/* One third of the main file buffer: BUFSIZ when that is larger than 8192,
otherwise 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
67
68
/* Values for the "filenames" variable, which controls file name output. The
numeric order matters: a file name is wanted for every value greater than
FN_DEFAULT. */

enum {
  FN_NONE = 0,
  FN_DEFAULT = 1,
  FN_ONLY = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE = 4
};

/* Actions for the -d (directories) option */

enum {
  dee_READ = 0,
  dee_SKIP = 1,
  dee_RECURSE = 2
};

/* Actions for the -D (devices) option */

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Flag bits for the special pattern-processing options */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004
85
86
87
88 /*************************************************
89 * Global variables *
90 *************************************************/
91
92 /* Jeffrey Friedl has some debugging requirements that are not part of the
93 regular code. */
94
95 #ifdef JFRIEDL_DEBUG
96 static int S_arg = -1;
97 #endif
98
99 static char *colour_string = (char *)"1;31";
100 static char *colour_option = NULL;
101 static char *dee_option = NULL;
102 static char *DEE_option = NULL;
103 static char *pattern_filename = NULL;
104 static char *stdin_name = (char *)"(standard input)";
105 static char *locale = NULL;
106
107 static const unsigned char *pcretables = NULL;
108
109 static int pattern_count = 0;
110 static pcre **pattern_list;
111 static pcre_extra **hints_list;
112
113 static char *include_pattern = NULL;
114 static char *exclude_pattern = NULL;
115
116 static pcre *include_compiled = NULL;
117 static pcre *exclude_compiled = NULL;
118
119 static int after_context = 0;
120 static int before_context = 0;
121 static int both_context = 0;
122 static int dee_action = dee_READ;
123 static int DEE_action = DEE_READ;
124 static int error_count = 0;
125 static int filenames = FN_DEFAULT;
126 static int process_options = 0;
127
128 static BOOL count_only = FALSE;
129 static BOOL do_colour = FALSE;
130 static BOOL hyphenpending = FALSE;
131 static BOOL invert = FALSE;
132 static BOOL multiline = FALSE;
133 static BOOL number = FALSE;
134 static BOOL only_matching = FALSE;
135 static BOOL quiet = FALSE;
136 static BOOL silent = FALSE;
137
/* Kinds of option: what sort of data, if any, an option takes. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

typedef struct option_item {
  int type;               /* one of the OP_xxx values */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* where the option's data is stored, or NULL */
  const char *long_name;  /* long option name */
  const char *help_text;  /* description shown in the help output */
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR  (-1)
#define N_EXCLUDE (-2)
#define N_HELP    (-3)
#define N_INCLUDE (-4)
#define N_LABEL   (-5)
#define N_LOCALE  (-6)
#define N_NULL    (-7)
161
162 static option_item optionlist[] = {
163 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
164 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
165 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
166 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
167 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
168 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
169 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
170 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
171 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
172 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
173 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
174 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
175 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
176 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
177 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
178 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
179 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
180 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
181 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
182 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
183 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
184 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
185 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
186 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
187 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
188 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
189 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
190 #ifdef JFRIEDL_DEBUG
191 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
192 #endif
193 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
194 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
195 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
196 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
197 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
198 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
199 { OP_NODATA, 0, NULL, NULL, NULL }
200 };
201
/* Strings wrapped around each pattern according to the -w, -x, and -F options,
which set the 1, 2, and 4 bits in process_options, respectively. Both tables are
indexed by that bit combination. -w together with -x has the same effect as -x
alone, so those entries are identical. */

static const char *prefix[] = {
  "",           /* 0: no special processing */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: no special processing */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
212
213
214
215 /*************************************************
216 * OS-specific functions *
217 *************************************************/
218
219 /* These functions are defined so that they can be made system specific,
220 although at present the only ones are for Unix, Win32, and for "no support". */
221
222
223 /************* Directory scanning in Unix ***********/
224
225 #if IS_UNIX
226 #include <sys/types.h>
227 #include <sys/stat.h>
228 #include <dirent.h>
229
/* On Unix a directory handle is simply a DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Returns: '/' (the path separator to use) if it is a directory
         0 if it is not, or if stat() fails, in the expectation that
           opening it as a file will then fail too
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';

  return 0;
}
240
241 static directory_type *
242 opendirectory(char *filename)
243 {
244 return opendir(filename);
245 }
246
247 static char *
248 readdirectory(directory_type *dir)
249 {
250 for (;;)
251 {
252 struct dirent *dent = readdir(dir);
253 if (dent == NULL) return NULL;
254 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
255 return dent->d_name;
256 }
257 return NULL; /* Keep compiler happy; never executed */
258 }
259
260 static void
261 closedirectory(directory_type *dir)
262 {
263 closedir(dir);
264 }
265
266
267 /************* Test for regular file in Unix **********/
268
/* Test whether a path names a regular file.

Returns: 1 if it is a regular file, or if stat() fails (in the expectation
           that opening it as a file will then fail)
         0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0)
    return 1;

  return S_ISREG(info.st_mode) ? 1 : 0;
}
277
278
279 /************* Test stdout for being a terminal in Unix **********/
280
281 static BOOL
282 is_stdout_tty(void)
283 {
284 return isatty(fileno(stdout));
285 }
286
287
288 /************* Directory scanning in Win32 ***********/
289
290 /* I (Philip Hazel) have no means of testing this code. It was contributed by
291 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
292 when it did not exist. */
293
294
295 #elif HAVE_WIN32API
296
297 #ifndef STRICT
298 # define STRICT
299 #endif
300 #ifndef WIN32_LEAN_AND_MEAN
301 # define WIN32_LEAN_AND_MEAN
302 #endif
303 #ifndef INVALID_FILE_ATTRIBUTES
304 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
305 #endif
306
307 #include <windows.h>
308
309 typedef struct directory_type
310 {
311 HANDLE handle;
312 BOOL first;
313 WIN32_FIND_DATA data;
314 } directory_type;
315
316 int
317 isdirectory(char *filename)
318 {
319 DWORD attr = GetFileAttributes(filename);
320 if (attr == INVALID_FILE_ATTRIBUTES)
321 return 0;
322 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
323 }
324
325 directory_type *
326 opendirectory(char *filename)
327 {
328 size_t len;
329 char *pattern;
330 directory_type *dir;
331 DWORD err;
332 len = strlen(filename);
333 pattern = (char *) malloc(len + 3);
334 dir = (directory_type *) malloc(sizeof(*dir));
335 if ((pattern == NULL) || (dir == NULL))
336 {
337 fprintf(stderr, "pcregrep: malloc failed\n");
338 exit(2);
339 }
340 memcpy(pattern, filename, len);
341 memcpy(&(pattern[len]), "\\*", 3);
342 dir->handle = FindFirstFile(pattern, &(dir->data));
343 if (dir->handle != INVALID_HANDLE_VALUE)
344 {
345 free(pattern);
346 dir->first = TRUE;
347 return dir;
348 }
349 err = GetLastError();
350 free(pattern);
351 free(dir);
352 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
353 return NULL;
354 }
355
356 char *
357 readdirectory(directory_type *dir)
358 {
359 for (;;)
360 {
361 if (!dir->first)
362 {
363 if (!FindNextFile(dir->handle, &(dir->data)))
364 return NULL;
365 }
366 else
367 {
368 dir->first = FALSE;
369 }
370 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
371 return dir->data.cFileName;
372 }
373 #ifndef _MSC_VER
374 return NULL; /* Keep compiler happy; never executed */
375 #endif
376 }
377
378 void
379 closedirectory(directory_type *dir)
380 {
381 FindClose(dir->handle);
382 free(dir);
383 }
384
385
386 /************* Test for regular file in Win32 **********/
387
388 /* I don't know how to do this, or if it can be done; assume all paths are
389 regular if they are not directories. */
390
/* Treat every path that is not a directory as a regular file.

Returns: 1 if isdirectory() reports that the path is not a directory
         0 if it is a directory
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
395
396
397 /************* Test stdout for being a terminal in Win32 **********/
398
399 /* I don't know how to do this; assume never */
400
401 static BOOL
402 is_stdout_tty(void)
403 {
404 FALSE;
405 }
406
407
408 /************* Directory scanning when we can't do it ***********/
409
410 /* The type is void, and apart from isdirectory(), the functions do nothing. */
411
412 #else
413
/* Stand-ins for systems without directory scanning. isdirectory() always
answers "no"; the other functions do nothing, and those that return a pointer
return NULL so that a caller never sees an indeterminate value. */

typedef void directory_type;

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
420
421
422 /************* Test for regular when we can't do it **********/
423
424 /* Assume all files are regular. */
425
int
isregfile(char *filename)
{
  (void)filename;   /* no test is available: every path counts as regular */
  return 1;
}
427
428
429 /************* Test stdout for being a terminal when we can't do it **********/
430
431 static BOOL
432 is_stdout_tty(void)
433 {
434 return FALSE;
435 }
436
437
438 #endif
439
440
441
442 #if ! HAVE_STRERROR
443 /*************************************************
444 * Provide strerror() for non-ANSI libraries *
445 *************************************************/
446
447 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
448 in their libraries, but can provide the same facility by this simple
449 alternative function. */
450
extern int sys_nerr;
extern char *sys_errlist[];

/* Map an error number to its message using the system's error table; numbers
outside the table get a fixed fallback text. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
460 #endif /* HAVE_STRERROR */
461
462
463
464 /*************************************************
465 * Print the previous "after" lines *
466 *************************************************/
467
468 /* This is called if we are about to lose said lines because of buffer filling,
469 and at the end of the file. The data in the line is written using fwrite() so
470 that a binary zero does not terminate it.
471
472 Arguments:
473 lastmatchnumber the number of the last matching line, plus one
474 lastmatchrestart where we restarted after the last match
475 endptr end of available data
476 printname filename for printing
477
478 Returns: nothing
479 */
480
481 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
482 char *endptr, char *printname)
483 {
484 if (after_context > 0 && lastmatchnumber > 0)
485 {
486 int count = 0;
487 while (lastmatchrestart < endptr && count++ < after_context)
488 {
489 char *pp = lastmatchrestart;
490 if (printname != NULL) fprintf(stdout, "%s-", printname);
491 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
492 while (*pp != '\n') pp++;
493 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
494 lastmatchrestart = pp + 1;
495 }
496 hyphenpending = TRUE;
497 }
498 }
499
500
501
502 /*************************************************
503 * Grep an individual file *
504 *************************************************/
505
506 /* This is called from grep_or_recurse() below. It uses a buffer that is three
507 times the value of MBUFTHIRD. The matching point is never allowed to stray into
508 the top third of the buffer, thus keeping more of the file available for
509 context printing or for multiline scanning. For large files, the pointer will
510 be in the middle third most of the time, so the bottom third is available for
511 "before" context printing.
512
513 Arguments:
514 in the fopened FILE stream
515 printname the file name if it is to be printed for each match
516 or NULL if the file name is not to be printed
517 it cannot be NULL if filenames[_nomatch]_only is set
518
519 Returns: 0 if there was at least one match
520 1 otherwise (no matches)
521 */
522
523 static int
524 pcregrep(FILE *in, char *printname)
525 {
526 int rc = 1;
527 int linenumber = 1;
528 int lastmatchnumber = 0;
529 int count = 0;
530 int offsets[99];
531 char *lastmatchrestart = NULL;
532 char buffer[3*MBUFTHIRD];
533 char *ptr = buffer;
534 char *endptr;
535 size_t bufflength;
536 BOOL endhyphenpending = FALSE;
537
538 /* Do the first read into the start of the buffer and set up the pointer to
539 end of what we have. */
540
541 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
542 endptr = buffer + bufflength;
543
544 /* Loop while the current pointer is not at the end of the file. For large
545 files, endptr will be at the end of the buffer when we are in the middle of the
546 file, but ptr will never get there, because as soon as it gets over 2/3 of the
547 way, the buffer is shifted left and re-filled. */
548
549 while (ptr < endptr)
550 {
551 int i;
552 int mrc = 0;
553 BOOL match = FALSE;
554 char *t = ptr;
555 size_t length, linelength;
556
557 /* At this point, ptr is at the start of a line. We need to find the length
558 of the subject string to pass to pcre_exec(). In multiline mode, it is the
559 length remainder of the data in the buffer. Otherwise, it is the length of
560 the next line. After matching, we always advance by the length of the next
561 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
562 that any match is constrained to be in the first line. */
563
564 linelength = 0;
565 while (t < endptr && *t++ != '\n') linelength++;
566 length = multiline? endptr - ptr : linelength;
567
568 /* Run through all the patterns until one matches. Note that we don't include
569 the final newline in the subject string. */
570
571 for (i = 0; i < pattern_count; i++)
572 {
573 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
574 offsets, 99);
575 if (mrc >= 0) { match = TRUE; break; }
576 if (mrc != PCRE_ERROR_NOMATCH)
577 {
578 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
579 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
580 fprintf(stderr, "this line:\n");
581 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
582 fprintf(stderr, "\n");
583 if (error_count == 0 &&
584 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
585 {
586 fprintf(stderr, "pcregrep: error %d means that a resource limit "
587 "was exceeded\n", mrc);
588 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
589 }
590 if (error_count++ > 20)
591 {
592 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
593 exit(2);
594 }
595 match = invert; /* No more matching; don't show the line again */
596 break;
597 }
598 }
599
600 /* If it's a match or a not-match (as required), do what's wanted. */
601
602 if (match != invert)
603 {
604 BOOL hyphenprinted = FALSE;
605
606 /* We've failed if we want a file that doesn't have any matches. */
607
608 if (filenames == FN_NOMATCH_ONLY) return 1;
609
610 /* Just count if just counting is wanted. */
611
612 if (count_only) count++;
613
614 /* If all we want is a file name, there is no need to scan any more lines
615 in the file. */
616
617 else if (filenames == FN_ONLY)
618 {
619 fprintf(stdout, "%s\n", printname);
620 return 0;
621 }
622
623 /* Likewise, if all we want is a yes/no answer. */
624
625 else if (quiet) return 0;
626
627 /* The --only-matching option prints just the substring that matched, and
628 does not pring any context. */
629
630 else if (only_matching)
631 {
632 if (printname != NULL) fprintf(stdout, "%s:", printname);
633 if (number) fprintf(stdout, "%d:", linenumber);
634 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
635 fprintf(stdout, "\n");
636 }
637
638 /* This is the default case when none of the above options is set. We print
639 the matching lines(s), possibly preceded and/or followed by other lines of
640 context. */
641
642 else
643 {
644 /* See if there is a requirement to print some "after" lines from a
645 previous match. We never print any overlaps. */
646
647 if (after_context > 0 && lastmatchnumber > 0)
648 {
649 int linecount = 0;
650 char *p = lastmatchrestart;
651
652 while (p < ptr && linecount < after_context)
653 {
654 while (*p != '\n') p++;
655 p++;
656 linecount++;
657 }
658
659 /* It is important to advance lastmatchrestart during this printing so
660 that it interacts correctly with any "before" printing below. Print
661 each line's data using fwrite() in case there are binary zeroes. */
662
663 while (lastmatchrestart < p)
664 {
665 char *pp = lastmatchrestart;
666 if (printname != NULL) fprintf(stdout, "%s-", printname);
667 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
668 while (*pp != '\n') pp++;
669 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
670 lastmatchrestart = pp + 1;
671 }
672 if (lastmatchrestart != ptr) hyphenpending = TRUE;
673 }
674
675 /* If there were non-contiguous lines printed above, insert hyphens. */
676
677 if (hyphenpending)
678 {
679 fprintf(stdout, "--\n");
680 hyphenpending = FALSE;
681 hyphenprinted = TRUE;
682 }
683
684 /* See if there is a requirement to print some "before" lines for this
685 match. Again, don't print overlaps. */
686
687 if (before_context > 0)
688 {
689 int linecount = 0;
690 char *p = ptr;
691
692 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
693 linecount < before_context)
694 {
695 linecount++;
696 p--;
697 while (p > buffer && p[-1] != '\n') p--;
698 }
699
700 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
701 fprintf(stdout, "--\n");
702
703 while (p < ptr)
704 {
705 char *pp = p;
706 if (printname != NULL) fprintf(stdout, "%s-", printname);
707 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
708 while (*pp != '\n') pp++;
709 fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */
710 p = pp + 1;
711 }
712 }
713
714 /* Now print the matching line(s); ensure we set hyphenpending at the end
715 of the file if any context lines are being output. */
716
717 if (after_context > 0 || before_context > 0)
718 endhyphenpending = TRUE;
719
720 if (printname != NULL) fprintf(stdout, "%s:", printname);
721 if (number) fprintf(stdout, "%d:", linenumber);
722
723 /* In multiline mode, we want to print to the end of the line in which
724 the end of the matched string is found, so we adjust linelength and the
725 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
726 start of the match will always be before the first \n character. */
727
728 if (multiline)
729 {
730 char *endmatch = ptr + offsets[1];
731 t = ptr;
732 while (t < endmatch) { if (*t++ == '\n') linenumber++; }
733 while (endmatch < endptr && *endmatch != '\n') endmatch++;
734 linelength = endmatch - ptr;
735 }
736
737 /*** NOTE: Use only fwrite() to output the data line, so that binary
738 zeroes are treated as just another data character. */
739
740 /* This extra option, for Jeffrey Friedl's debugging requirements,
741 replaces the matched string, or a specific captured string if it exists,
742 with X. When this happens, colouring is ignored. */
743
744 #ifdef JFRIEDL_DEBUG
745 if (S_arg >= 0 && S_arg < mrc)
746 {
747 int first = S_arg * 2;
748 int last = first + 1;
749 fwrite(ptr, 1, offsets[first], stdout);
750 fprintf(stdout, "X");
751 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
752 }
753 else
754 #endif
755
756 /* We have to split the line(s) up if colouring. */
757
758 if (do_colour)
759 {
760 fwrite(ptr, 1, offsets[0], stdout);
761 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
762 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
763 fprintf(stdout, "%c[00m", 0x1b);
764 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
765 }
766 else fwrite(ptr, 1, linelength, stdout);
767
768 fprintf(stdout, "\n");
769 }
770
771 /* End of doing what has to be done for a match */
772
773 rc = 0; /* Had some success */
774
775 /* Remember where the last match happened for after_context. We remember
776 where we are about to restart, and that line's number. */
777
778 lastmatchrestart = ptr + linelength + 1;
779 lastmatchnumber = linenumber + 1;
780 }
781
782 /* Advance to after the newline and increment the line number. */
783
784 ptr += linelength + 1;
785 linenumber++;
786
787 /* If we haven't yet reached the end of the file (the buffer is full), and
788 the current point is in the top 1/3 of the buffer, slide the buffer down by
789 1/3 and refill it. Before we do this, if some unprinted "after" lines are
790 about to be lost, print them. */
791
792 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
793 {
794 if (after_context > 0 &&
795 lastmatchnumber > 0 &&
796 lastmatchrestart < buffer + MBUFTHIRD)
797 {
798 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
799 lastmatchnumber = 0;
800 }
801
802 /* Now do the shuffle */
803
804 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
805 ptr -= MBUFTHIRD;
806 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
807 endptr = buffer + bufflength;
808
809 /* Adjust any last match point */
810
811 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
812 }
813 } /* Loop through the whole file */
814
815 /* End of file; print final "after" lines if wanted; do_after_lines sets
816 hyphenpending if it prints something. */
817
818 if (!only_matching && !count_only)
819 {
820 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
821 hyphenpending |= endhyphenpending;
822 }
823
824 /* Print the file name if we are looking for those without matches and there
825 were none. If we found a match, we won't have got this far. */
826
827 if (filenames == FN_NOMATCH_ONLY)
828 {
829 fprintf(stdout, "%s\n", printname);
830 return 0;
831 }
832
833 /* Print the match count if wanted */
834
835 if (count_only)
836 {
837 if (printname != NULL) fprintf(stdout, "%s:", printname);
838 fprintf(stdout, "%d\n", count);
839 }
840
841 return rc;
842 }
843
844
845
846 /*************************************************
847 * Grep a file or recurse into a directory *
848 *************************************************/
849
850 /* Given a path name, if it's a directory, scan all the files if we are
851 recursing; if it's a file, grep it.
852
853 Arguments:
854 pathname the path to investigate
855 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
856 only_one_at_top TRUE if the path is the only one at toplevel
857
858 Returns: 0 if there was at least one match
859 1 if there were no matches
860 2 there was some kind of error
861
862 However, file opening failures are suppressed if "silent" is set.
863 */
864
865 static int
866 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
867 {
868 int rc = 1;
869 int sep;
870 FILE *in;
871
872 /* If the file name is "-" we scan stdin */
873
874 if (strcmp(pathname, "-") == 0)
875 {
876 return pcregrep(stdin,
877 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
878 stdin_name : NULL);
879 }
880
881
882 /* If the file is a directory, skip if skipping or if we are recursing, scan
883 each file within it, subject to any include or exclude patterns that were set.
884 The scanning code is localized so it can be made system-specific. */
885
886 if ((sep = isdirectory(pathname)) != 0)
887 {
888 if (dee_action == dee_SKIP) return 1;
889 if (dee_action == dee_RECURSE)
890 {
891 char buffer[1024];
892 char *nextfile;
893 directory_type *dir = opendirectory(pathname);
894
895 if (dir == NULL)
896 {
897 if (!silent)
898 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
899 strerror(errno));
900 return 2;
901 }
902
903 while ((nextfile = readdirectory(dir)) != NULL)
904 {
905 int frc, blen;
906 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
907 blen = strlen(buffer);
908
909 if (exclude_compiled != NULL &&
910 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
911 continue;
912
913 if (include_compiled != NULL &&
914 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
915 continue;
916
917 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
918 if (frc > 1) rc = frc;
919 else if (frc == 0 && rc == 1) rc = 0;
920 }
921
922 closedirectory(dir);
923 return rc;
924 }
925 }
926
927 /* If the file is not a directory and not a regular file, skip it if that's
928 been requested. */
929
930 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
931
932 /* Control reaches here if we have a regular file, or if we have a directory
933 and recursion or skipping was not requested, or if we have anything else and
934 skipping was not requested. The scan proceeds. If this is the first and only
935 argument at top level, we don't show the file name, unless we are only showing
936 the file name, or the filename was forced (-H). */
937
938 in = fopen(pathname, "r");
939 if (in == NULL)
940 {
941 if (!silent)
942 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
943 strerror(errno));
944 return 2;
945 }
946
947 rc = pcregrep(in, (filenames > FN_DEFAULT ||
948 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
949
950 fclose(in);
951 return rc;
952 }
953
954
955
956
957 /*************************************************
958 * Usage function *
959 *************************************************/
960
961 static int
962 usage(int rc)
963 {
964 option_item *op;
965 fprintf(stderr, "Usage: pcregrep [-");
966 for (op = optionlist; op->one_char != 0; op++)
967 {
968 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
969 }
970 fprintf(stderr, "] [long options] [pattern] [files]\n");
971 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
972 return rc;
973 }
974
975
976
977
978 /*************************************************
979 * Help function *
980 *************************************************/
981
982 static void
983 help(void)
984 {
985 option_item *op;
986
987 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
988 printf("Search for PATTERN in each FILE or standard input.\n");
989 printf("PATTERN must be present if neither -e nor -f is used.\n");
990 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
991 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
992
993 printf("Options:\n");
994
995 for (op = optionlist; op->one_char != 0; op++)
996 {
997 int n;
998 char s[4];
999 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1000 printf(" %s --%s%n", s, op->long_name, &n);
1001 n = 30 - n;
1002 if (n < 1) n = 1;
1003 printf("%.*s%s\n", n, " ", op->help_text);
1004 }
1005
1006 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1007 printf("trailing white space is removed and blank lines are ignored.\n");
1008 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1009
1010 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1011 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1012 }
1013
1014
1015
1016
1017 /*************************************************
1018 * Handle a single-letter, no data option *
1019 *************************************************/
1020
1021 static int
1022 handle_option(int letter, int options)
1023 {
1024 switch(letter)
1025 {
1026 case N_HELP: help(); exit(0);
1027 case 'c': count_only = TRUE; break;
1028 case 'F': process_options |= PO_FIXED_STRINGS; break;
1029 case 'H': filenames = FN_FORCE; break;
1030 case 'h': filenames = FN_NONE; break;
1031 case 'i': options |= PCRE_CASELESS; break;
1032 case 'l': filenames = FN_ONLY; break;
1033 case 'L': filenames = FN_NOMATCH_ONLY; break;
1034 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1035 case 'n': number = TRUE; break;
1036 case 'o': only_matching = TRUE; break;
1037 case 'q': quiet = TRUE; break;
1038 case 'r': dee_action = dee_RECURSE; break;
1039 case 's': silent = TRUE; break;
1040 case 'u': options |= PCRE_UTF8; break;
1041 case 'v': invert = TRUE; break;
1042 case 'w': process_options |= PO_WORD_MATCH; break;
1043 case 'x': process_options |= PO_LINE_MATCH; break;
1044
1045 case 'V':
1046 fprintf(stderr, "pcregrep version %s using ", VERSION);
1047 fprintf(stderr, "PCRE version %s\n", pcre_version());
1048 exit(0);
1049 break;
1050
1051 default:
1052 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1053 exit(usage(2));
1054 }
1055
1056 return options;
1057 }
1058
1059
1060
1061
1062 /*************************************************
1063 * Construct printed ordinal *
1064 *************************************************/
1065
1066 /* This turns a number into "1st", "3rd", etc. */
1067
static char *
ordin(int n)
{
/* Room for every decimal digit of an int (at most 3 per byte), a sign, the
two-letter ending and the terminating zero. The buffer is static, so the
result is overwritten by the next call. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int last_two = n % 100;

/* 11, 12 and 13 (and 111, 112, ...) take "th" even though they end in 1, 2
or 3; every other number is decided by its final digit. Negative values fall
through to "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1084
1085
1086
1087 /*************************************************
1088 * Compile a single pattern *
1089 *************************************************/
1090
1091 /* When the -F option has been used, this is called for each substring.
1092 Otherwise it's called for each supplied pattern.
1093
1094 Arguments:
1095 pattern the pattern string
1096 options the PCRE options
1097 filename the file name, or NULL for a command-line pattern
1098 count 0 if this is the only command line pattern, or
1099 number of the command line pattern, or
1100 linenumber for a pattern from a file
1101
1102 Returns: TRUE on success, FALSE after an error
1103 */
1104
1105 static BOOL
1106 compile_single_pattern(char *pattern, int options, char *filename, int count)
1107 {
1108 char buffer[MBUFTHIRD + 16];
1109 const char *error;
1110 int errptr;
1111
1112 if (pattern_count >= MAX_PATTERN_COUNT)
1113 {
1114 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1115 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1116 return FALSE;
1117 }
1118
1119 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1120 suffix[process_options]);
1121 pattern_list[pattern_count] =
1122 pcre_compile(buffer, options, &error, &errptr, pcretables);
1123 if (pattern_list[pattern_count++] != NULL) return TRUE;
1124
1125 /* Handle compile errors */
1126
1127 errptr -= (int)strlen(prefix[process_options]);
1128 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1129
1130 if (filename == NULL)
1131 {
1132 if (count == 0)
1133 fprintf(stderr, "pcregrep: Error in command-line regex "
1134 "at offset %d: %s\n", errptr, error);
1135 else
1136 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1137 "at offset %d: %s\n", ordin(count), errptr, error);
1138 }
1139 else
1140 {
1141 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1142 "at offset %d: %s\n", count, filename, errptr, error);
1143 }
1144
1145 return FALSE;
1146 }
1147
1148
1149
1150 /*************************************************
1151 * Compile one supplied pattern *
1152 *************************************************/
1153
1154 /* When the -F option has been used, each string may be a list of strings,
1155 separated by newlines. They will be matched literally.
1156
1157 Arguments:
1158 pattern the pattern string
1159 options the PCRE options
1160 filename the file name, or NULL for a command-line pattern
1161 count 0 if this is the only command line pattern, or
1162 number of the command line pattern, or
1163 linenumber for a pattern from a file
1164
1165 Returns: TRUE on success, FALSE after an error
1166 */
1167
1168 static BOOL
1169 compile_pattern(char *pattern, int options, char *filename, int count)
1170 {
1171 if ((process_options & PO_FIXED_STRINGS) != 0)
1172 {
1173 char buffer[MBUFTHIRD];
1174 for(;;)
1175 {
1176 char *p = strchr(pattern, '\n');
1177 if (p == NULL)
1178 return compile_single_pattern(pattern, options, filename, count);
1179 sprintf(buffer, "%.*s", p - pattern, pattern);
1180 pattern = p + 1;
1181 if (!compile_single_pattern(buffer, options, filename, count))
1182 return FALSE;
1183 }
1184 }
1185 else return compile_single_pattern(pattern, options, filename, count);
1186 }
1187
1188
1189
1190 /*************************************************
1191 * Main program *
1192 *************************************************/
1193
1194 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1195
1196 int
1197 main(int argc, char **argv)
1198 {
1199 int i, j;
1200 int rc = 1;
1201 int pcre_options = 0;
1202 int cmd_pattern_count = 0;
1203 int errptr;
1204 BOOL only_one_at_top;
1205 char *patterns[MAX_PATTERN_COUNT];
1206 const char *locale_from = "--locale";
1207 const char *error;
1208
1209 /* Process the options */
1210
1211 for (i = 1; i < argc; i++)
1212 {
1213 option_item *op = NULL;
1214 char *option_data = (char *)""; /* default to keep compiler happy */
1215 BOOL longop;
1216 BOOL longopwasequals = FALSE;
1217
1218 if (argv[i][0] != '-') break;
1219
1220 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1221 but only if we have previously had -e or -f to define the patterns. */
1222
1223 if (argv[i][1] == 0)
1224 {
1225 if (pattern_filename != NULL || pattern_count > 0) break;
1226 else exit(usage(2));
1227 }
1228
1229 /* Handle a long name option, or -- to terminate the options */
1230
1231 if (argv[i][1] == '-')
1232 {
1233 char *arg = argv[i] + 2;
1234 char *argequals = strchr(arg, '=');
1235
1236 if (*arg == 0) /* -- terminates options */
1237 {
1238 i++;
1239 break; /* out of the options-handling loop */
1240 }
1241
1242 longop = TRUE;
1243
1244 /* Some long options have data that follows after =, for example file=name.
1245 Some options have variations in the long name spelling: specifically, we
1246 allow "regexp" because GNU grep allows it, though I personally go along
1247 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1248 These options are entered in the table as "regex(p)". No option is in both
1249 these categories, fortunately. */
1250
1251 for (op = optionlist; op->one_char != 0; op++)
1252 {
1253 char *opbra = strchr(op->long_name, '(');
1254 char *equals = strchr(op->long_name, '=');
1255 if (opbra == NULL) /* Not a (p) case */
1256 {
1257 if (equals == NULL) /* Not thing=data case */
1258 {
1259 if (strcmp(arg, op->long_name) == 0) break;
1260 }
1261 else /* Special case xxx=data */
1262 {
1263 int oplen = equals - op->long_name;
1264 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1265 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1266 {
1267 option_data = arg + arglen;
1268 if (*option_data == '=')
1269 {
1270 option_data++;
1271 longopwasequals = TRUE;
1272 }
1273 break;
1274 }
1275 }
1276 }
1277 else /* Special case xxxx(p) */
1278 {
1279 char buff1[24];
1280 char buff2[24];
1281 int baselen = opbra - op->long_name;
1282 sprintf(buff1, "%.*s", baselen, op->long_name);
1283 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1284 opbra + 1);
1285 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1286 break;
1287 }
1288 }
1289
1290 if (op->one_char == 0)
1291 {
1292 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1293 exit(usage(2));
1294 }
1295 }
1296
1297 /* One-char options; many that have no data may be in a single argument; we
1298 continue till we hit the last one or one that needs data. */
1299
1300 else
1301 {
1302 char *s = argv[i] + 1;
1303 longop = FALSE;
1304 while (*s != 0)
1305 {
1306 for (op = optionlist; op->one_char != 0; op++)
1307 { if (*s == op->one_char) break; }
1308 if (op->one_char == 0)
1309 {
1310 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1311 *s, argv[i]);
1312 exit(usage(2));
1313 }
1314 if (op->type != OP_NODATA || s[1] == 0)
1315 {
1316 option_data = s+1;
1317 break;
1318 }
1319 pcre_options = handle_option(*s++, pcre_options);
1320 }
1321 }
1322
1323 /* At this point we should have op pointing to a matched option. If the type
1324 is NO_DATA, it means that there is no data, and the option might set
1325 something in the PCRE options. */
1326
1327 if (op->type == OP_NODATA)
1328 {
1329 pcre_options = handle_option(op->one_char, pcre_options);
1330 continue;
1331 }
1332
1333 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1334 either has a value or defaults to something. It cannot have data in a
1335 separate item. At the moment, the only such options are "colo(u)r" and
1336 Jeffrey Friedl's special debugging option. */
1337
1338 if (*option_data == 0 &&
1339 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1340 {
1341 switch (op->one_char)
1342 {
1343 case N_COLOUR:
1344 colour_option = (char *)"auto";
1345 break;
1346 #ifdef JFRIEDL_DEBUG
1347 case 'S':
1348 S_arg = 0;
1349 break;
1350 #endif
1351 }
1352 continue;
1353 }
1354
1355 /* Otherwise, find the data string for the option. */
1356
1357 if (*option_data == 0)
1358 {
1359 if (i >= argc - 1 || longopwasequals)
1360 {
1361 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1362 exit(usage(2));
1363 }
1364 option_data = argv[++i];
1365 }
1366
1367 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1368 multiple times to create a list of patterns. */
1369
1370 if (op->type == OP_PATLIST)
1371 {
1372 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1373 {
1374 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1375 MAX_PATTERN_COUNT);
1376 return 2;
1377 }
1378 patterns[cmd_pattern_count++] = option_data;
1379 }
1380
1381 /* Otherwise, deal with single string or numeric data values. */
1382
1383 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1384 {
1385 *((char **)op->dataptr) = option_data;
1386 }
1387 else
1388 {
1389 char *endptr;
1390 int n = strtoul(option_data, &endptr, 10);
1391 if (*endptr != 0)
1392 {
1393 if (longop)
1394 {
1395 char *equals = strchr(op->long_name, '=');
1396 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1397 equals - op->long_name;
1398 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1399 option_data, nlen, op->long_name);
1400 }
1401 else
1402 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1403 option_data, op->one_char);
1404 exit(usage(2));
1405 }
1406 *((int *)op->dataptr) = n;
1407 }
1408 }
1409
1410 /* Options have been decoded. If -C was used, its value is used as a default
1411 for -A and -B. */
1412
1413 if (both_context > 0)
1414 {
1415 if (after_context == 0) after_context = both_context;
1416 if (before_context == 0) before_context = both_context;
1417 }
1418
1419 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1420 LC_ALL environment variable is set, and if so, use it. */
1421
1422 if (locale == NULL)
1423 {
1424 locale = getenv("LC_ALL");
1425 locale_from = "LCC_ALL";
1426 }
1427
1428 if (locale == NULL)
1429 {
1430 locale = getenv("LC_CTYPE");
1431 locale_from = "LC_CTYPE";
1432 }
1433
1434 /* If a locale has been provided, set it, and generate the tables the PCRE
1435 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1436
1437 if (locale != NULL)
1438 {
1439 if (setlocale(LC_CTYPE, locale) == NULL)
1440 {
1441 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1442 locale, locale_from);
1443 return 2;
1444 }
1445 pcretables = pcre_maketables();
1446 }
1447
1448 /* Sort out colouring */
1449
1450 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1451 {
1452 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1453 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1454 else
1455 {
1456 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1457 colour_option);
1458 return 2;
1459 }
1460 if (do_colour)
1461 {
1462 char *cs = getenv("PCREGREP_COLOUR");
1463 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1464 if (cs != NULL) colour_string = cs;
1465 }
1466 }
1467
1468 /* Interpret the text values for -d and -D */
1469
1470 if (dee_option != NULL)
1471 {
1472 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1473 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1474 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1475 else
1476 {
1477 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1478 return 2;
1479 }
1480 }
1481
1482 if (DEE_option != NULL)
1483 {
1484 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1485 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1486 else
1487 {
1488 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1489 return 2;
1490 }
1491 }
1492
1493 /* Check the value for Jeff Friedl's debugging option. */
1494
1495 #ifdef JFRIEDL_DEBUG
1496 if (S_arg > 9)
1497 {
1498 fprintf(stderr, "pcregrep: bad value for -S option\n");
1499 return 2;
1500 }
1501 #endif
1502
1503 /* Get memory to store the pattern and hints lists. */
1504
1505 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1506 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1507
1508 if (pattern_list == NULL || hints_list == NULL)
1509 {
1510 fprintf(stderr, "pcregrep: malloc failed\n");
1511 return 2;
1512 }
1513
1514 /* If no patterns were provided by -e, and there is no file provided by -f,
1515 the first argument is the one and only pattern, and it must exist. */
1516
1517 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1518 {
1519 if (i >= argc) return usage(2);
1520 patterns[cmd_pattern_count++] = argv[i++];
1521 }
1522
1523 /* Compile the patterns that were provided on the command line, either by
1524 multiple uses of -e or as a single unkeyed pattern. */
1525
1526 for (j = 0; j < cmd_pattern_count; j++)
1527 {
1528 if (!compile_pattern(patterns[j], pcre_options, NULL,
1529 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1530 return 2;
1531 }
1532
1533 /* Compile the regular expressions that are provided in a file. */
1534
1535 if (pattern_filename != NULL)
1536 {
1537 int linenumber = 0;
1538 FILE *f;
1539 char *filename;
1540 char buffer[MBUFTHIRD];
1541
1542 if (strcmp(pattern_filename, "-") == 0)
1543 {
1544 f = stdin;
1545 filename = stdin_name;
1546 }
1547 else
1548 {
1549 f = fopen(pattern_filename, "r");
1550 if (f == NULL)
1551 {
1552 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1553 strerror(errno));
1554 return 2;
1555 }
1556 filename = pattern_filename;
1557 }
1558
1559 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1560 {
1561 char *s = buffer + (int)strlen(buffer);
1562 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1563 *s = 0;
1564 linenumber++;
1565 if (buffer[0] == 0) continue; /* Skip blank lines */
1566 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1567 return 2;
1568 }
1569
1570 if (f != stdin) fclose(f);
1571 }
1572
1573 /* Study the regular expressions, as we will be running them many times */
1574
1575 for (j = 0; j < pattern_count; j++)
1576 {
1577 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1578 if (error != NULL)
1579 {
1580 char s[16];
1581 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1582 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1583 return 2;
1584 }
1585 }
1586
1587 /* If there are include or exclude patterns, compile them. */
1588
1589 if (exclude_pattern != NULL)
1590 {
1591 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1592 pcretables);
1593 if (exclude_compiled == NULL)
1594 {
1595 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1596 errptr, error);
1597 return 2;
1598 }
1599 }
1600
1601 if (include_pattern != NULL)
1602 {
1603 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1604 pcretables);
1605 if (include_compiled == NULL)
1606 {
1607 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1608 errptr, error);
1609 return 2;
1610 }
1611 }
1612
1613 /* If there are no further arguments, do the business on stdin and exit. */
1614
1615 if (i >= argc)
1616 return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1617
1618 /* Otherwise, work through the remaining arguments as files or directories.
1619 Pass in the fact that there is only one argument at top level - this suppresses
1620 the file name if the argument is not a directory and filenames are not
1621 otherwise forced. */
1622
1623 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1624
1625 for (; i < argc; i++)
1626 {
1627 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1628 only_one_at_top);
1629 if (frc > 1) rc = frc;
1630 else if (frc == 0 && rc == 1) rc = 0;
1631 }
1632
1633 return rc;
1634 }
1635
1636 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12