/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 89 - (show annotations) (download)
Sat Feb 24 21:41:27 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 50765 byte(s)
Load pcre-6.6 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <errno.h>
46
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include "config.h"
52 #include "pcre.h"
53
/* Boolean support: BOOL is a plain int holding TRUE or FALSE. */

#define TRUE 1
#define FALSE 0

typedef int BOOL;

/* Program version string and the maximum number of patterns accepted. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.2 09-Jan-2006"

/* MBUFTHIRD is one third of the size of the file buffer used by pcregrep():
BUFSIZ when that is larger than 8192, otherwise 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
67
68
69 /* Values for the "filenames" variable, which specifies options for file name
70 output. The order is important; it is assumed that a file name is wanted for
71 all values greater than FN_DEFAULT. */
72
/* File name output modes stored in "filenames". The numeric order matters:
code tests "filenames > FN_DEFAULT" to decide that a name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Values of dee_action (directory handling, -d). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

/* Values of DEE_action (handling of paths that are neither directories nor
regular files, -D). */

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits kept in process_options; they select the pattern prefix and
suffix used for the -w, -x and -F options. */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004
85
86
87
88 /*************************************************
89 * Global variables *
90 *************************************************/
91
92 /* Jeffrey Friedl has some debugging requirements that are not part of the
93 regular code. */
94
95 #ifdef JFRIEDL_DEBUG
96 static int S_arg = -1;
97 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99 static const char *jfriedl_prefix = "";
100 static const char *jfriedl_postfix = "";
101 #endif
102
103 static char *colour_string = (char *)"1;31";
104 static char *colour_option = NULL;
105 static char *dee_option = NULL;
106 static char *DEE_option = NULL;
107 static char *pattern_filename = NULL;
108 static char *stdin_name = (char *)"(standard input)";
109 static char *locale = NULL;
110
111 static const unsigned char *pcretables = NULL;
112
113 static int pattern_count = 0;
114 static pcre **pattern_list;
115 static pcre_extra **hints_list;
116
117 static char *include_pattern = NULL;
118 static char *exclude_pattern = NULL;
119
120 static pcre *include_compiled = NULL;
121 static pcre *exclude_compiled = NULL;
122
123 static int after_context = 0;
124 static int before_context = 0;
125 static int both_context = 0;
126 static int dee_action = dee_READ;
127 static int DEE_action = DEE_READ;
128 static int error_count = 0;
129 static int filenames = FN_DEFAULT;
130 static int process_options = 0;
131
132 static BOOL count_only = FALSE;
133 static BOOL do_colour = FALSE;
134 static BOOL hyphenpending = FALSE;
135 static BOOL invert = FALSE;
136 static BOOL multiline = FALSE;
137 static BOOL number = FALSE;
138 static BOOL only_matching = FALSE;
139 static BOOL quiet = FALSE;
140 static BOOL silent = FALSE;
141
142 /* Structure for options and list of them */
143
/* Kinds of option, stored in option_item.type. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the option table. */

typedef struct option_item {
  int type;               /* one of the OP_ values above */
  int one_char;           /* single-letter form; 0 marks the end of the table */
  void *dataptr;          /* variable that receives the option data, or NULL */
  const char *long_name;  /* long option name as shown by --help */
  const char *help_text;  /* description shown by --help */
} option_item;
154
155 /* Options without a single-letter equivalent get a negative value. This can be
156 used to identify them. */
157
158 #define N_COLOUR (-1)
159 #define N_EXCLUDE (-2)
160 #define N_HELP (-3)
161 #define N_INCLUDE (-4)
162 #define N_LABEL (-5)
163 #define N_LOCALE (-6)
164 #define N_NULL (-7)
165
166 static option_item optionlist[] = {
167 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
168 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
169 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
170 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
171 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
172 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
173 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
174 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
175 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
176 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
177 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
178 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
179 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
180 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
181 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
182 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
183 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
184 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
185 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
186 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
187 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
188 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
189 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
190 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
191 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
192 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
193 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
194 #ifdef JFRIEDL_DEBUG
195 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
196 #endif
197 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
198 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
199 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
200 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
201 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
202 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
203 { OP_NODATA, 0, NULL, NULL, NULL }
204 };
205
206 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
207 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
208 that the combination of -w and -x has the same effect as -x on its own, so we
209 can treat them as the same. */
210
/* Text placed before each pattern, indexed by the process_options bits
(1 = -w, 2 = -x, 4 = -F). */

static const char *prefix[] = {
  "",          /* 0: none       */
  "\\b",       /* 1: -w         */
  "^(?:",      /* 2: -x         */
  "^(?:",      /* 3: -w -x      */
  "\\Q",       /* 4: -F         */
  "\\b\\Q",    /* 5: -F -w      */
  "^(?:\\Q",   /* 6: -F -x      */
  "^(?:\\Q"    /* 7: -F -w -x   */
};

/* Text placed after each pattern, indexed the same way. */

static const char *suffix[] = {
  "",          /* 0: none       */
  "\\b",       /* 1: -w         */
  ")$",        /* 2: -x         */
  ")$",        /* 3: -w -x      */
  "\\E",       /* 4: -F         */
  "\\E\\b",    /* 5: -F -w      */
  "\\E)$",     /* 6: -F -x      */
  "\\E)$"      /* 7: -F -w -x   */
};
216
217
218
219 /*************************************************
220 * OS-specific functions *
221 *************************************************/
222
223 /* These functions are defined so that they can be made system specific,
224 although at present the only ones are for Unix, Win32, and for "no support". */
225
226
227 /************* Directory scanning in Unix ***********/
228
229 #if IS_UNIX
230 #include <sys/types.h>
231 #include <sys/stat.h>
232 #include <dirent.h>
233
/* On Unix a directory scan is represented directly by the library's DIR. */
typedef DIR directory_type;
235
/* Test whether a path names a directory.

Argument:  filename   the path to examine
Returns:   '/' (the separator to use when building paths inside it) if the
           path is a directory; 0 if it is not, or if stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as "not a directory", in the
  expectation that the later attempt to open it as a file will fail. */

  if (stat(filename, &info) < 0) return 0;

  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
244
245 static directory_type *
246 opendirectory(char *filename)
247 {
248 return opendir(filename);
249 }
250
251 static char *
252 readdirectory(directory_type *dir)
253 {
254 for (;;)
255 {
256 struct dirent *dent = readdir(dir);
257 if (dent == NULL) return NULL;
258 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
259 return dent->d_name;
260 }
261 return NULL; /* Keep compiler happy; never executed */
262 }
263
264 static void
265 closedirectory(directory_type *dir)
266 {
267 closedir(dir);
268 }
269
270
271 /************* Test for regular file in Unix **********/
272
/* Test whether a path names a regular file.

Argument:  filename   the path to examine
Returns:   1 if the path is a regular file, or if stat() fails; 0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is treated as regular, in the expectation
  that the later attempt to open it as a file will fail. */

  if (stat(filename, &info) < 0) return 1;

  return S_ISREG(info.st_mode)? 1 : 0;
}
281
282
283 /************* Test stdout for being a terminal in Unix **********/
284
285 static BOOL
286 is_stdout_tty(void)
287 {
288 return isatty(fileno(stdout));
289 }
290
291
292 /************* Directory scanning in Win32 ***********/
293
294 /* I (Philip Hazel) have no means of testing this code. It was contributed by
295 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
296 when it did not exist. */
297
298
299 #elif HAVE_WIN32API
300
301 #ifndef STRICT
302 # define STRICT
303 #endif
304 #ifndef WIN32_LEAN_AND_MEAN
305 # define WIN32_LEAN_AND_MEAN
306 #endif
307 #ifndef INVALID_FILE_ATTRIBUTES
308 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
309 #endif
310
311 #include <windows.h>
312
313 typedef struct directory_type
314 {
315 HANDLE handle;
316 BOOL first;
317 WIN32_FIND_DATA data;
318 } directory_type;
319
320 int
321 isdirectory(char *filename)
322 {
323 DWORD attr = GetFileAttributes(filename);
324 if (attr == INVALID_FILE_ATTRIBUTES)
325 return 0;
326 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
327 }
328
329 directory_type *
330 opendirectory(char *filename)
331 {
332 size_t len;
333 char *pattern;
334 directory_type *dir;
335 DWORD err;
336 len = strlen(filename);
337 pattern = (char *) malloc(len + 3);
338 dir = (directory_type *) malloc(sizeof(*dir));
339 if ((pattern == NULL) || (dir == NULL))
340 {
341 fprintf(stderr, "pcregrep: malloc failed\n");
342 exit(2);
343 }
344 memcpy(pattern, filename, len);
345 memcpy(&(pattern[len]), "\\*", 3);
346 dir->handle = FindFirstFile(pattern, &(dir->data));
347 if (dir->handle != INVALID_HANDLE_VALUE)
348 {
349 free(pattern);
350 dir->first = TRUE;
351 return dir;
352 }
353 err = GetLastError();
354 free(pattern);
355 free(dir);
356 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
357 return NULL;
358 }
359
360 char *
361 readdirectory(directory_type *dir)
362 {
363 for (;;)
364 {
365 if (!dir->first)
366 {
367 if (!FindNextFile(dir->handle, &(dir->data)))
368 return NULL;
369 }
370 else
371 {
372 dir->first = FALSE;
373 }
374 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
375 return dir->data.cFileName;
376 }
377 #ifndef _MSC_VER
378 return NULL; /* Keep compiler happy; never executed */
379 #endif
380 }
381
382 void
383 closedirectory(directory_type *dir)
384 {
385 FindClose(dir->handle);
386 free(dir);
387 }
388
389
390 /************* Test for regular file in Win32 **********/
391
392 /* I don't know how to do this, or if it can be done; assume all paths are
393 regular if they are not directories. */
394
/* Test for a regular file (Win32). Anything that is not a directory is
treated as regular.

Argument:  filename   the path to examine
Returns:   1 if isdirectory() reports that the path is not a directory;
           0 otherwise
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
399
400
401 /************* Test stdout for being a terminal in Win32 **********/
402
403 /* I don't know how to do this; assume never */
404
405 static BOOL
406 is_stdout_tty(void)
407 {
408 FALSE;
409 }
410
411
412 /************* Directory scanning when we can't do it ***********/
413
414 /* The type is void, and apart from isdirectory(), the functions do nothing. */
415
416 #else
417
/* Stand-ins used when directory scanning is not supported. isdirectory()
always answers "no", so grep_or_recurse() never starts a scan; the other
functions still return well-defined values in case they are ever called. */

typedef void directory_type;

/* Returns 0: no path is ever reported as a directory. */
int isdirectory(char *filename) { (void)filename; return 0; }

/* Returns NULL: a scan can never be started. */
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }

/* Returns NULL: there are never any entries. */
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }

/* Nothing to release. */
void closedirectory(directory_type *dir) { (void)dir; }
424
425
426 /************* Test for regular when we can't do it **********/
427
428 /* Assume all files are regular. */
429
/* Every path is assumed to be a regular file.

Argument:  filename   ignored
Returns:   1
*/

int
isregfile(char *filename)
{
  return 1;
}
431
432
433 /************* Test stdout for being a terminal when we can't do it **********/
434
435 static BOOL
436 is_stdout_tty(void)
437 {
438 return FALSE;
439 }
440
441
442 #endif
443
444
445
446 #if ! HAVE_STRERROR
447 /*************************************************
448 * Provide strerror() for non-ANSI libraries *
449 *************************************************/
450
451 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
452 in their libraries, but can provide the same facility by this simple
453 alternative function. */
454
/* The error message table and its size, supplied by the C library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:  n   an error number
Returns:   the entry for n in sys_errlist, or a fixed string if n is
           negative or not less than sys_nerr
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
464 #endif /* HAVE_STRERROR */
465
466
467
468 /*************************************************
469 * Print the previous "after" lines *
470 *************************************************/
471
472 /* This is called if we are about to lose said lines because of buffer filling,
473 and at the end of the file. The data in the line is written using fwrite() so
474 that a binary zero does not terminate it.
475
476 Arguments:
477 lastmatchnumber the number of the last matching line, plus one
478 lastmatchrestart where we restarted after the last match
479 endptr end of available data
480 printname filename for printing
481
482 Returns: nothing
483 */
484
485 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
486 char *endptr, char *printname)
487 {
488 if (after_context > 0 && lastmatchnumber > 0)
489 {
490 int count = 0;
491 while (lastmatchrestart < endptr && count++ < after_context)
492 {
493 char *pp = lastmatchrestart;
494 if (printname != NULL) fprintf(stdout, "%s-", printname);
495 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
496 while (*pp != '\n') pp++;
497 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
498 lastmatchrestart = pp + 1;
499 }
500 hyphenpending = TRUE;
501 }
502 }
503
504
505
506 /*************************************************
507 * Grep an individual file *
508 *************************************************/
509
510 /* This is called from grep_or_recurse() below. It uses a buffer that is three
511 times the value of MBUFTHIRD. The matching point is never allowed to stray into
512 the top third of the buffer, thus keeping more of the file available for
513 context printing or for multiline scanning. For large files, the pointer will
514 be in the middle third most of the time, so the bottom third is available for
515 "before" context printing.
516
517 Arguments:
518 in the fopened FILE stream
519 printname the file name if it is to be printed for each match
520 or NULL if the file name is not to be printed
521 it cannot be NULL if filenames[_nomatch]_only is set
522
523 Returns: 0 if there was at least one match
524 1 otherwise (no matches)
525 */
526
527 static int
528 pcregrep(FILE *in, char *printname)
529 {
530 int rc = 1;
531 int linenumber = 1;
532 int lastmatchnumber = 0;
533 int count = 0;
534 int offsets[99];
535 char *lastmatchrestart = NULL;
536 char buffer[3*MBUFTHIRD];
537 char *ptr = buffer;
538 char *endptr;
539 size_t bufflength;
540 BOOL endhyphenpending = FALSE;
541
542 /* Do the first read into the start of the buffer and set up the pointer to
543 end of what we have. */
544
545 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
546 endptr = buffer + bufflength;
547
548 /* Loop while the current pointer is not at the end of the file. For large
549 files, endptr will be at the end of the buffer when we are in the middle of the
550 file, but ptr will never get there, because as soon as it gets over 2/3 of the
551 way, the buffer is shifted left and re-filled. */
552
553 while (ptr < endptr)
554 {
555 int i;
556 int mrc = 0;
557 BOOL match = FALSE;
558 char *t = ptr;
559 size_t length, linelength;
560
561 /* At this point, ptr is at the start of a line. We need to find the length
562 of the subject string to pass to pcre_exec(). In multiline mode, it is the
563 length remainder of the data in the buffer. Otherwise, it is the length of
564 the next line. After matching, we always advance by the length of the next
565 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
566 that any match is constrained to be in the first line. */
567
568 linelength = 0;
569 while (t < endptr && *t++ != '\n') linelength++;
570 length = multiline? endptr - ptr : linelength;
571
572
573 /* Extra processing for Jeffrey Friedl's debugging. */
574
575 #ifdef JFRIEDL_DEBUG
576 if (jfriedl_XT || jfriedl_XR)
577 {
578 #include <sys/time.h>
579 #include <time.h>
580 struct timeval start_time, end_time;
581 struct timezone dummy;
582
583 if (jfriedl_XT)
584 {
585 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
586 const char *orig = ptr;
587 ptr = malloc(newlen + 1);
588 if (!ptr) {
589 printf("out of memory");
590 exit(2);
591 }
592 endptr = ptr;
593 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
594 for (i = 0; i < jfriedl_XT; i++) {
595 strncpy(endptr, orig, length);
596 endptr += length;
597 }
598 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
599 length = newlen;
600 }
601
602 if (gettimeofday(&start_time, &dummy) != 0)
603 perror("bad gettimeofday");
604
605
606 for (i = 0; i < jfriedl_XR; i++)
607 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
608
609 if (gettimeofday(&end_time, &dummy) != 0)
610 perror("bad gettimeofday");
611
612 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
613 -
614 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
615
616 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
617 return 0;
618 }
619 #endif
620
621
622 /* Run through all the patterns until one matches. Note that we don't include
623 the final newline in the subject string. */
624
625 for (i = 0; i < pattern_count; i++)
626 {
627 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
628 offsets, 99);
629 if (mrc >= 0) { match = TRUE; break; }
630 if (mrc != PCRE_ERROR_NOMATCH)
631 {
632 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
633 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
634 fprintf(stderr, "this line:\n");
635 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
636 fprintf(stderr, "\n");
637 if (error_count == 0 &&
638 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
639 {
640 fprintf(stderr, "pcregrep: error %d means that a resource limit "
641 "was exceeded\n", mrc);
642 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
643 }
644 if (error_count++ > 20)
645 {
646 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
647 exit(2);
648 }
649 match = invert; /* No more matching; don't show the line again */
650 break;
651 }
652 }
653
654 /* If it's a match or a not-match (as required), do what's wanted. */
655
656 if (match != invert)
657 {
658 BOOL hyphenprinted = FALSE;
659
660 /* We've failed if we want a file that doesn't have any matches. */
661
662 if (filenames == FN_NOMATCH_ONLY) return 1;
663
664 /* Just count if just counting is wanted. */
665
666 if (count_only) count++;
667
668 /* If all we want is a file name, there is no need to scan any more lines
669 in the file. */
670
671 else if (filenames == FN_ONLY)
672 {
673 fprintf(stdout, "%s\n", printname);
674 return 0;
675 }
676
677 /* Likewise, if all we want is a yes/no answer. */
678
679 else if (quiet) return 0;
680
681 /* The --only-matching option prints just the substring that matched, and
682 does not pring any context. */
683
684 else if (only_matching)
685 {
686 if (printname != NULL) fprintf(stdout, "%s:", printname);
687 if (number) fprintf(stdout, "%d:", linenumber);
688 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
689 fprintf(stdout, "\n");
690 }
691
692 /* This is the default case when none of the above options is set. We print
693 the matching lines(s), possibly preceded and/or followed by other lines of
694 context. */
695
696 else
697 {
698 /* See if there is a requirement to print some "after" lines from a
699 previous match. We never print any overlaps. */
700
701 if (after_context > 0 && lastmatchnumber > 0)
702 {
703 int linecount = 0;
704 char *p = lastmatchrestart;
705
706 while (p < ptr && linecount < after_context)
707 {
708 while (*p != '\n') p++;
709 p++;
710 linecount++;
711 }
712
713 /* It is important to advance lastmatchrestart during this printing so
714 that it interacts correctly with any "before" printing below. Print
715 each line's data using fwrite() in case there are binary zeroes. */
716
717 while (lastmatchrestart < p)
718 {
719 char *pp = lastmatchrestart;
720 if (printname != NULL) fprintf(stdout, "%s-", printname);
721 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
722 while (*pp != '\n') pp++;
723 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
724 lastmatchrestart = pp + 1;
725 }
726 if (lastmatchrestart != ptr) hyphenpending = TRUE;
727 }
728
729 /* If there were non-contiguous lines printed above, insert hyphens. */
730
731 if (hyphenpending)
732 {
733 fprintf(stdout, "--\n");
734 hyphenpending = FALSE;
735 hyphenprinted = TRUE;
736 }
737
738 /* See if there is a requirement to print some "before" lines for this
739 match. Again, don't print overlaps. */
740
741 if (before_context > 0)
742 {
743 int linecount = 0;
744 char *p = ptr;
745
746 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
747 linecount < before_context)
748 {
749 linecount++;
750 p--;
751 while (p > buffer && p[-1] != '\n') p--;
752 }
753
754 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
755 fprintf(stdout, "--\n");
756
757 while (p < ptr)
758 {
759 char *pp = p;
760 if (printname != NULL) fprintf(stdout, "%s-", printname);
761 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
762 while (*pp != '\n') pp++;
763 fwrite(p, 1, pp - p + 1, stdout); /* In case binary zero */
764 p = pp + 1;
765 }
766 }
767
768 /* Now print the matching line(s); ensure we set hyphenpending at the end
769 of the file if any context lines are being output. */
770
771 if (after_context > 0 || before_context > 0)
772 endhyphenpending = TRUE;
773
774 if (printname != NULL) fprintf(stdout, "%s:", printname);
775 if (number) fprintf(stdout, "%d:", linenumber);
776
777 /* In multiline mode, we want to print to the end of the line in which
778 the end of the matched string is found, so we adjust linelength and the
779 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
780 start of the match will always be before the first \n character. */
781
782 if (multiline)
783 {
784 char *endmatch = ptr + offsets[1];
785 t = ptr;
786 while (t < endmatch) { if (*t++ == '\n') linenumber++; }
787 while (endmatch < endptr && *endmatch != '\n') endmatch++;
788 linelength = endmatch - ptr;
789 }
790
791 /*** NOTE: Use only fwrite() to output the data line, so that binary
792 zeroes are treated as just another data character. */
793
794 /* This extra option, for Jeffrey Friedl's debugging requirements,
795 replaces the matched string, or a specific captured string if it exists,
796 with X. When this happens, colouring is ignored. */
797
798 #ifdef JFRIEDL_DEBUG
799 if (S_arg >= 0 && S_arg < mrc)
800 {
801 int first = S_arg * 2;
802 int last = first + 1;
803 fwrite(ptr, 1, offsets[first], stdout);
804 fprintf(stdout, "X");
805 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
806 }
807 else
808 #endif
809
810 /* We have to split the line(s) up if colouring. */
811
812 if (do_colour)
813 {
814 fwrite(ptr, 1, offsets[0], stdout);
815 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
816 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
817 fprintf(stdout, "%c[00m", 0x1b);
818 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
819 }
820 else fwrite(ptr, 1, linelength, stdout);
821
822 fprintf(stdout, "\n");
823 }
824
825 /* End of doing what has to be done for a match */
826
827 rc = 0; /* Had some success */
828
829 /* Remember where the last match happened for after_context. We remember
830 where we are about to restart, and that line's number. */
831
832 lastmatchrestart = ptr + linelength + 1;
833 lastmatchnumber = linenumber + 1;
834 }
835
836 /* Advance to after the newline and increment the line number. */
837
838 ptr += linelength + 1;
839 linenumber++;
840
841 /* If we haven't yet reached the end of the file (the buffer is full), and
842 the current point is in the top 1/3 of the buffer, slide the buffer down by
843 1/3 and refill it. Before we do this, if some unprinted "after" lines are
844 about to be lost, print them. */
845
846 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
847 {
848 if (after_context > 0 &&
849 lastmatchnumber > 0 &&
850 lastmatchrestart < buffer + MBUFTHIRD)
851 {
852 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
853 lastmatchnumber = 0;
854 }
855
856 /* Now do the shuffle */
857
858 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
859 ptr -= MBUFTHIRD;
860 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
861 endptr = buffer + bufflength;
862
863 /* Adjust any last match point */
864
865 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
866 }
867 } /* Loop through the whole file */
868
869 /* End of file; print final "after" lines if wanted; do_after_lines sets
870 hyphenpending if it prints something. */
871
872 if (!only_matching && !count_only)
873 {
874 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
875 hyphenpending |= endhyphenpending;
876 }
877
878 /* Print the file name if we are looking for those without matches and there
879 were none. If we found a match, we won't have got this far. */
880
881 if (filenames == FN_NOMATCH_ONLY)
882 {
883 fprintf(stdout, "%s\n", printname);
884 return 0;
885 }
886
887 /* Print the match count if wanted */
888
889 if (count_only)
890 {
891 if (printname != NULL) fprintf(stdout, "%s:", printname);
892 fprintf(stdout, "%d\n", count);
893 }
894
895 return rc;
896 }
897
898
899
900 /*************************************************
901 * Grep a file or recurse into a directory *
902 *************************************************/
903
904 /* Given a path name, if it's a directory, scan all the files if we are
905 recursing; if it's a file, grep it.
906
907 Arguments:
908 pathname the path to investigate
909 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
910 only_one_at_top TRUE if the path is the only one at toplevel
911
912 Returns: 0 if there was at least one match
913 1 if there were no matches
914 2 there was some kind of error
915
916 However, file opening failures are suppressed if "silent" is set.
917 */
918
919 static int
920 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
921 {
922 int rc = 1;
923 int sep;
924 FILE *in;
925
926 /* If the file name is "-" we scan stdin */
927
928 if (strcmp(pathname, "-") == 0)
929 {
930 return pcregrep(stdin,
931 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
932 stdin_name : NULL);
933 }
934
935
936 /* If the file is a directory, skip if skipping or if we are recursing, scan
937 each file within it, subject to any include or exclude patterns that were set.
938 The scanning code is localized so it can be made system-specific. */
939
940 if ((sep = isdirectory(pathname)) != 0)
941 {
942 if (dee_action == dee_SKIP) return 1;
943 if (dee_action == dee_RECURSE)
944 {
945 char buffer[1024];
946 char *nextfile;
947 directory_type *dir = opendirectory(pathname);
948
949 if (dir == NULL)
950 {
951 if (!silent)
952 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
953 strerror(errno));
954 return 2;
955 }
956
957 while ((nextfile = readdirectory(dir)) != NULL)
958 {
959 int frc, blen;
960 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
961 blen = strlen(buffer);
962
963 if (exclude_compiled != NULL &&
964 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
965 continue;
966
967 if (include_compiled != NULL &&
968 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
969 continue;
970
971 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
972 if (frc > 1) rc = frc;
973 else if (frc == 0 && rc == 1) rc = 0;
974 }
975
976 closedirectory(dir);
977 return rc;
978 }
979 }
980
981 /* If the file is not a directory and not a regular file, skip it if that's
982 been requested. */
983
984 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
985
986 /* Control reaches here if we have a regular file, or if we have a directory
987 and recursion or skipping was not requested, or if we have anything else and
988 skipping was not requested. The scan proceeds. If this is the first and only
989 argument at top level, we don't show the file name, unless we are only showing
990 the file name, or the filename was forced (-H). */
991
992 in = fopen(pathname, "r");
993 if (in == NULL)
994 {
995 if (!silent)
996 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
997 strerror(errno));
998 return 2;
999 }
1000
1001 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1002 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1003
1004 fclose(in);
1005 return rc;
1006 }
1007
1008
1009
1010
1011 /*************************************************
1012 * Usage function *
1013 *************************************************/
1014
1015 static int
1016 usage(int rc)
1017 {
1018 option_item *op;
1019 fprintf(stderr, "Usage: pcregrep [-");
1020 for (op = optionlist; op->one_char != 0; op++)
1021 {
1022 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1023 }
1024 fprintf(stderr, "] [long options] [pattern] [files]\n");
1025 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1026 return rc;
1027 }
1028
1029
1030
1031
1032 /*************************************************
1033 * Help function *
1034 *************************************************/
1035
1036 static void
1037 help(void)
1038 {
1039 option_item *op;
1040
1041 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1042 printf("Search for PATTERN in each FILE or standard input.\n");
1043 printf("PATTERN must be present if neither -e nor -f is used.\n");
1044 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1045 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1046
1047 printf("Options:\n");
1048
1049 for (op = optionlist; op->one_char != 0; op++)
1050 {
1051 int n;
1052 char s[4];
1053 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1054 printf(" %s --%s%n", s, op->long_name, &n);
1055 n = 30 - n;
1056 if (n < 1) n = 1;
1057 printf("%.*s%s\n", n, " ", op->help_text);
1058 }
1059
1060 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1061 printf("trailing white space is removed and blank lines are ignored.\n");
1062 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1063
1064 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1065 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1066 }
1067
1068
1069
1070
1071 /*************************************************
1072 * Handle a single-letter, no data option *
1073 *************************************************/
1074
1075 static int
1076 handle_option(int letter, int options)
1077 {
1078 switch(letter)
1079 {
1080 case N_HELP: help(); exit(0);
1081 case 'c': count_only = TRUE; break;
1082 case 'F': process_options |= PO_FIXED_STRINGS; break;
1083 case 'H': filenames = FN_FORCE; break;
1084 case 'h': filenames = FN_NONE; break;
1085 case 'i': options |= PCRE_CASELESS; break;
1086 case 'l': filenames = FN_ONLY; break;
1087 case 'L': filenames = FN_NOMATCH_ONLY; break;
1088 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1089 case 'n': number = TRUE; break;
1090 case 'o': only_matching = TRUE; break;
1091 case 'q': quiet = TRUE; break;
1092 case 'r': dee_action = dee_RECURSE; break;
1093 case 's': silent = TRUE; break;
1094 case 'u': options |= PCRE_UTF8; break;
1095 case 'v': invert = TRUE; break;
1096 case 'w': process_options |= PO_WORD_MATCH; break;
1097 case 'x': process_options |= PO_LINE_MATCH; break;
1098
1099 case 'V':
1100 fprintf(stderr, "pcregrep version %s using ", VERSION);
1101 fprintf(stderr, "PCRE version %s\n", pcre_version());
1102 exit(0);
1103 break;
1104
1105 default:
1106 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1107 exit(usage(2));
1108 }
1109
1110 return options;
1111 }
1112
1113
1114
1115
1116 /*************************************************
1117 * Construct printed ordinal *
1118 *************************************************/
1119
1120 /* This turns a number into "1st", "3rd", etc. */
1121
/* Build the English ordinal for a number.

Argument:   n    the number
Returns:    pointer to a static buffer holding the digits of n followed by
            "st", "nd", "rd" or "th"; the buffer is overwritten by the next
            call

Numbers whose last two digits are 11, 12 or 13 take "th" ("11th", "112th"),
not the suffix suggested by the final digit. The buffer is sized from
sizeof(int) so that it cannot overflow for any int value: three characters per
byte is an upper bound on the decimal digits, plus a sign, a two-letter suffix
and the terminating zero. */

static char *
ordin(int n)
{
static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int len = sprintf(buffer, "%d", n);
int last_two = n % 100;

/* For negative n both remainders are non-positive, so "th" is kept. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + len, ending);
return buffer;
}
1138
1139
1140
1141 /*************************************************
1142 * Compile a single pattern *
1143 *************************************************/
1144
1145 /* When the -F option has been used, this is called for each substring.
1146 Otherwise it's called for each supplied pattern.
1147
1148 Arguments:
1149 pattern the pattern string
1150 options the PCRE options
1151 filename the file name, or NULL for a command-line pattern
1152 count 0 if this is the only command line pattern, or
1153 number of the command line pattern, or
1154 linenumber for a pattern from a file
1155
1156 Returns: TRUE on success, FALSE after an error
1157 */
1158
1159 static BOOL
1160 compile_single_pattern(char *pattern, int options, char *filename, int count)
1161 {
1162 char buffer[MBUFTHIRD + 16];
1163 const char *error;
1164 int errptr;
1165
1166 if (pattern_count >= MAX_PATTERN_COUNT)
1167 {
1168 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1169 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1170 return FALSE;
1171 }
1172
1173 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1174 suffix[process_options]);
1175 pattern_list[pattern_count] =
1176 pcre_compile(buffer, options, &error, &errptr, pcretables);
1177 if (pattern_list[pattern_count++] != NULL) return TRUE;
1178
1179 /* Handle compile errors */
1180
1181 errptr -= (int)strlen(prefix[process_options]);
1182 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1183
1184 if (filename == NULL)
1185 {
1186 if (count == 0)
1187 fprintf(stderr, "pcregrep: Error in command-line regex "
1188 "at offset %d: %s\n", errptr, error);
1189 else
1190 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1191 "at offset %d: %s\n", ordin(count), errptr, error);
1192 }
1193 else
1194 {
1195 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1196 "at offset %d: %s\n", count, filename, errptr, error);
1197 }
1198
1199 return FALSE;
1200 }
1201
1202
1203
1204 /*************************************************
1205 * Compile one supplied pattern *
1206 *************************************************/
1207
1208 /* When the -F option has been used, each string may be a list of strings,
1209 separated by newlines. They will be matched literally.
1210
1211 Arguments:
1212 pattern the pattern string
1213 options the PCRE options
1214 filename the file name, or NULL for a command-line pattern
1215 count 0 if this is the only command line pattern, or
1216 number of the command line pattern, or
1217 linenumber for a pattern from a file
1218
1219 Returns: TRUE on success, FALSE after an error
1220 */
1221
1222 static BOOL
1223 compile_pattern(char *pattern, int options, char *filename, int count)
1224 {
1225 if ((process_options & PO_FIXED_STRINGS) != 0)
1226 {
1227 char buffer[MBUFTHIRD];
1228 for(;;)
1229 {
1230 char *p = strchr(pattern, '\n');
1231 if (p == NULL)
1232 return compile_single_pattern(pattern, options, filename, count);
1233 sprintf(buffer, "%.*s", p - pattern, pattern);
1234 pattern = p + 1;
1235 if (!compile_single_pattern(buffer, options, filename, count))
1236 return FALSE;
1237 }
1238 }
1239 else return compile_single_pattern(pattern, options, filename, count);
1240 }
1241
1242
1243
1244 /*************************************************
1245 * Main program *
1246 *************************************************/
1247
1248 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1249
1250 int
1251 main(int argc, char **argv)
1252 {
1253 int i, j;
1254 int rc = 1;
1255 int pcre_options = 0;
1256 int cmd_pattern_count = 0;
1257 int errptr;
1258 BOOL only_one_at_top;
1259 char *patterns[MAX_PATTERN_COUNT];
1260 const char *locale_from = "--locale";
1261 const char *error;
1262
1263 /* Process the options */
1264
1265 for (i = 1; i < argc; i++)
1266 {
1267 option_item *op = NULL;
1268 char *option_data = (char *)""; /* default to keep compiler happy */
1269 BOOL longop;
1270 BOOL longopwasequals = FALSE;
1271
1272 if (argv[i][0] != '-') break;
1273
1274 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1275 but only if we have previously had -e or -f to define the patterns. */
1276
1277 if (argv[i][1] == 0)
1278 {
1279 if (pattern_filename != NULL || pattern_count > 0) break;
1280 else exit(usage(2));
1281 }
1282
1283 /* Handle a long name option, or -- to terminate the options */
1284
1285 if (argv[i][1] == '-')
1286 {
1287 char *arg = argv[i] + 2;
1288 char *argequals = strchr(arg, '=');
1289
1290 if (*arg == 0) /* -- terminates options */
1291 {
1292 i++;
1293 break; /* out of the options-handling loop */
1294 }
1295
1296 longop = TRUE;
1297
1298 /* Some long options have data that follows after =, for example file=name.
1299 Some options have variations in the long name spelling: specifically, we
1300 allow "regexp" because GNU grep allows it, though I personally go along
1301 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1302 These options are entered in the table as "regex(p)". No option is in both
1303 these categories, fortunately. */
1304
1305 for (op = optionlist; op->one_char != 0; op++)
1306 {
1307 char *opbra = strchr(op->long_name, '(');
1308 char *equals = strchr(op->long_name, '=');
1309 if (opbra == NULL) /* Not a (p) case */
1310 {
1311 if (equals == NULL) /* Not thing=data case */
1312 {
1313 if (strcmp(arg, op->long_name) == 0) break;
1314 }
1315 else /* Special case xxx=data */
1316 {
1317 int oplen = equals - op->long_name;
1318 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1319 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1320 {
1321 option_data = arg + arglen;
1322 if (*option_data == '=')
1323 {
1324 option_data++;
1325 longopwasequals = TRUE;
1326 }
1327 break;
1328 }
1329 }
1330 }
1331 else /* Special case xxxx(p) */
1332 {
1333 char buff1[24];
1334 char buff2[24];
1335 int baselen = opbra - op->long_name;
1336 sprintf(buff1, "%.*s", baselen, op->long_name);
1337 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1338 opbra + 1);
1339 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1340 break;
1341 }
1342 }
1343
1344 if (op->one_char == 0)
1345 {
1346 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1347 exit(usage(2));
1348 }
1349 }
1350
1351
1352 /* Jeffrey Friedl's debugging harness uses these additional options which
1353 are not in the right form for putting in the option table because they use
1354 only one hyphen, yet are more than one character long. By putting them
1355 separately here, they will not get displayed as part of the help() output,
1356 but I don't think Jeffrey will care about that. */
1357
1358 #ifdef JFRIEDL_DEBUG
1359 else if (strcmp(argv[i], "-pre") == 0) {
1360 jfriedl_prefix = argv[++i];
1361 continue;
1362 } else if (strcmp(argv[i], "-post") == 0) {
1363 jfriedl_postfix = argv[++i];
1364 continue;
1365 } else if (strcmp(argv[i], "-XT") == 0) {
1366 sscanf(argv[++i], "%d", &jfriedl_XT);
1367 continue;
1368 } else if (strcmp(argv[i], "-XR") == 0) {
1369 sscanf(argv[++i], "%d", &jfriedl_XR);
1370 continue;
1371 }
1372 #endif
1373
1374
1375 /* One-char options; many that have no data may be in a single argument; we
1376 continue till we hit the last one or one that needs data. */
1377
1378 else
1379 {
1380 char *s = argv[i] + 1;
1381 longop = FALSE;
1382 while (*s != 0)
1383 {
1384 for (op = optionlist; op->one_char != 0; op++)
1385 { if (*s == op->one_char) break; }
1386 if (op->one_char == 0)
1387 {
1388 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1389 *s, argv[i]);
1390 exit(usage(2));
1391 }
1392 if (op->type != OP_NODATA || s[1] == 0)
1393 {
1394 option_data = s+1;
1395 break;
1396 }
1397 pcre_options = handle_option(*s++, pcre_options);
1398 }
1399 }
1400
1401 /* At this point we should have op pointing to a matched option. If the type
1402 is NO_DATA, it means that there is no data, and the option might set
1403 something in the PCRE options. */
1404
1405 if (op->type == OP_NODATA)
1406 {
1407 pcre_options = handle_option(op->one_char, pcre_options);
1408 continue;
1409 }
1410
1411 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1412 either has a value or defaults to something. It cannot have data in a
1413 separate item. At the moment, the only such options are "colo(u)r" and
1414 Jeffrey Friedl's special -S debugging option. */
1415
1416 if (*option_data == 0 &&
1417 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1418 {
1419 switch (op->one_char)
1420 {
1421 case N_COLOUR:
1422 colour_option = (char *)"auto";
1423 break;
1424 #ifdef JFRIEDL_DEBUG
1425 case 'S':
1426 S_arg = 0;
1427 break;
1428 #endif
1429 }
1430 continue;
1431 }
1432
1433 /* Otherwise, find the data string for the option. */
1434
1435 if (*option_data == 0)
1436 {
1437 if (i >= argc - 1 || longopwasequals)
1438 {
1439 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1440 exit(usage(2));
1441 }
1442 option_data = argv[++i];
1443 }
1444
1445 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1446 multiple times to create a list of patterns. */
1447
1448 if (op->type == OP_PATLIST)
1449 {
1450 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1451 {
1452 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1453 MAX_PATTERN_COUNT);
1454 return 2;
1455 }
1456 patterns[cmd_pattern_count++] = option_data;
1457 }
1458
1459 /* Otherwise, deal with single string or numeric data values. */
1460
1461 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1462 {
1463 *((char **)op->dataptr) = option_data;
1464 }
1465 else
1466 {
1467 char *endptr;
1468 int n = strtoul(option_data, &endptr, 10);
1469 if (*endptr != 0)
1470 {
1471 if (longop)
1472 {
1473 char *equals = strchr(op->long_name, '=');
1474 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1475 equals - op->long_name;
1476 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1477 option_data, nlen, op->long_name);
1478 }
1479 else
1480 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1481 option_data, op->one_char);
1482 exit(usage(2));
1483 }
1484 *((int *)op->dataptr) = n;
1485 }
1486 }
1487
1488 /* Options have been decoded. If -C was used, its value is used as a default
1489 for -A and -B. */
1490
1491 if (both_context > 0)
1492 {
1493 if (after_context == 0) after_context = both_context;
1494 if (before_context == 0) before_context = both_context;
1495 }
1496
1497 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1498 LC_ALL environment variable is set, and if so, use it. */
1499
1500 if (locale == NULL)
1501 {
1502 locale = getenv("LC_ALL");
1503 locale_from = "LCC_ALL";
1504 }
1505
1506 if (locale == NULL)
1507 {
1508 locale = getenv("LC_CTYPE");
1509 locale_from = "LC_CTYPE";
1510 }
1511
1512 /* If a locale has been provided, set it, and generate the tables the PCRE
1513 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1514
1515 if (locale != NULL)
1516 {
1517 if (setlocale(LC_CTYPE, locale) == NULL)
1518 {
1519 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1520 locale, locale_from);
1521 return 2;
1522 }
1523 pcretables = pcre_maketables();
1524 }
1525
1526 /* Sort out colouring */
1527
1528 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1529 {
1530 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1531 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1532 else
1533 {
1534 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1535 colour_option);
1536 return 2;
1537 }
1538 if (do_colour)
1539 {
1540 char *cs = getenv("PCREGREP_COLOUR");
1541 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1542 if (cs != NULL) colour_string = cs;
1543 }
1544 }
1545
1546 /* Interpret the text values for -d and -D */
1547
1548 if (dee_option != NULL)
1549 {
1550 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1551 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1552 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1553 else
1554 {
1555 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1556 return 2;
1557 }
1558 }
1559
1560 if (DEE_option != NULL)
1561 {
1562 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1563 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1564 else
1565 {
1566 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1567 return 2;
1568 }
1569 }
1570
1571 /* Check the values for Jeffrey Friedl's debugging options. */
1572
1573 #ifdef JFRIEDL_DEBUG
1574 if (S_arg > 9)
1575 {
1576 fprintf(stderr, "pcregrep: bad value for -S option\n");
1577 return 2;
1578 }
1579 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1580 {
1581 if (jfriedl_XT == 0) jfriedl_XT = 1;
1582 if (jfriedl_XR == 0) jfriedl_XR = 1;
1583 }
1584 #endif
1585
1586 /* Get memory to store the pattern and hints lists. */
1587
1588 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1589 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1590
1591 if (pattern_list == NULL || hints_list == NULL)
1592 {
1593 fprintf(stderr, "pcregrep: malloc failed\n");
1594 return 2;
1595 }
1596
1597 /* If no patterns were provided by -e, and there is no file provided by -f,
1598 the first argument is the one and only pattern, and it must exist. */
1599
1600 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1601 {
1602 if (i >= argc) return usage(2);
1603 patterns[cmd_pattern_count++] = argv[i++];
1604 }
1605
1606 /* Compile the patterns that were provided on the command line, either by
1607 multiple uses of -e or as a single unkeyed pattern. */
1608
1609 for (j = 0; j < cmd_pattern_count; j++)
1610 {
1611 if (!compile_pattern(patterns[j], pcre_options, NULL,
1612 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1613 return 2;
1614 }
1615
1616 /* Compile the regular expressions that are provided in a file. */
1617
1618 if (pattern_filename != NULL)
1619 {
1620 int linenumber = 0;
1621 FILE *f;
1622 char *filename;
1623 char buffer[MBUFTHIRD];
1624
1625 if (strcmp(pattern_filename, "-") == 0)
1626 {
1627 f = stdin;
1628 filename = stdin_name;
1629 }
1630 else
1631 {
1632 f = fopen(pattern_filename, "r");
1633 if (f == NULL)
1634 {
1635 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1636 strerror(errno));
1637 return 2;
1638 }
1639 filename = pattern_filename;
1640 }
1641
1642 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1643 {
1644 char *s = buffer + (int)strlen(buffer);
1645 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1646 *s = 0;
1647 linenumber++;
1648 if (buffer[0] == 0) continue; /* Skip blank lines */
1649 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1650 return 2;
1651 }
1652
1653 if (f != stdin) fclose(f);
1654 }
1655
1656 /* Study the regular expressions, as we will be running them many times */
1657
1658 for (j = 0; j < pattern_count; j++)
1659 {
1660 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1661 if (error != NULL)
1662 {
1663 char s[16];
1664 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1665 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1666 return 2;
1667 }
1668 }
1669
1670 /* If there are include or exclude patterns, compile them. */
1671
1672 if (exclude_pattern != NULL)
1673 {
1674 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1675 pcretables);
1676 if (exclude_compiled == NULL)
1677 {
1678 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1679 errptr, error);
1680 return 2;
1681 }
1682 }
1683
1684 if (include_pattern != NULL)
1685 {
1686 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1687 pcretables);
1688 if (include_compiled == NULL)
1689 {
1690 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1691 errptr, error);
1692 return 2;
1693 }
1694 }
1695
1696 /* If there are no further arguments, do the business on stdin and exit. */
1697
1698 if (i >= argc)
1699 return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1700
1701 /* Otherwise, work through the remaining arguments as files or directories.
1702 Pass in the fact that there is only one argument at top level - this suppresses
1703 the file name if the argument is not a directory and filenames are not
1704 otherwise forced. */
1705
1706 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1707
1708 for (; i < argc; i++)
1709 {
1710 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1711 only_one_at_top);
1712 if (frc > 1) rc = frc;
1713 else if (frc == 0 && rc == 1) rc = 0;
1714 }
1715
1716 return rc;
1717 }
1718
1719 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12