/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (show annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 52037 byte(s)
Load pcre-6.7 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <locale.h>
42 #include <stdio.h>
43 #include <string.h>
44 #include <stdlib.h>
45 #include <errno.h>
46
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <unistd.h>
50
51 #include "config.h"
52 #include "pcre.h"
53
/* Boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Program version string and the limit named MAX_PATTERN_COUNT (its users are
not in this part of the file). */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.3 01-Jun-2006"

/* One third of the main scanning buffer: BUFSIZ when that is larger than
8192, otherwise 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif


/* Values for the "filenames" variable, which controls file name output. The
order matters: a file name is wanted for every value greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Actions for the -d option (directories) and the -D option (devices). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special pattern-processing options. */

#define PO_WORD_MATCH    0x0001
#define PO_LINE_MATCH    0x0002
#define PO_FIXED_STRINGS 0x0004
86
87
88 /*************************************************
89 * Global variables *
90 *************************************************/
91
92 /* Jeffrey Friedl has some debugging requirements that are not part of the
93 regular code. */
94
95 #ifdef JFRIEDL_DEBUG
96 static int S_arg = -1;
97 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99 static const char *jfriedl_prefix = "";
100 static const char *jfriedl_postfix = "";
101 #endif
102
103 static int endlinebyte = '\n'; /* Last byte of endline sequence */
104 static int endlineextra = 0; /* Extra bytes for endline sequence */
105
106 static char *colour_string = (char *)"1;31";
107 static char *colour_option = NULL;
108 static char *dee_option = NULL;
109 static char *DEE_option = NULL;
110 static char *newline = NULL;
111 static char *pattern_filename = NULL;
112 static char *stdin_name = (char *)"(standard input)";
113 static char *locale = NULL;
114
115 static const unsigned char *pcretables = NULL;
116
117 static int pattern_count = 0;
118 static pcre **pattern_list;
119 static pcre_extra **hints_list;
120
121 static char *include_pattern = NULL;
122 static char *exclude_pattern = NULL;
123
124 static pcre *include_compiled = NULL;
125 static pcre *exclude_compiled = NULL;
126
127 static int after_context = 0;
128 static int before_context = 0;
129 static int both_context = 0;
130 static int dee_action = dee_READ;
131 static int DEE_action = DEE_READ;
132 static int error_count = 0;
133 static int filenames = FN_DEFAULT;
134 static int process_options = 0;
135
136 static BOOL count_only = FALSE;
137 static BOOL do_colour = FALSE;
138 static BOOL hyphenpending = FALSE;
139 static BOOL invert = FALSE;
140 static BOOL multiline = FALSE;
141 static BOOL number = FALSE;
142 static BOOL only_matching = FALSE;
143 static BOOL quiet = FALSE;
144 static BOOL silent = FALSE;
145
/* The kinds of data an option may carry, followed by the structure that
describes one option. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_ values above */
  int one_char;            /* single-letter form, or a negative N_ code */
  void *dataptr;           /* where the option's value is stored, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* text for the help listing */
} option_item;

/* Options that have no single-letter equivalent are given a negative code, which
can be used to identify them. */

#define N_COLOUR  (-1)
#define N_EXCLUDE (-2)
#define N_HELP    (-3)
#define N_INCLUDE (-4)
#define N_LABEL   (-5)
#define N_LOCALE  (-6)
#define N_NULL    (-7)
169
170 static option_item optionlist[] = {
171 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
172 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
173 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
174 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
175 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
176 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
177 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
178 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
179 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
180 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
181 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
182 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
183 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
184 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
185 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
186 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
187 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
188 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
189 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
190 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
191 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
192 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LR, CRLF)" },
193 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
194 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
195 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
196 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
197 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
198 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
199 #ifdef JFRIEDL_DEBUG
200 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
201 #endif
202 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
203 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
204 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
205 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
206 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
207 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
208 { OP_NODATA, 0, NULL, NULL, NULL }
209 };
210
/* Text wrapped round each pattern according to the -w, -x, and -F options,
which set the 1, 2, and 4 bits of process_options respectively; the tables are
indexed by that value. Using -w together with -x has the same effect as -x
alone, so those entries are duplicates. */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
221
222
223
224 /*************************************************
225 * OS-specific functions *
226 *************************************************/
227
228 /* These functions are defined so that they can be made system specific,
229 although at present the only ones are for Unix, Win32, and for "no support". */
230
231
232 /************* Directory scanning in Unix ***********/
233
234 #if IS_UNIX
235 #include <sys/types.h>
236 #include <sys/stat.h>
237 #include <dirent.h>
238
/* In the Unix build a directory handle is simply the DIR type from <dirent.h>;
the directory functions in this section pass it straight to opendir(),
readdir() and closedir(). */
239 typedef DIR directory_type;
240
/* Test whether a path names a directory.

Argument:  filename   the path to test
Returns:   '/' (the separator to use when building paths inside it) if the
             path is a directory
           0 if it is not, or if stat() fails - in the expectation that
             opening it as a file will then fail too
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';
  return 0;
}
249
250 static directory_type *
251 opendirectory(char *filename)
252 {
253 return opendir(filename);
254 }
255
256 static char *
257 readdirectory(directory_type *dir)
258 {
259 for (;;)
260 {
261 struct dirent *dent = readdir(dir);
262 if (dent == NULL) return NULL;
263 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
264 return dent->d_name;
265 }
266 return NULL; /* Keep compiler happy; never executed */
267 }
268
269 static void
270 closedirectory(directory_type *dir)
271 {
272 closedir(dir);
273 }
274
275
276 /************* Test for regular file in Unix **********/
277
/* Test whether a path names a regular file.

Argument:  filename   the path to test
Returns:   1 if it is a regular file, 0 if it is something else; also 1 if
             stat() fails, in the expectation that opening it as a file will
             then fail
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISREG(info.st_mode);
  return 1;
}
286
287
288 /************* Test stdout for being a terminal in Unix **********/
289
290 static BOOL
291 is_stdout_tty(void)
292 {
293 return isatty(fileno(stdout));
294 }
295
296
297 /************* Directory scanning in Win32 ***********/
298
299 /* I (Philip Hazel) have no means of testing this code. It was contributed by
300 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
301 when it did not exist. */
302
303
304 #elif HAVE_WIN32API
305
306 #ifndef STRICT
307 # define STRICT
308 #endif
309 #ifndef WIN32_LEAN_AND_MEAN
310 # define WIN32_LEAN_AND_MEAN
311 #endif
312 #ifndef INVALID_FILE_ATTRIBUTES
313 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
314 #endif
315
316 #include <windows.h>
317
318 typedef struct directory_type
319 {
320 HANDLE handle;
321 BOOL first;
322 WIN32_FIND_DATA data;
323 } directory_type;
324
325 int
326 isdirectory(char *filename)
327 {
328 DWORD attr = GetFileAttributes(filename);
329 if (attr == INVALID_FILE_ATTRIBUTES)
330 return 0;
331 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
332 }
333
334 directory_type *
335 opendirectory(char *filename)
336 {
337 size_t len;
338 char *pattern;
339 directory_type *dir;
340 DWORD err;
341 len = strlen(filename);
342 pattern = (char *) malloc(len + 3);
343 dir = (directory_type *) malloc(sizeof(*dir));
344 if ((pattern == NULL) || (dir == NULL))
345 {
346 fprintf(stderr, "pcregrep: malloc failed\n");
347 exit(2);
348 }
349 memcpy(pattern, filename, len);
350 memcpy(&(pattern[len]), "\\*", 3);
351 dir->handle = FindFirstFile(pattern, &(dir->data));
352 if (dir->handle != INVALID_HANDLE_VALUE)
353 {
354 free(pattern);
355 dir->first = TRUE;
356 return dir;
357 }
358 err = GetLastError();
359 free(pattern);
360 free(dir);
361 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
362 return NULL;
363 }
364
365 char *
366 readdirectory(directory_type *dir)
367 {
368 for (;;)
369 {
370 if (!dir->first)
371 {
372 if (!FindNextFile(dir->handle, &(dir->data)))
373 return NULL;
374 }
375 else
376 {
377 dir->first = FALSE;
378 }
379 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
380 return dir->data.cFileName;
381 }
382 #ifndef _MSC_VER
383 return NULL; /* Keep compiler happy; never executed */
384 #endif
385 }
386
387 void
388 closedirectory(directory_type *dir)
389 {
390 FindClose(dir->handle);
391 free(dir);
392 }
393
394
395 /************* Test for regular file in Win32 **********/
396
397 /* I don't know how to do this, or if it can be done; assume all paths are
398 regular if they are not directories. */
399
/* Test for a regular file (Win32). No real test is made: every path that is
not a directory is assumed to be regular.

Argument:  filename   the path to test
Returns:   non-zero if isdirectory() says the path is not a directory
*/

int isregfile(char *filename)
{
  /* The statement terminator was missing here, which is a syntax error. */
  return !isdirectory(filename);
}
404
405
406 /************* Test stdout for being a terminal in Win32 **********/
407
408 /* I don't know how to do this; assume never */
409
410 static BOOL
411 is_stdout_tty(void)
412 {
413 FALSE;
414 }
415
416
417 /************* Directory scanning when we can't do it ***********/
418
419 /* The type is void, and apart from isdirectory(), the functions do nothing. */
420
421 #else
422
/* Stand-ins used when directory scanning is not supported. isdirectory()
always says "not a directory". opendirectory() and readdirectory() now return
NULL explicitly; previously they had empty bodies despite declaring a return
type, so a caller that used the result would have read an undefined value. */

typedef void directory_type;

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
429
430
431 /************* Test for regular when we can't do it **********/
432
433 /* Assume all files are regular. */
434
/* With no way of testing, every path is taken to be a regular file; the
argument is not examined. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
436
437
438 /************* Test stdout for being a terminal when we can't do it **********/
439
440 static BOOL
441 is_stdout_tty(void)
442 {
443 return FALSE;
444 }
445
446
447 #endif
448
449
450
451 #if ! HAVE_STRERROR
452 /*************************************************
453 * Provide strerror() for non-ANSI libraries *
454 *************************************************/
455
456 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
457 in their libraries, but can provide the same facility by this simple
458 alternative function. */
459
/* Replacement strerror() built on the sys_errlist table, for libraries that
lack the real function.

Argument:  n   an error number
Returns:   the table's text for n, or a fixed "unknown" string when n is
             negative or not below sys_nerr
*/

extern char *sys_errlist[];
extern int sys_nerr;

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
469 #endif /* HAVE_STRERROR */
470
471
472
473 /*************************************************
474 * Print the previous "after" lines *
475 *************************************************/
476
477 /* This is called if we are about to lose said lines because of buffer filling,
478 and at the end of the file. The data in the line is written using fwrite() so
479 that a binary zero does not terminate it.
480
481 Arguments:
482 lastmatchnumber the number of the last matching line, plus one
483 lastmatchrestart where we restarted after the last match
484 endptr end of available data
485 printname filename for printing
486
487 Returns: nothing
488 */
489
490 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
491 char *endptr, char *printname)
492 {
493 if (after_context > 0 && lastmatchnumber > 0)
494 {
495 int count = 0;
496 while (lastmatchrestart < endptr && count++ < after_context)
497 {
498 char *pp = lastmatchrestart;
499 if (printname != NULL) fprintf(stdout, "%s-", printname);
500 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
501 while (*pp != endlinebyte) pp++;
502 fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
503 stdout);
504 lastmatchrestart = pp + 1;
505 }
506 hyphenpending = TRUE;
507 }
508 }
509
510
511
512 /*************************************************
513 * Grep an individual file *
514 *************************************************/
515
516 /* This is called from grep_or_recurse() below. It uses a buffer that is three
517 times the value of MBUFTHIRD. The matching point is never allowed to stray into
518 the top third of the buffer, thus keeping more of the file available for
519 context printing or for multiline scanning. For large files, the pointer will
520 be in the middle third most of the time, so the bottom third is available for
521 "before" context printing.
522
523 Arguments:
524 in the fopened FILE stream
525 printname the file name if it is to be printed for each match
526 or NULL if the file name is not to be printed
527 it cannot be NULL if filenames[_nomatch]_only is set
528
529 Returns: 0 if there was at least one match
530 1 otherwise (no matches)
531 */
532
/* Scan one open stream a line at a time against every compiled pattern and
write the selected lines (or, depending on the options, a count, the file name,
or only the matched text) to stdout. A line is "selected" when its match result
differs from "invert". Returns 0 if at least one line was selected and 1
otherwise, except in FN_NOMATCH_ONLY mode, where it returns 1 as soon as a line
is selected and 0 (after printing the name) when none was. */
533 static int
534 pcregrep(FILE *in, char *printname)
535 {
536 int rc = 1; /* becomes 0 once any line has been selected */
537 int linenumber = 1; /* number of the line that starts at ptr */
538 int lastmatchnumber = 0; /* last selected line number + 1; 0 = nothing pending */
539 int count = 0; /* selected lines, reported when count_only is set */
540 int offsets[99]; /* offset vector filled in by pcre_exec() */
541 char *lastmatchrestart = NULL; /* start of the line after the last selected one */
542 char buffer[3*MBUFTHIRD];
543 char *ptr = buffer; /* start of the current line */
544 char *endptr; /* end of the valid data in buffer */
545 size_t bufflength;
546 BOOL endhyphenpending = FALSE;
547
548 /* Do the first read into the start of the buffer and set up the pointer to
549 end of what we have. */
550
551 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
552 endptr = buffer + bufflength;
553
554 /* Loop while the current pointer is not at the end of the file. For large
555 files, endptr will be at the end of the buffer when we are in the middle of the
556 file, but ptr will never get there, because as soon as it gets over 2/3 of the
557 way, the buffer is shifted left and re-filled. */
558
559 while (ptr < endptr)
560 {
561 int i;
562 int mrc = 0;
563 BOOL match = FALSE;
564 char *t = ptr;
565 size_t length, linelength;
566
567 /* At this point, ptr is at the start of a line. We need to find the length
568 of the subject string to pass to pcre_exec(). In multiline mode, it is the
569 length remainder of the data in the buffer. Otherwise, it is the length of
570 the next line. After matching, we always advance by the length of the next
571 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
572 that any match is constrained to be in the first line. */
573
574 linelength = 0;
575 while (t < endptr && *t++ != endlinebyte) linelength++;
576 length = multiline? endptr - ptr : linelength;
577
578
579 /* Extra processing for Jeffrey Friedl's debugging. */
580
/* NOTE(review): in this debug-only path the replicated text obtained from
malloc() is never freed; the function returns straight after printing the
timing, without scanning any further lines. */
581 #ifdef JFRIEDL_DEBUG
582 if (jfriedl_XT || jfriedl_XR)
583 {
584 #include <sys/time.h>
585 #include <time.h>
586 struct timeval start_time, end_time;
587 struct timezone dummy;
588
589 if (jfriedl_XT)
590 {
591 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
592 const char *orig = ptr;
593 ptr = malloc(newlen + 1);
594 if (!ptr) {
595 printf("out of memory");
596 exit(2);
597 }
598 endptr = ptr;
599 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
600 for (i = 0; i < jfriedl_XT; i++) {
601 strncpy(endptr, orig, length);
602 endptr += length;
603 }
604 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
605 length = newlen;
606 }
607
608 if (gettimeofday(&start_time, &dummy) != 0)
609 perror("bad gettimeofday");
610
611
612 for (i = 0; i < jfriedl_XR; i++)
613 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
614
615 if (gettimeofday(&end_time, &dummy) != 0)
616 perror("bad gettimeofday");
617
618 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
619 -
620 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
621
622 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
623 return 0;
624 }
625 #endif
626
627
628 /* Run through all the patterns until one matches. Note that we don't include
629 the final newline in the subject string. */
630
631 for (i = 0; i < pattern_count; i++)
632 {
633 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
634 offsets, 99);
635 if (mrc >= 0) { match = TRUE; break; }
/* Any result other than "no match" is a real error: report it with the line
that caused it, explain resource-limit errors the first time only, and give up
altogether once error_count has passed 20. */
636 if (mrc != PCRE_ERROR_NOMATCH)
637 {
638 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
639 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
640 fprintf(stderr, "this line:\n");
641 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
642 fprintf(stderr, "\n");
643 if (error_count == 0 &&
644 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
645 {
646 fprintf(stderr, "pcregrep: error %d means that a resource limit "
647 "was exceeded\n", mrc);
648 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
649 }
650 if (error_count++ > 20)
651 {
652 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
653 exit(2);
654 }
655 match = invert; /* No more matching; don't show the line again */
656 break;
657 }
658 }
659
660 /* If it's a match or a not-match (as required), do what's wanted. */
661
662 if (match != invert)
663 {
664 BOOL hyphenprinted = FALSE;
665
666 /* We've failed if we want a file that doesn't have any matches. */
667
668 if (filenames == FN_NOMATCH_ONLY) return 1;
669
670 /* Just count if just counting is wanted. */
671
672 if (count_only) count++;
673
674 /* If all we want is a file name, there is no need to scan any more lines
675 in the file. */
676
677 else if (filenames == FN_ONLY)
678 {
679 fprintf(stdout, "%s\n", printname);
680 return 0;
681 }
682
683 /* Likewise, if all we want is a yes/no answer. */
684
685 else if (quiet) return 0;
686
687 /* The --only-matching option prints just the substring that matched, and
688 does not print any context. */
689
690 else if (only_matching)
691 {
692 if (printname != NULL) fprintf(stdout, "%s:", printname);
693 if (number) fprintf(stdout, "%d:", linenumber);
694 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
695 fprintf(stdout, "\n");
696 }
697
698 /* This is the default case when none of the above options is set. We print
699 the matching lines(s), possibly preceded and/or followed by other lines of
700 context. */
701
702 else
703 {
704 /* See if there is a requirement to print some "after" lines from a
705 previous match. We never print any overlaps. */
706
707 if (after_context > 0 && lastmatchnumber > 0)
708 {
709 int linecount = 0;
710 char *p = lastmatchrestart;
711
/* Step p over at most after_context whole lines, stopping at the current
line. */
712 while (p < ptr && linecount < after_context)
713 {
714 while (*p != endlinebyte) p++;
715 p++;
716 linecount++;
717 }
718
719 /* It is important to advance lastmatchrestart during this printing so
720 that it interacts correctly with any "before" printing below. Print
721 each line's data using fwrite() in case there are binary zeroes. */
722
723 while (lastmatchrestart < p)
724 {
725 char *pp = lastmatchrestart;
726 if (printname != NULL) fprintf(stdout, "%s-", printname);
727 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
728 while (*pp != endlinebyte) pp++;
729 fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
730 (1 + endlineextra), stdout);
731 lastmatchrestart = pp + 1;
732 }
733 if (lastmatchrestart != ptr) hyphenpending = TRUE;
734 }
735
736 /* If there were non-contiguous lines printed above, insert hyphens. */
737
738 if (hyphenpending)
739 {
740 fprintf(stdout, "--\n");
741 hyphenpending = FALSE;
742 hyphenprinted = TRUE;
743 }
744
745 /* See if there is a requirement to print some "before" lines for this
746 match. Again, don't print overlaps. */
747
748 if (before_context > 0)
749 {
750 int linecount = 0;
751 char *p = ptr;
752
/* Walk p backwards one line at a time, stopping at the start of the buffer,
at the restart point of the previous selected line, or after before_context
lines. */
753 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
754 linecount < before_context)
755 {
756 linecount++;
757 p--;
758 while (p > buffer && p[-1] != endlinebyte) p--;
759 }
760
761 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
762 fprintf(stdout, "--\n");
763
764 while (p < ptr)
765 {
766 char *pp = p;
767 if (printname != NULL) fprintf(stdout, "%s-", printname);
768 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
769 while (*pp != endlinebyte) pp++;
770 fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
771 p = pp + 1;
772 }
773 }
774
775 /* Now print the matching line(s); ensure we set hyphenpending at the end
776 of the file if any context lines are being output. */
777
778 if (after_context > 0 || before_context > 0)
779 endhyphenpending = TRUE;
780
781 if (printname != NULL) fprintf(stdout, "%s:", printname);
782 if (number) fprintf(stdout, "%d:", linenumber);
783
784 /* In multiline mode, we want to print to the end of the line in which
785 the end of the matched string is found, so we adjust linelength and the
786 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
787 start of the match will always be before the first newline sequence. */
788
789 if (multiline)
790 {
791 char *endmatch = ptr + offsets[1];
792 t = ptr;
793 while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
794 while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
795 linelength = endmatch - ptr;
796 }
797
798 /*** NOTE: Use only fwrite() to output the data line, so that binary
799 zeroes are treated as just another data character. */
800
801 /* This extra option, for Jeffrey Friedl's debugging requirements,
802 replaces the matched string, or a specific captured string if it exists,
803 with X. When this happens, colouring is ignored. */
804
805 #ifdef JFRIEDL_DEBUG
806 if (S_arg >= 0 && S_arg < mrc)
807 {
808 int first = S_arg * 2;
809 int last = first + 1;
810 fwrite(ptr, 1, offsets[first], stdout);
811 fprintf(stdout, "X");
812 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
813 }
814 else
815 #endif
816
817 /* We have to split the line(s) up if colouring. */
818
819 if (do_colour)
820 {
821 fwrite(ptr, 1, offsets[0], stdout);
822 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
823 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
824 fprintf(stdout, "%c[00m", 0x1b);
825 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
826 }
827 else fwrite(ptr, 1, linelength, stdout);
828
829 fprintf(stdout, "\n");
830 }
831
832 /* End of doing what has to be done for a match */
833
834 rc = 0; /* Had some success */
835
836 /* Remember where the last match happened for after_context. We remember
837 where we are about to restart, and that line's number. */
838
839 lastmatchrestart = ptr + linelength + 1;
840 lastmatchnumber = linenumber + 1;
841 }
842
843 /* Advance to after the newline and increment the line number. */
844
845 ptr += linelength + 1;
846 linenumber++;
847
848 /* If we haven't yet reached the end of the file (the buffer is full), and
849 the current point is in the top 1/3 of the buffer, slide the buffer down by
850 1/3 and refill it. Before we do this, if some unprinted "after" lines are
851 about to be lost, print them. */
852
853 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
854 {
855 if (after_context > 0 &&
856 lastmatchnumber > 0 &&
857 lastmatchrestart < buffer + MBUFTHIRD)
858 {
859 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
860 lastmatchnumber = 0;
861 }
862
863 /* Now do the shuffle */
864
/* The data moves down by MBUFTHIRD bytes, so ptr (and, below, any pending
lastmatchrestart) is moved down by the same amount. */
865 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
866 ptr -= MBUFTHIRD;
867 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
868 endptr = buffer + bufflength;
869
870 /* Adjust any last match point */
871
872 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
873 }
874 } /* Loop through the whole file */
875
876 /* End of file; print final "after" lines if wanted; do_after_lines sets
877 hyphenpending if it prints something. */
878
879 if (!only_matching && !count_only)
880 {
881 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
882 hyphenpending |= endhyphenpending;
883 }
884
885 /* Print the file name if we are looking for those without matches and there
886 were none. If we found a match, we won't have got this far. */
887
888 if (filenames == FN_NOMATCH_ONLY)
889 {
890 fprintf(stdout, "%s\n", printname);
891 return 0;
892 }
893
894 /* Print the match count if wanted */
895
896 if (count_only)
897 {
898 if (printname != NULL) fprintf(stdout, "%s:", printname);
899 fprintf(stdout, "%d\n", count);
900 }
901
902 return rc;
903 }
904
905
906
907 /*************************************************
908 * Grep a file or recurse into a directory *
909 *************************************************/
910
911 /* Given a path name, if it's a directory, scan all the files if we are
912 recursing; if it's a file, grep it.
913
914 Arguments:
915 pathname the path to investigate
916 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
917 only_one_at_top TRUE if the path is the only one at toplevel
918
919 Returns: 0 if there was at least one match
920 1 if there were no matches
921 2 there was some kind of error
922
923 However, file opening failures are suppressed if "silent" is set.
924 */
925
926 static int
927 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
928 {
929 int rc = 1;
930 int sep;
931 FILE *in;
932
933 /* If the file name is "-" we scan stdin */
934
935 if (strcmp(pathname, "-") == 0)
936 {
937 return pcregrep(stdin,
938 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
939 stdin_name : NULL);
940 }
941
942
943 /* If the file is a directory, skip if skipping or if we are recursing, scan
944 each file within it, subject to any include or exclude patterns that were set.
945 The scanning code is localized so it can be made system-specific. */
946
947 if ((sep = isdirectory(pathname)) != 0)
948 {
949 if (dee_action == dee_SKIP) return 1;
950 if (dee_action == dee_RECURSE)
951 {
952 char buffer[1024];
953 char *nextfile;
954 directory_type *dir = opendirectory(pathname);
955
956 if (dir == NULL)
957 {
958 if (!silent)
959 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
960 strerror(errno));
961 return 2;
962 }
963
964 while ((nextfile = readdirectory(dir)) != NULL)
965 {
966 int frc, blen;
967 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
968 blen = strlen(buffer);
969
970 if (exclude_compiled != NULL &&
971 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
972 continue;
973
974 if (include_compiled != NULL &&
975 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
976 continue;
977
978 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
979 if (frc > 1) rc = frc;
980 else if (frc == 0 && rc == 1) rc = 0;
981 }
982
983 closedirectory(dir);
984 return rc;
985 }
986 }
987
988 /* If the file is not a directory and not a regular file, skip it if that's
989 been requested. */
990
991 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
992
993 /* Control reaches here if we have a regular file, or if we have a directory
994 and recursion or skipping was not requested, or if we have anything else and
995 skipping was not requested. The scan proceeds. If this is the first and only
996 argument at top level, we don't show the file name, unless we are only showing
997 the file name, or the filename was forced (-H). */
998
999 in = fopen(pathname, "r");
1000 if (in == NULL)
1001 {
1002 if (!silent)
1003 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1004 strerror(errno));
1005 return 2;
1006 }
1007
1008 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1009 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1010
1011 fclose(in);
1012 return rc;
1013 }
1014
1015
1016
1017
1018 /*************************************************
1019 * Usage function *
1020 *************************************************/
1021
1022 static int
1023 usage(int rc)
1024 {
1025 option_item *op;
1026 fprintf(stderr, "Usage: pcregrep [-");
1027 for (op = optionlist; op->one_char != 0; op++)
1028 {
1029 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1030 }
1031 fprintf(stderr, "] [long options] [pattern] [files]\n");
1032 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1033 return rc;
1034 }
1035
1036
1037
1038
1039 /*************************************************
1040 * Help function *
1041 *************************************************/
1042
1043 static void
1044 help(void)
1045 {
1046 option_item *op;
1047
1048 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1049 printf("Search for PATTERN in each FILE or standard input.\n");
1050 printf("PATTERN must be present if neither -e nor -f is used.\n");
1051 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1052 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1053
1054 printf("Options:\n");
1055
1056 for (op = optionlist; op->one_char != 0; op++)
1057 {
1058 int n;
1059 char s[4];
1060 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1061 printf(" %s --%s%n", s, op->long_name, &n);
1062 n = 30 - n;
1063 if (n < 1) n = 1;
1064 printf("%.*s%s\n", n, " ", op->help_text);
1065 }
1066
1067 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1068 printf("trailing white space is removed and blank lines are ignored.\n");
1069 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1070
1071 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1072 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1073 }
1074
1075
1076
1077
1078 /*************************************************
1079 * Handle a single-letter, no data option *
1080 *************************************************/
1081
1082 static int
1083 handle_option(int letter, int options)
1084 {
1085 switch(letter)
1086 {
1087 case N_HELP: help(); exit(0);
1088 case 'c': count_only = TRUE; break;
1089 case 'F': process_options |= PO_FIXED_STRINGS; break;
1090 case 'H': filenames = FN_FORCE; break;
1091 case 'h': filenames = FN_NONE; break;
1092 case 'i': options |= PCRE_CASELESS; break;
1093 case 'l': filenames = FN_ONLY; break;
1094 case 'L': filenames = FN_NOMATCH_ONLY; break;
1095 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1096 case 'n': number = TRUE; break;
1097 case 'o': only_matching = TRUE; break;
1098 case 'q': quiet = TRUE; break;
1099 case 'r': dee_action = dee_RECURSE; break;
1100 case 's': silent = TRUE; break;
1101 case 'u': options |= PCRE_UTF8; break;
1102 case 'v': invert = TRUE; break;
1103 case 'w': process_options |= PO_WORD_MATCH; break;
1104 case 'x': process_options |= PO_LINE_MATCH; break;
1105
1106 case 'V':
1107 fprintf(stderr, "pcregrep version %s using ", VERSION);
1108 fprintf(stderr, "PCRE version %s\n", pcre_version());
1109 exit(0);
1110 break;
1111
1112 default:
1113 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1114 exit(usage(2));
1115 }
1116
1117 return options;
1118 }
1119
1120
1121
1122
1123 /*************************************************
1124 * Construct printed ordinal *
1125 *************************************************/
1126
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12, or 13 take "th" ("11th",
"112th"), not the ending that their final digit alone would suggest. The
result is held in a static buffer, which is overwritten by the next call. */

static char *
ordin(int n)
{
  /* Three characters per byte is more than enough for the decimal digits of
  an int; the extra four are for a sign, the two-letter ending, and the
  terminating zero. */

  static char buffer[sizeof(int) * 3 + 4];
  const char *ending = "th";
  int last_two = n % 100;

  if (last_two < 11 || last_two > 13)
  {
    switch (n % 10)
    {
      case 1: ending = "st"; break;
      case 2: ending = "nd"; break;
      case 3: ending = "rd"; break;
      default: break;
    }
  }

  sprintf(buffer, "%d%s", n, ending);
  return buffer;
}
1145
1146
1147
1148 /*************************************************
1149 * Compile a single pattern *
1150 *************************************************/
1151
1152 /* When the -F option has been used, this is called for each substring.
1153 Otherwise it's called for each supplied pattern.
1154
1155 Arguments:
1156 pattern the pattern string
1157 options the PCRE options
1158 filename the file name, or NULL for a command-line pattern
1159 count 0 if this is the only command line pattern, or
1160 number of the command line pattern, or
1161 linenumber for a pattern from a file
1162
1163 Returns: TRUE on success, FALSE after an error
1164 */
1165
1166 static BOOL
1167 compile_single_pattern(char *pattern, int options, char *filename, int count)
1168 {
1169 char buffer[MBUFTHIRD + 16];
1170 const char *error;
1171 int errptr;
1172
1173 if (pattern_count >= MAX_PATTERN_COUNT)
1174 {
1175 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1176 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1177 return FALSE;
1178 }
1179
1180 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1181 suffix[process_options]);
1182 pattern_list[pattern_count] =
1183 pcre_compile(buffer, options, &error, &errptr, pcretables);
1184 if (pattern_list[pattern_count++] != NULL) return TRUE;
1185
1186 /* Handle compile errors */
1187
1188 errptr -= (int)strlen(prefix[process_options]);
1189 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1190
1191 if (filename == NULL)
1192 {
1193 if (count == 0)
1194 fprintf(stderr, "pcregrep: Error in command-line regex "
1195 "at offset %d: %s\n", errptr, error);
1196 else
1197 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1198 "at offset %d: %s\n", ordin(count), errptr, error);
1199 }
1200 else
1201 {
1202 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1203 "at offset %d: %s\n", count, filename, errptr, error);
1204 }
1205
1206 return FALSE;
1207 }
1208
1209
1210
1211 /*************************************************
1212 * Compile one supplied pattern *
1213 *************************************************/
1214
1215 /* When the -F option has been used, each string may be a list of strings,
1216 separated by line breaks. They will be matched literally.
1217
1218 Arguments:
1219 pattern the pattern string
1220 options the PCRE options
1221 filename the file name, or NULL for a command-line pattern
1222 count 0 if this is the only command line pattern, or
1223 number of the command line pattern, or
1224 linenumber for a pattern from a file
1225
1226 Returns: TRUE on success, FALSE after an error
1227 */
1228
1229 static BOOL
1230 compile_pattern(char *pattern, int options, char *filename, int count)
1231 {
1232 if ((process_options & PO_FIXED_STRINGS) != 0)
1233 {
1234 char buffer[MBUFTHIRD];
1235 for(;;)
1236 {
1237 char *p = strchr(pattern, endlinebyte);
1238 if (p == NULL)
1239 return compile_single_pattern(pattern, options, filename, count);
1240 sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
1241 pattern = p + 1;
1242 if (!compile_single_pattern(buffer, options, filename, count))
1243 return FALSE;
1244 }
1245 }
1246 else return compile_single_pattern(pattern, options, filename, count);
1247 }
1248
1249
1250
1251 /*************************************************
1252 * Main program *
1253 *************************************************/
1254
1255 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1256
1257 int
1258 main(int argc, char **argv)
1259 {
1260 int i, j;
1261 int rc = 1;
1262 int pcre_options = 0;
1263 int cmd_pattern_count = 0;
1264 int errptr;
1265 BOOL only_one_at_top;
1266 char *patterns[MAX_PATTERN_COUNT];
1267 const char *locale_from = "--locale";
1268 const char *error;
1269
1270 /* Set the default line ending value from the default in the PCRE library. */
1271
1272 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1273 switch(i)
1274 {
1275 default: newline = (char *)"lf"; break;
1276 case '\r': newline = (char *)"cr"; break;
1277 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1278 }
1279
1280 /* Process the options */
1281
1282 for (i = 1; i < argc; i++)
1283 {
1284 option_item *op = NULL;
1285 char *option_data = (char *)""; /* default to keep compiler happy */
1286 BOOL longop;
1287 BOOL longopwasequals = FALSE;
1288
1289 if (argv[i][0] != '-') break;
1290
1291 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1292 but only if we have previously had -e or -f to define the patterns. */
1293
1294 if (argv[i][1] == 0)
1295 {
1296 if (pattern_filename != NULL || pattern_count > 0) break;
1297 else exit(usage(2));
1298 }
1299
1300 /* Handle a long name option, or -- to terminate the options */
1301
1302 if (argv[i][1] == '-')
1303 {
1304 char *arg = argv[i] + 2;
1305 char *argequals = strchr(arg, '=');
1306
1307 if (*arg == 0) /* -- terminates options */
1308 {
1309 i++;
1310 break; /* out of the options-handling loop */
1311 }
1312
1313 longop = TRUE;
1314
1315 /* Some long options have data that follows after =, for example file=name.
1316 Some options have variations in the long name spelling: specifically, we
1317 allow "regexp" because GNU grep allows it, though I personally go along
1318 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1319 These options are entered in the table as "regex(p)". No option is in both
1320 these categories, fortunately. */
1321
1322 for (op = optionlist; op->one_char != 0; op++)
1323 {
1324 char *opbra = strchr(op->long_name, '(');
1325 char *equals = strchr(op->long_name, '=');
1326 if (opbra == NULL) /* Not a (p) case */
1327 {
1328 if (equals == NULL) /* Not thing=data case */
1329 {
1330 if (strcmp(arg, op->long_name) == 0) break;
1331 }
1332 else /* Special case xxx=data */
1333 {
1334 int oplen = equals - op->long_name;
1335 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1336 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1337 {
1338 option_data = arg + arglen;
1339 if (*option_data == '=')
1340 {
1341 option_data++;
1342 longopwasequals = TRUE;
1343 }
1344 break;
1345 }
1346 }
1347 }
1348 else /* Special case xxxx(p) */
1349 {
1350 char buff1[24];
1351 char buff2[24];
1352 int baselen = opbra - op->long_name;
1353 sprintf(buff1, "%.*s", baselen, op->long_name);
1354 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1355 opbra + 1);
1356 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1357 break;
1358 }
1359 }
1360
1361 if (op->one_char == 0)
1362 {
1363 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1364 exit(usage(2));
1365 }
1366 }
1367
1368
1369 /* Jeffrey Friedl's debugging harness uses these additional options which
1370 are not in the right form for putting in the option table because they use
1371 only one hyphen, yet are more than one character long. By putting them
1372 separately here, they will not get displayed as part of the help() output,
1373 but I don't think Jeffrey will care about that. */
1374
1375 #ifdef JFRIEDL_DEBUG
1376 else if (strcmp(argv[i], "-pre") == 0) {
1377 jfriedl_prefix = argv[++i];
1378 continue;
1379 } else if (strcmp(argv[i], "-post") == 0) {
1380 jfriedl_postfix = argv[++i];
1381 continue;
1382 } else if (strcmp(argv[i], "-XT") == 0) {
1383 sscanf(argv[++i], "%d", &jfriedl_XT);
1384 continue;
1385 } else if (strcmp(argv[i], "-XR") == 0) {
1386 sscanf(argv[++i], "%d", &jfriedl_XR);
1387 continue;
1388 }
1389 #endif
1390
1391
1392 /* One-char options; many that have no data may be in a single argument; we
1393 continue till we hit the last one or one that needs data. */
1394
1395 else
1396 {
1397 char *s = argv[i] + 1;
1398 longop = FALSE;
1399 while (*s != 0)
1400 {
1401 for (op = optionlist; op->one_char != 0; op++)
1402 { if (*s == op->one_char) break; }
1403 if (op->one_char == 0)
1404 {
1405 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1406 *s, argv[i]);
1407 exit(usage(2));
1408 }
1409 if (op->type != OP_NODATA || s[1] == 0)
1410 {
1411 option_data = s+1;
1412 break;
1413 }
1414 pcre_options = handle_option(*s++, pcre_options);
1415 }
1416 }
1417
1418 /* At this point we should have op pointing to a matched option. If the type
1419 is NO_DATA, it means that there is no data, and the option might set
1420 something in the PCRE options. */
1421
1422 if (op->type == OP_NODATA)
1423 {
1424 pcre_options = handle_option(op->one_char, pcre_options);
1425 continue;
1426 }
1427
1428 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1429 either has a value or defaults to something. It cannot have data in a
1430 separate item. At the moment, the only such options are "colo(u)r" and
1431 Jeffrey Friedl's special -S debugging option. */
1432
1433 if (*option_data == 0 &&
1434 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1435 {
1436 switch (op->one_char)
1437 {
1438 case N_COLOUR:
1439 colour_option = (char *)"auto";
1440 break;
1441 #ifdef JFRIEDL_DEBUG
1442 case 'S':
1443 S_arg = 0;
1444 break;
1445 #endif
1446 }
1447 continue;
1448 }
1449
1450 /* Otherwise, find the data string for the option. */
1451
1452 if (*option_data == 0)
1453 {
1454 if (i >= argc - 1 || longopwasequals)
1455 {
1456 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1457 exit(usage(2));
1458 }
1459 option_data = argv[++i];
1460 }
1461
1462 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1463 multiple times to create a list of patterns. */
1464
1465 if (op->type == OP_PATLIST)
1466 {
1467 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1468 {
1469 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1470 MAX_PATTERN_COUNT);
1471 return 2;
1472 }
1473 patterns[cmd_pattern_count++] = option_data;
1474 }
1475
1476 /* Otherwise, deal with single string or numeric data values. */
1477
1478 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1479 {
1480 *((char **)op->dataptr) = option_data;
1481 }
1482 else
1483 {
1484 char *endptr;
1485 int n = strtoul(option_data, &endptr, 10);
1486 if (*endptr != 0)
1487 {
1488 if (longop)
1489 {
1490 char *equals = strchr(op->long_name, '=');
1491 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1492 equals - op->long_name;
1493 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1494 option_data, nlen, op->long_name);
1495 }
1496 else
1497 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1498 option_data, op->one_char);
1499 exit(usage(2));
1500 }
1501 *((int *)op->dataptr) = n;
1502 }
1503 }
1504
1505 /* Options have been decoded. If -C was used, its value is used as a default
1506 for -A and -B. */
1507
1508 if (both_context > 0)
1509 {
1510 if (after_context == 0) after_context = both_context;
1511 if (before_context == 0) before_context = both_context;
1512 }
1513
1514 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1515 LC_ALL environment variable is set, and if so, use it. */
1516
1517 if (locale == NULL)
1518 {
1519 locale = getenv("LC_ALL");
1520 locale_from = "LCC_ALL";
1521 }
1522
1523 if (locale == NULL)
1524 {
1525 locale = getenv("LC_CTYPE");
1526 locale_from = "LC_CTYPE";
1527 }
1528
1529 /* If a locale has been provided, set it, and generate the tables the PCRE
1530 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1531
1532 if (locale != NULL)
1533 {
1534 if (setlocale(LC_CTYPE, locale) == NULL)
1535 {
1536 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1537 locale, locale_from);
1538 return 2;
1539 }
1540 pcretables = pcre_maketables();
1541 }
1542
1543 /* Sort out colouring */
1544
1545 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1546 {
1547 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1548 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1549 else
1550 {
1551 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1552 colour_option);
1553 return 2;
1554 }
1555 if (do_colour)
1556 {
1557 char *cs = getenv("PCREGREP_COLOUR");
1558 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1559 if (cs != NULL) colour_string = cs;
1560 }
1561 }
1562
1563 /* Interpret the newline type; the default settings are Unix-like. */
1564
1565 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1566 {
1567 pcre_options |= PCRE_NEWLINE_CR;
1568 endlinebyte = '\r';
1569 }
1570 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1571 {
1572 pcre_options |= PCRE_NEWLINE_LF;
1573 }
1574 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1575 {
1576 pcre_options |= PCRE_NEWLINE_CRLF;
1577 endlineextra = 1;
1578 }
1579 else
1580 {
1581 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1582 return 2;
1583 }
1584
1585 /* Interpret the text values for -d and -D */
1586
1587 if (dee_option != NULL)
1588 {
1589 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1590 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1591 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1592 else
1593 {
1594 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1595 return 2;
1596 }
1597 }
1598
1599 if (DEE_option != NULL)
1600 {
1601 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1602 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1603 else
1604 {
1605 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1606 return 2;
1607 }
1608 }
1609
1610 /* Check the values for Jeffrey Friedl's debugging options. */
1611
1612 #ifdef JFRIEDL_DEBUG
1613 if (S_arg > 9)
1614 {
1615 fprintf(stderr, "pcregrep: bad value for -S option\n");
1616 return 2;
1617 }
1618 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1619 {
1620 if (jfriedl_XT == 0) jfriedl_XT = 1;
1621 if (jfriedl_XR == 0) jfriedl_XR = 1;
1622 }
1623 #endif
1624
1625 /* Get memory to store the pattern and hints lists. */
1626
1627 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1628 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1629
1630 if (pattern_list == NULL || hints_list == NULL)
1631 {
1632 fprintf(stderr, "pcregrep: malloc failed\n");
1633 return 2;
1634 }
1635
1636 /* If no patterns were provided by -e, and there is no file provided by -f,
1637 the first argument is the one and only pattern, and it must exist. */
1638
1639 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1640 {
1641 if (i >= argc) return usage(2);
1642 patterns[cmd_pattern_count++] = argv[i++];
1643 }
1644
1645 /* Compile the patterns that were provided on the command line, either by
1646 multiple uses of -e or as a single unkeyed pattern. */
1647
1648 for (j = 0; j < cmd_pattern_count; j++)
1649 {
1650 if (!compile_pattern(patterns[j], pcre_options, NULL,
1651 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1652 return 2;
1653 }
1654
1655 /* Compile the regular expressions that are provided in a file. */
1656
1657 if (pattern_filename != NULL)
1658 {
1659 int linenumber = 0;
1660 FILE *f;
1661 char *filename;
1662 char buffer[MBUFTHIRD];
1663
1664 if (strcmp(pattern_filename, "-") == 0)
1665 {
1666 f = stdin;
1667 filename = stdin_name;
1668 }
1669 else
1670 {
1671 f = fopen(pattern_filename, "r");
1672 if (f == NULL)
1673 {
1674 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1675 strerror(errno));
1676 return 2;
1677 }
1678 filename = pattern_filename;
1679 }
1680
1681 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1682 {
1683 char *s = buffer + (int)strlen(buffer);
1684 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1685 *s = 0;
1686 linenumber++;
1687 if (buffer[0] == 0) continue; /* Skip blank lines */
1688 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1689 return 2;
1690 }
1691
1692 if (f != stdin) fclose(f);
1693 }
1694
1695 /* Study the regular expressions, as we will be running them many times */
1696
1697 for (j = 0; j < pattern_count; j++)
1698 {
1699 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1700 if (error != NULL)
1701 {
1702 char s[16];
1703 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1704 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1705 return 2;
1706 }
1707 }
1708
1709 /* If there are include or exclude patterns, compile them. */
1710
1711 if (exclude_pattern != NULL)
1712 {
1713 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1714 pcretables);
1715 if (exclude_compiled == NULL)
1716 {
1717 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1718 errptr, error);
1719 return 2;
1720 }
1721 }
1722
1723 if (include_pattern != NULL)
1724 {
1725 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1726 pcretables);
1727 if (include_compiled == NULL)
1728 {
1729 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1730 errptr, error);
1731 return 2;
1732 }
1733 }
1734
1735 /* If there are no further arguments, do the business on stdin and exit. */
1736
1737 if (i >= argc)
1738 return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1739
1740 /* Otherwise, work through the remaining arguments as files or directories.
1741 Pass in the fact that there is only one argument at top level - this suppresses
1742 the file name if the argument is not a directory and filenames are not
1743 otherwise forced. */
1744
1745 only_one_at_top = i == argc - 1; /* Catch initial value of i */
1746
1747 for (; i < argc; i++)
1748 {
1749 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1750 only_one_at_top);
1751 if (frc > 1) rc = frc;
1752 else if (frc == 0 && rc == 1) rc = 0;
1753 }
1754
1755 return rc;
1756 }
1757
1758 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12