/[pcre]/code/tags/pcre-6.3/pcregrep.c
ViewVC logotype

Contents of /code/tags/pcre-6.3/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 84 - (show annotations) (download)
Sat Feb 24 21:41:08 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 33424 byte(s)
Tag code/trunk as code/tags/pcre-6.3.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <errno.h>
45
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49
50 #include "config.h"
51 #include "pcre.h"
52
/* Boolean type used throughout this file, together with its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Size of the compiled-pattern tables, and the version string shown by -V. */

#define MAX_PATTERN_COUNT 100
#define VERSION "4.0 07-Jun-2005"

/* One third of the file-scanning buffer: BUFSIZ when that is larger than
8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
67
68
69 /*************************************************
70 * Global variables *
71 *************************************************/
72
73 static char *pattern_filename = NULL;
74 static char *stdin_name = (char *)"(standard input)";
75 static int pattern_count = 0;
76 static pcre **pattern_list;
77 static pcre_extra **hints_list;
78
79 static char *include_pattern = NULL;
80 static char *exclude_pattern = NULL;
81
82 static pcre *include_compiled = NULL;
83 static pcre *exclude_compiled = NULL;
84
85 static int after_context = 0;
86 static int before_context = 0;
87 static int both_context = 0;
88
89 static BOOL count_only = FALSE;
90 static BOOL filenames = TRUE;
91 static BOOL filenames_only = FALSE;
92 static BOOL filenames_nomatch_only = FALSE;
93 static BOOL hyphenpending = FALSE;
94 static BOOL invert = FALSE;
95 static BOOL multiline = FALSE;
96 static BOOL number = FALSE;
97 static BOOL quiet = FALSE;
98 static BOOL recurse = FALSE;
99 static BOOL silent = FALSE;
100 static BOOL whole_lines = FALSE;
101 static BOOL word_match = FALSE;
102
103 /* Structure for options and list of them */
104
/* The kinds of option: one that takes no data, one that takes a string, and
one that takes a number. */

enum {
  OP_NODATA = 0,
  OP_STRING = 1,
  OP_NUMBER = 2
};

/* One entry in the table of options. */

struct option_item {
  int type;               /* OP_NODATA, OP_STRING or OP_NUMBER */
  int one_char;           /* option letter; -1 if long-only; 0 ends the table */
  void *dataptr;          /* where the data value is stored, or NULL */
  const char *long_name;  /* long name; "name=data" form if it takes data */
  const char *help_text;  /* description printed by help() */
};

typedef struct option_item option_item;
114
115 static option_item optionlist[] = {
116 { OP_NODATA, -1, NULL, "", " terminate options" },
117 { OP_NODATA, -1, NULL, "help", "display this help and exit" },
118 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
119 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
120 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
121 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
122 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
123 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
124 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
125 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
126 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
127 { OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" },
128 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
129 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
130 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
131 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
132 { OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
133 { OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" },
134 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
135 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
136 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
137 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
138 { OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" },
139 { OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" },
140 { OP_NODATA, 0, NULL, NULL, NULL }
141 };
142
143
144 /*************************************************
145 * Functions for directory scanning *
146 *************************************************/
147
148 /* These functions are defined so that they can be made system specific,
149 although at present the only ones are for Unix, Win32, and for "no directory
150 recursion support". */
151
152
153 /************* Directory scanning in Unix ***********/
154
155 #if IS_UNIX
156 #include <sys/types.h>
157 #include <sys/stat.h>
158 #include <dirent.h>
159
typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:  filename   the path to test
Returns:   '/' if it is a directory (the caller uses this as the separator
             when building the names of files inside it)
           0 if it is not a directory, or if stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    return '/';

  /* Not a directory, or stat() failed. In the latter case the expectation is
  that opening the path as a file will fail as well. */

  return 0;
}
170
171 static directory_type *
172 opendirectory(char *filename)
173 {
174 return opendir(filename);
175 }
176
177 static char *
178 readdirectory(directory_type *dir)
179 {
180 for (;;)
181 {
182 struct dirent *dent = readdir(dir);
183 if (dent == NULL) return NULL;
184 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185 return dent->d_name;
186 }
187 return NULL; /* Keep compiler happy; never executed */
188 }
189
190 static void
191 closedirectory(directory_type *dir)
192 {
193 closedir(dir);
194 }
195
196
197 /************* Directory scanning in Win32 ***********/
198
199 /* I (Philip Hazel) have no means of testing this code. It was contributed by
200 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201 when it did not exist. */
202
203
204 #elif HAVE_WIN32API
205
206 #ifndef STRICT
207 # define STRICT
208 #endif
209 #ifndef WIN32_LEAN_AND_MEAN
210 # define WIN32_LEAN_AND_MEAN
211 #endif
212 #ifndef INVALID_FILE_ATTRIBUTES
213 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214 #endif
215
216 #include <windows.h>
217
218 typedef struct directory_type
219 {
220 HANDLE handle;
221 BOOL first;
222 WIN32_FIND_DATA data;
223 } directory_type;
224
225 int
226 isdirectory(char *filename)
227 {
228 DWORD attr = GetFileAttributes(filename);
229 if (attr == INVALID_FILE_ATTRIBUTES)
230 return 0;
231 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232 }
233
234 directory_type *
235 opendirectory(char *filename)
236 {
237 size_t len;
238 char *pattern;
239 directory_type *dir;
240 DWORD err;
241 len = strlen(filename);
242 pattern = (char *) malloc(len + 3);
243 dir = (directory_type *) malloc(sizeof(*dir));
244 if ((pattern == NULL) || (dir == NULL))
245 {
246 fprintf(stderr, "pcregrep: malloc failed\n");
247 exit(2);
248 }
249 memcpy(pattern, filename, len);
250 memcpy(&(pattern[len]), "\\*", 3);
251 dir->handle = FindFirstFile(pattern, &(dir->data));
252 if (dir->handle != INVALID_HANDLE_VALUE)
253 {
254 free(pattern);
255 dir->first = TRUE;
256 return dir;
257 }
258 err = GetLastError();
259 free(pattern);
260 free(dir);
261 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262 return NULL;
263 }
264
265 char *
266 readdirectory(directory_type *dir)
267 {
268 for (;;)
269 {
270 if (!dir->first)
271 {
272 if (!FindNextFile(dir->handle, &(dir->data)))
273 return NULL;
274 }
275 else
276 {
277 dir->first = FALSE;
278 }
279 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280 return dir->data.cFileName;
281 }
282 #ifndef _MSC_VER
283 return NULL; /* Keep compiler happy; never executed */
284 #endif
285 }
286
287 void
288 closedirectory(directory_type *dir)
289 {
290 FindClose(dir->handle);
291 free(dir);
292 }
293
294
295 /************* Directory scanning when we can't do it ***********/
296
297 /* The type is void, and apart from isdirectory(), the functions do nothing. */
298
299 #else
300
typedef void directory_type;

/* Nothing is ever reported as a directory, so the caller never gets as far
as using the other three functions. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Never succeeds. A value is returned explicitly because falling off the end
of a function that returns a pointer leaves the result undefined. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* Never yields an entry; returns NULL for the same reason as above. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
307
308 #endif
309
310
311
312 #if ! HAVE_STRERROR
313 /*************************************************
314 * Provide strerror() for non-ANSI libraries *
315 *************************************************/
316
317 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
318 in their libraries, but can provide the same facility by this simple
319 alternative function. */
320
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror(), built on the system's table of error texts.

Argument:  n   an error number
Returns:   the text from sys_errlist[] for that number, or a fixed string if
           the number is negative or not below sys_nerr
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
330 #endif /* HAVE_STRERROR */
331
332
333
334 /*************************************************
335 * Print the previous "after" lines *
336 *************************************************/
337
338 /* This is called if we are about to lose said lines because of buffer filling,
339 and at the end of the file.
340
341 Arguments:
342 lastmatchnumber the number of the last matching line, plus one
343 lastmatchrestart where we restarted after the last match
344 endptr end of available data
345 printname filename for printing
346
347 Returns: nothing
348 */
349
350 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351 char *endptr, char *printname)
352 {
353 if (after_context > 0 && lastmatchnumber > 0)
354 {
355 int count = 0;
356 while (lastmatchrestart < endptr && count++ < after_context)
357 {
358 char *pp = lastmatchrestart;
359 if (printname != NULL) fprintf(stdout, "%s-", printname);
360 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361 while (*pp != '\n') pp++;
362 fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363 lastmatchrestart = pp + 1;
364 }
365 hyphenpending = TRUE;
366 }
367 }
368
369
370
371 /*************************************************
372 * Grep an individual file *
373 *************************************************/
374
375 /* This is called from grep_or_recurse() below. It uses a buffer that is three
376 times the value of MBUFTHIRD. The matching point is never allowed to stray into
377 the top third of the buffer, thus keeping more of the file available for
378 context printing or for multiline scanning. For large files, the pointer will
379 be in the middle third most of the time, so the bottom third is available for
380 "before" context printing.
381
382 Arguments:
383 in the fopened FILE stream
384 printname the file name if it is to be printed for each match
385 or NULL if the file name is not to be printed
386 it cannot be NULL if filenames[_nomatch]_only is set
387
388 Returns: 0 if there was at least one match
389 1 otherwise (no matches)
390 */
391
392 static int
393 pcregrep(FILE *in, char *printname)
394 {
395 int rc = 1;
396 int linenumber = 1;
397 int lastmatchnumber = 0;
398 int count = 0;
399 int offsets[99];
400 char *lastmatchrestart = NULL;
401 char buffer[3*MBUFTHIRD];
402 char *ptr = buffer;
403 char *endptr;
404 size_t bufflength;
405 BOOL endhyphenpending = FALSE;
406
407 /* Do the first read into the start of the buffer and set up the pointer to
408 end of what we have. */
409
410 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411 endptr = buffer + bufflength;
412
413 /* Loop while the current pointer is not at the end of the file. For large
414 files, endptr will be at the end of the buffer when we are in the middle of the
415 file, but ptr will never get there, because as soon as it gets over 2/3 of the
416 way, the buffer is shifted left and re-filled. */
417
418 while (ptr < endptr)
419 {
420 int i;
421 BOOL match = FALSE;
422 char *t = ptr;
423 size_t length, linelength;
424
425 /* At this point, ptr is at the start of a line. We need to find the length
426 of the subject string to pass to pcre_exec(). In multiline mode, it is the
427 length remainder of the data in the buffer. Otherwise, it is the length of
428 the next line. After matching, we always advance by the length of the next
429 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430 that any match is constrained to be in the first line. */
431
432 linelength = 0;
433 while (t < endptr && *t++ != '\n') linelength++;
434 length = multiline? endptr - ptr : linelength;
435
436 /* Run through all the patterns until one matches. Note that we don't include
437 the final newline in the subject string. */
438
439 for (i = 0; !match && i < pattern_count; i++)
440 {
441 match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442 offsets, 99) >= 0;
443 }
444
445 /* If it's a match or a not-match (as required), print what's wanted. */
446
447 if (match != invert)
448 {
449 BOOL hyphenprinted = FALSE;
450
451 if (filenames_nomatch_only) return 1;
452
453 if (count_only) count++;
454
455 else if (filenames_only)
456 {
457 fprintf(stdout, "%s\n", printname);
458 return 0;
459 }
460
461 else if (quiet) return 0;
462
463 else
464 {
465 /* See if there is a requirement to print some "after" lines from a
466 previous match. We never print any overlaps. */
467
468 if (after_context > 0 && lastmatchnumber > 0)
469 {
470 int linecount = 0;
471 char *p = lastmatchrestart;
472
473 while (p < ptr && linecount < after_context)
474 {
475 while (*p != '\n') p++;
476 p++;
477 linecount++;
478 }
479
480 /* It is important to advance lastmatchrestart during this printing so
481 that it interacts correctly with any "before" printing below. */
482
483 while (lastmatchrestart < p)
484 {
485 char *pp = lastmatchrestart;
486 if (printname != NULL) fprintf(stdout, "%s-", printname);
487 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488 while (*pp != '\n') pp++;
489 fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490 lastmatchrestart = pp + 1;
491 }
492 if (lastmatchrestart != ptr) hyphenpending = TRUE;
493 }
494
495 /* If there were non-contiguous lines printed above, insert hyphens. */
496
497 if (hyphenpending)
498 {
499 fprintf(stdout, "--\n");
500 hyphenpending = FALSE;
501 hyphenprinted = TRUE;
502 }
503
504 /* See if there is a requirement to print some "before" lines for this
505 match. Again, don't print overlaps. */
506
507 if (before_context > 0)
508 {
509 int linecount = 0;
510 char *p = ptr;
511
512 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513 linecount++ < before_context)
514 {
515 p--;
516 while (p > buffer && p[-1] != '\n') p--;
517 }
518
519 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520 fprintf(stdout, "--\n");
521
522 while (p < ptr)
523 {
524 char *pp = p;
525 if (printname != NULL) fprintf(stdout, "%s-", printname);
526 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527 while (*pp != '\n') pp++;
528 fprintf(stdout, "%.*s", pp - p + 1, p);
529 p = pp + 1;
530 }
531 }
532
533 /* Now print the matching line(s); ensure we set hyphenpending at the end
534 of the file. */
535
536 endhyphenpending = TRUE;
537 if (printname != NULL) fprintf(stdout, "%s:", printname);
538 if (number) fprintf(stdout, "%d:", linenumber);
539
540 /* In multiline mode, we want to print to the end of the line in which
541 the end of the matched string is found, so we adjust linelength and the
542 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
543 start of the match will always be before the first \n character. */
544
545 if (multiline)
546 {
547 char *endmatch = ptr + offsets[1];
548 t = ptr;
549 while (t < endmatch) { if (*t++ == '\n') linenumber++; }
550 while (endmatch < endptr && *endmatch != '\n') endmatch++;
551 linelength = endmatch - ptr;
552 }
553
554 fprintf(stdout, "%.*s\n", linelength, ptr);
555 }
556
557 rc = 0; /* Had some success */
558
559 /* Remember where the last match happened for after_context. We remember
560 where we are about to restart, and that line's number. */
561
562 lastmatchrestart = ptr + linelength + 1;
563 lastmatchnumber = linenumber + 1;
564 }
565
566 /* Advance to after the newline and increment the line number. */
567
568 ptr += linelength + 1;
569 linenumber++;
570
571 /* If we haven't yet reached the end of the file (the buffer is full), and
572 the current point is in the top 1/3 of the buffer, slide the buffer down by
573 1/3 and refill it. Before we do this, if some unprinted "after" lines are
574 about to be lost, print them. */
575
576 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
577 {
578 if (after_context > 0 &&
579 lastmatchnumber > 0 &&
580 lastmatchrestart < buffer + MBUFTHIRD)
581 {
582 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
583 lastmatchnumber = 0;
584 }
585
586 /* Now do the shuffle */
587
588 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
589 ptr -= MBUFTHIRD;
590 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
591 endptr = buffer + bufflength;
592
593 /* Adjust any last match point */
594
595 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
596 }
597 } /* Loop through the whole file */
598
599 /* End of file; print final "after" lines if wanted; do_after_lines sets
600 hyphenpending if it prints something. */
601
602 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
603 hyphenpending |= endhyphenpending;
604
605 /* Print the file name if we are looking for those without matches and there
606 were none. If we found a match, we won't have got this far. */
607
608 if (filenames_nomatch_only)
609 {
610 fprintf(stdout, "%s\n", printname);
611 return 0;
612 }
613
614 /* Print the match count if wanted */
615
616 if (count_only)
617 {
618 if (printname != NULL) fprintf(stdout, "%s:", printname);
619 fprintf(stdout, "%d\n", count);
620 }
621
622 return rc;
623 }
624
625
626
627 /*************************************************
628 * Grep a file or recurse into a directory *
629 *************************************************/
630
631 /* Given a path name, if it's a directory, scan all the files if we are
632 recursing; if it's a file, grep it.
633
634 Arguments:
635 pathname the path to investigate
636 dir_recurse TRUE if recursing is wanted (-r)
637 show_filenames TRUE if file names are wanted for multiple files, except
638 for the only file at top level when not filenames_only
639 only_one_at_top TRUE if the path is the only one at toplevel
640
641 Returns: 0 if there was at least one match
642 1 if there were no matches
643 2 there was some kind of error
644
645 However, file opening failures are suppressed if "silent" is set.
646 */
647
648 static int
649 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
650 BOOL only_one_at_top)
651 {
652 int rc = 1;
653 int sep;
654 FILE *in;
655 char *printname;
656
657 /* If the file name is "-" we scan stdin */
658
659 if (strcmp(pathname, "-") == 0)
660 {
661 return pcregrep(stdin,
662 (filenames_only || filenames_nomatch_only ||
663 (show_filenames && !only_one_at_top))?
664 stdin_name : NULL);
665 }
666
667 /* If the file is a directory and we are recursing, scan each file within it,
668 subject to any include or exclude patterns that were set. The scanning code is
669 localized so it can be made system-specific. */
670
671 if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
672 {
673 char buffer[1024];
674 char *nextfile;
675 directory_type *dir = opendirectory(pathname);
676
677 if (dir == NULL)
678 {
679 if (!silent)
680 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
681 strerror(errno));
682 return 2;
683 }
684
685 while ((nextfile = readdirectory(dir)) != NULL)
686 {
687 int frc, blen;
688 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
689 blen = strlen(buffer);
690
691 if (exclude_compiled != NULL &&
692 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
693 continue;
694
695 if (include_compiled != NULL &&
696 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
697 continue;
698
699 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
700 if (frc > 1) rc = frc;
701 else if (frc == 0 && rc == 1) rc = 0;
702 }
703
704 closedirectory(dir);
705 return rc;
706 }
707
708 /* If the file is not a directory, or we are not recursing, scan it. If this is
709 the first and only argument at top level, we don't show the file name (unless
710 we are only showing the file name). Otherwise, control is via the
711 show_filenames variable. */
712
713 in = fopen(pathname, "r");
714 if (in == NULL)
715 {
716 if (!silent)
717 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
718 strerror(errno));
719 return 2;
720 }
721
722 printname = (filenames_only || filenames_nomatch_only ||
723 (show_filenames && !only_one_at_top))? pathname : NULL;
724
725 rc = pcregrep(in, printname);
726
727 fclose(in);
728 return rc;
729 }
730
731
732
733
734 /*************************************************
735 * Usage function *
736 *************************************************/
737
/* Write the two-line usage summary to stderr.

Argument:  rc   the value to hand back
Returns:   rc, unchanged, so that the call can be used as an exit code
*/

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
745
746
747
748
749 /*************************************************
750 * Help function *
751 *************************************************/
752
753 static void
754 help(void)
755 {
756 option_item *op;
757
758 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
759 printf("Search for PATTERN in each FILE or standard input.\n");
760 printf("PATTERN must be present if -f is not used.\n");
761 printf("\"-\" can be used as a file name to mean STDIN.\n");
762 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
763
764 printf("Options:\n");
765
766 for (op = optionlist; op->one_char != 0; op++)
767 {
768 int n;
769 char s[4];
770 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
771 printf(" %s --%s%n", s, op->long_name, &n);
772 n = 30 - n;
773 if (n < 1) n = 1;
774 printf("%.*s%s\n", n, " ", op->help_text);
775 }
776
777 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
778 printf("trailing white space is removed and blank lines are ignored.\n");
779 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
780
781 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
782 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
783 }
784
785
786
787
788 /*************************************************
789 * Handle a single-letter, no data option *
790 *************************************************/
791
792 static int
793 handle_option(int letter, int options)
794 {
795 switch(letter)
796 {
797 case -1: help(); exit(0);
798 case 'c': count_only = TRUE; break;
799 case 'h': filenames = FALSE; break;
800 case 'i': options |= PCRE_CASELESS; break;
801 case 'l': filenames_only = TRUE; break;
802 case 'L': filenames_nomatch_only = TRUE; break;
803 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
804 case 'n': number = TRUE; break;
805 case 'q': quiet = TRUE; break;
806 case 'r': recurse = TRUE; break;
807 case 's': silent = TRUE; break;
808 case 'u': options |= PCRE_UTF8; break;
809 case 'v': invert = TRUE; break;
810 case 'w': word_match = TRUE; break;
811 case 'x': whole_lines = TRUE; break;
812
813 case 'V':
814 fprintf(stderr, "pcregrep version %s using ", VERSION);
815 fprintf(stderr, "PCRE version %s\n", pcre_version());
816 exit(0);
817 break;
818
819 default:
820 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
821 exit(usage(2));
822 }
823
824 return options;
825 }
826
827
828
829
830 /*************************************************
831 * Main program *
832 *************************************************/
833
834 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
835
836 int
837 main(int argc, char **argv)
838 {
839 int i, j;
840 int rc = 1;
841 int options = 0;
842 int errptr;
843 const char *error;
844 BOOL only_one_at_top;
845
846 /* Process the options */
847
848 for (i = 1; i < argc; i++)
849 {
850 option_item *op = NULL;
851 char *option_data = (char *)""; /* default to keep compiler happy */
852 BOOL longop;
853 BOOL longopwasequals = FALSE;
854
855 if (argv[i][0] != '-') break;
856
857 /* If we hit an argument that is just "-", it may be a reference to STDIN,
858 but only if we have previously had -f to define the patterns. */
859
860 if (argv[i][1] == 0)
861 {
862 if (pattern_filename != NULL) break;
863 else exit(usage(2));
864 }
865
866 /* Handle a long name option, or -- to terminate the options */
867
868 if (argv[i][1] == '-')
869 {
870 char *arg = argv[i] + 2;
871 char *argequals = strchr(arg, '=');
872
873 if (*arg == 0) /* -- terminates options */
874 {
875 i++;
876 break; /* out of the options-handling loop */
877 }
878
879 longop = TRUE;
880
881 /* Some long options have data that follows after =, for example file=name.
882 Some options have variations in the long name spelling: specifically, we
883 allow "regexp" because GNU grep allows it, though I personally go along
884 with Jeff Friedl in preferring "regex" without the "p". These options are
885 entered in the table as "regex(p)". No option is in both these categories,
886 fortunately. */
887
888 for (op = optionlist; op->one_char != 0; op++)
889 {
890 char *opbra = strchr(op->long_name, '(');
891 char *equals = strchr(op->long_name, '=');
892 if (opbra == NULL) /* Not a (p) case */
893 {
894 if (equals == NULL) /* Not thing=data case */
895 {
896 if (strcmp(arg, op->long_name) == 0) break;
897 }
898 else /* Special case xxx=data */
899 {
900 int oplen = equals - op->long_name;
901 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
902 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
903 {
904 option_data = arg + arglen;
905 if (*option_data == '=')
906 {
907 option_data++;
908 longopwasequals = TRUE;
909 }
910 break;
911 }
912 }
913 }
914 else /* Special case xxxx(p) */
915 {
916 char buff1[24];
917 char buff2[24];
918 int baselen = opbra - op->long_name;
919 sprintf(buff1, "%.*s", baselen, op->long_name);
920 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
921 opbra + 1);
922 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
923 break;
924 }
925 }
926
927 if (op->one_char == 0)
928 {
929 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
930 exit(usage(2));
931 }
932 }
933
934 /* One-char options; many that have no data may be in a single argument; we
935 continue till we hit the last one or one that needs data. */
936
937 else
938 {
939 char *s = argv[i] + 1;
940 longop = FALSE;
941 while (*s != 0)
942 {
943 for (op = optionlist; op->one_char != 0; op++)
944 { if (*s == op->one_char) break; }
945 if (op->one_char == 0)
946 {
947 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
948 *s, argv[i]);
949 exit(usage(2));
950 }
951 if (op->type != OP_NODATA || s[1] == 0)
952 {
953 option_data = s+1;
954 break;
955 }
956 options = handle_option(*s++, options);
957 }
958 }
959
960 /* At this point we should have op pointing to a matched option */
961
962 if (op->type == OP_NODATA)
963 options = handle_option(op->one_char, options);
964 else
965 {
966 if (*option_data == 0)
967 {
968 if (i >= argc - 1 || longopwasequals)
969 {
970 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
971 exit(usage(2));
972 }
973 option_data = argv[++i];
974 }
975
976 if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
977 {
978 char *endptr;
979 int n = strtoul(option_data, &endptr, 10);
980 if (*endptr != 0)
981 {
982 if (longop)
983 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
984 option_data, op->long_name);
985 else
986 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
987 option_data, op->one_char);
988 exit(usage(2));
989 }
990 *((int *)op->dataptr) = n;
991 }
992 }
993 }
994
995 /* Options have been decoded. If -C was used, its value is used as a default
996 for -A and -B. */
997
998 if (both_context > 0)
999 {
1000 if (after_context == 0) after_context = both_context;
1001 if (before_context == 0) before_context = both_context;
1002 }
1003
1004 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1005 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1006
1007 if (pattern_list == NULL || hints_list == NULL)
1008 {
1009 fprintf(stderr, "pcregrep: malloc failed\n");
1010 return 2;
1011 }
1012
1013 /* Compile the regular expression(s). */
1014
1015 if (pattern_filename != NULL)
1016 {
1017 FILE *f = fopen(pattern_filename, "r");
1018 char buffer[MBUFTHIRD + 16];
1019 char *rdstart;
1020 int adjust = 0;
1021
1022 if (f == NULL)
1023 {
1024 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1025 strerror(errno));
1026 return 2;
1027 }
1028
1029 if (whole_lines)
1030 {
1031 strcpy(buffer, "^(?:");
1032 adjust = 4;
1033 }
1034 else if (word_match)
1035 {
1036 strcpy(buffer, "\\b");
1037 adjust = 2;
1038 }
1039
1040 rdstart = buffer + adjust;
1041 while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1042 {
1043 char *s = rdstart + (int)strlen(rdstart);
1044 if (pattern_count >= MAX_PATTERN_COUNT)
1045 {
1046 fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1047 MAX_PATTERN_COUNT);
1048 return 2;
1049 }
1050 while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1051 if (s == rdstart) continue;
1052 if (whole_lines) strcpy(s, ")$");
1053 else if (word_match)strcpy(s, "\\b");
1054 else *s = 0;
1055 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1056 &errptr, NULL);
1057 if (pattern_list[pattern_count++] == NULL)
1058 {
1059 fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1060 pattern_count, errptr - adjust, error);
1061 return 2;
1062 }
1063 }
1064 fclose(f);
1065 }
1066
1067 /* If no file name, a single regex must be given inline. */
1068
1069 else
1070 {
1071 char buffer[MBUFTHIRD + 16];
1072 char *pat;
1073 int adjust = 0;
1074
1075 if (i >= argc) return usage(2);
1076
1077 if (whole_lines)
1078 {
1079 sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1080 pat = buffer;
1081 adjust = 4;
1082 }
1083 else if (word_match)
1084 {
1085 sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1086 pat = buffer;
1087 adjust = 2;
1088 }
1089 else pat = argv[i++];
1090
1091 pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1092
1093 if (pattern_list[0] == NULL)
1094 {
1095 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1096 errptr - adjust, error);
1097 return 2;
1098 }
1099 pattern_count++;
1100 }
1101
1102 /* Study the regular expressions, as we will be running them many times */
1103
1104 for (j = 0; j < pattern_count; j++)
1105 {
1106 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1107 if (error != NULL)
1108 {
1109 char s[16];
1110 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1111 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1112 return 2;
1113 }
1114 }
1115
1116 /* If there are include or exclude patterns, compile them. */
1117
1118 if (exclude_pattern != NULL)
1119 {
1120 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1121 if (exclude_compiled == NULL)
1122 {
1123 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1124 errptr, error);
1125 return 2;
1126 }
1127 }
1128
1129 if (include_pattern != NULL)
1130 {
1131 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1132 if (include_compiled == NULL)
1133 {
1134 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1135 errptr, error);
1136 return 2;
1137 }
1138 }
1139
1140 /* If there are no further arguments, do the business on stdin and exit */
1141
1142 if (i >= argc) return pcregrep(stdin,
1143 (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1144
1145 /* Otherwise, work through the remaining arguments as files or directories.
1146 Pass in the fact that there is only one argument at top level - this suppresses
1147 the file name if the argument is not a directory and filenames_only is not set.
1148 */
1149
1150 only_one_at_top = (i == argc - 1);
1151
1152 for (; i < argc; i++)
1153 {
1154 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1155 if (frc > 1) rc = frc;
1156 else if (frc == 0 && rc == 1) rc = 0;
1157 }
1158
1159 return rc;
1160 }
1161
1162 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12