/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (show annotations) (download)
Sat Feb 24 21:41:13 2007 UTC (7 years, 1 month ago) by nigel
File MIME type: text/plain
File size: 33516 byte(s)
Load pcre-6.4 into code/trunk.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #include <ctype.h>
41 #include <stdio.h>
42 #include <string.h>
43 #include <stdlib.h>
44 #include <errno.h>
45
46 #include <sys/types.h>
47 #include <sys/stat.h>
48 #include <unistd.h>
49
50 #include "config.h"
51 #include "pcre.h"
52
/* Home-grown boolean type: this file predates <stdbool.h>. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Program version string, printed by -V. */

#define VERSION "4.1 05-Sep-2005"

/* Upper limit on the number of patterns that can be read with -f. */

#define MAX_PATTERN_COUNT 100

/* One third of the size of the main scanning buffer: BUFSIZ when that is
larger than 8192, and 8192 otherwise. */

#define MBUFTHIRD ((BUFSIZ > 8192) ? BUFSIZ : 8192)
66
67
68
69 /*************************************************
70 * Global variables *
71 *************************************************/
72
73 static char *pattern_filename = NULL;
74 static char *stdin_name = (char *)"(standard input)";
75 static int pattern_count = 0;
76 static pcre **pattern_list;
77 static pcre_extra **hints_list;
78
79 static char *include_pattern = NULL;
80 static char *exclude_pattern = NULL;
81
82 static pcre *include_compiled = NULL;
83 static pcre *exclude_compiled = NULL;
84
85 static int after_context = 0;
86 static int before_context = 0;
87 static int both_context = 0;
88
89 static BOOL count_only = FALSE;
90 static BOOL filenames = TRUE;
91 static BOOL filenames_only = FALSE;
92 static BOOL filenames_nomatch_only = FALSE;
93 static BOOL hyphenpending = FALSE;
94 static BOOL invert = FALSE;
95 static BOOL multiline = FALSE;
96 static BOOL number = FALSE;
97 static BOOL quiet = FALSE;
98 static BOOL recurse = FALSE;
99 static BOOL silent = FALSE;
100 static BOOL whole_lines = FALSE;
101 static BOOL word_match = FALSE;
102
103 /* Structure for options and list of them */
104
105 enum { OP_NODATA, OP_STRING, OP_NUMBER };
106
107 typedef struct option_item {
108 int type;
109 int one_char;
110 void *dataptr;
111 const char *long_name;
112 const char *help_text;
113 } option_item;
114
115 static option_item optionlist[] = {
116 { OP_NODATA, -1, NULL, "", " terminate options" },
117 { OP_NODATA, -1, NULL, "help", "display this help and exit" },
118 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
119 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
120 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
121 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
122 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
123 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
124 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
125 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
126 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
127 { OP_STRING, -1, &stdin_name, "label=name", "set name for standard input" },
128 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
129 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
130 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
131 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
132 { OP_STRING, -1, &exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
133 { OP_STRING, -1, &include_pattern, "include=pattern","include matching files when recursing" },
134 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
135 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
136 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
137 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
138 { OP_NODATA, 'w', NULL, "word-regex(p)", "force PATTERN to match only as a word" },
139 { OP_NODATA, 'x', NULL, "line-regex(p)", "force PATTERN to match only whole lines" },
140 { OP_NODATA, 0, NULL, NULL, NULL }
141 };
142
143
144 /*************************************************
145 * Functions for directory scanning *
146 *************************************************/
147
148 /* These functions are defined so that they can be made system specific,
149 although at present the only ones are for Unix, Win32, and for "no directory
150 recursion support". */
151
152
153 /************* Directory scanning in Unix ***********/
154
155 #if IS_UNIX
156 #include <sys/types.h>
157 #include <sys/stat.h>
158 #include <dirent.h>
159
/* On Unix a directory handle is simply a DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' (the separator to use when building paths below it) if the
              path is a directory; 0 if it is not, or if stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as "not a directory", in the
  expectation that opening it as a file will fail too. */

  if (stat(filename, &info) < 0)
    return 0;

  if (S_ISDIR(info.st_mode))
    return '/';

  return 0;
}
170
171 static directory_type *
172 opendirectory(char *filename)
173 {
174 return opendir(filename);
175 }
176
177 static char *
178 readdirectory(directory_type *dir)
179 {
180 for (;;)
181 {
182 struct dirent *dent = readdir(dir);
183 if (dent == NULL) return NULL;
184 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
185 return dent->d_name;
186 }
187 return NULL; /* Keep compiler happy; never executed */
188 }
189
190 static void
191 closedirectory(directory_type *dir)
192 {
193 closedir(dir);
194 }
195
196
197 /************* Directory scanning in Win32 ***********/
198
199 /* I (Philip Hazel) have no means of testing this code. It was contributed by
200 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201 when it did not exist. */
202
203
204 #elif HAVE_WIN32API
205
206 #ifndef STRICT
207 # define STRICT
208 #endif
209 #ifndef WIN32_LEAN_AND_MEAN
210 # define WIN32_LEAN_AND_MEAN
211 #endif
212 #ifndef INVALID_FILE_ATTRIBUTES
213 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214 #endif
215
216 #include <windows.h>
217
218 typedef struct directory_type
219 {
220 HANDLE handle;
221 BOOL first;
222 WIN32_FIND_DATA data;
223 } directory_type;
224
225 int
226 isdirectory(char *filename)
227 {
228 DWORD attr = GetFileAttributes(filename);
229 if (attr == INVALID_FILE_ATTRIBUTES)
230 return 0;
231 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232 }
233
234 directory_type *
235 opendirectory(char *filename)
236 {
237 size_t len;
238 char *pattern;
239 directory_type *dir;
240 DWORD err;
241 len = strlen(filename);
242 pattern = (char *) malloc(len + 3);
243 dir = (directory_type *) malloc(sizeof(*dir));
244 if ((pattern == NULL) || (dir == NULL))
245 {
246 fprintf(stderr, "pcregrep: malloc failed\n");
247 exit(2);
248 }
249 memcpy(pattern, filename, len);
250 memcpy(&(pattern[len]), "\\*", 3);
251 dir->handle = FindFirstFile(pattern, &(dir->data));
252 if (dir->handle != INVALID_HANDLE_VALUE)
253 {
254 free(pattern);
255 dir->first = TRUE;
256 return dir;
257 }
258 err = GetLastError();
259 free(pattern);
260 free(dir);
261 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262 return NULL;
263 }
264
265 char *
266 readdirectory(directory_type *dir)
267 {
268 for (;;)
269 {
270 if (!dir->first)
271 {
272 if (!FindNextFile(dir->handle, &(dir->data)))
273 return NULL;
274 }
275 else
276 {
277 dir->first = FALSE;
278 }
279 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280 return dir->data.cFileName;
281 }
282 #ifndef _MSC_VER
283 return NULL; /* Keep compiler happy; never executed */
284 #endif
285 }
286
287 void
288 closedirectory(directory_type *dir)
289 {
290 FindClose(dir->handle);
291 free(dir);
292 }
293
294
295 /************* Directory scanning when we can't do it ***********/
296
297 /* The type is void, and apart from isdirectory(), the functions do nothing. */
298
299 #else
300
/* Stand-ins used when directory scanning is not supported. isdirectory()
always answers "not a directory", so with these definitions the other three
are never reached from grep_or_recurse(); they nevertheless return
well-defined values instead of falling off the end of a non-void function. */

typedef void directory_type;

/* Always 0 ("not a directory"), matching the failure value of the Unix and
Win32 versions. */

int isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Always NULL: no directory can be opened. */

directory_type * opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

/* Always NULL: there are never any entries. */

char *readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

/* Nothing to release. */

void closedirectory(directory_type *dir)
{
  (void)dir;
}
307
308 #endif
309
310
311
312 #if ! HAVE_STRERROR
313 /*************************************************
314 * Provide strerror() for non-ANSI libraries *
315 *************************************************/
316
317 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
318 in their libraries, but can provide the same facility by this simple
319 alternative function. */
320
/* The error-message table and its size, supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for a missing strerror().

Argument:   n   an error number
Returns:    sys_errlist[n] when n is within the table, otherwise a fixed
              "unknown error number" string
*/

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr) ? sys_errlist[n] : "unknown error number";
}
330 #endif /* HAVE_STRERROR */
331
332
333
334 /*************************************************
335 * Print the previous "after" lines *
336 *************************************************/
337
338 /* This is called if we are about to lose said lines because of buffer filling,
339 and at the end of the file.
340
341 Arguments:
342 lastmatchnumber the number of the last matching line, plus one
343 lastmatchrestart where we restarted after the last match
344 endptr end of available data
345 printname filename for printing
346
347 Returns: nothing
348 */
349
350 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351 char *endptr, char *printname)
352 {
353 if (after_context > 0 && lastmatchnumber > 0)
354 {
355 int count = 0;
356 while (lastmatchrestart < endptr && count++ < after_context)
357 {
358 char *pp = lastmatchrestart;
359 if (printname != NULL) fprintf(stdout, "%s-", printname);
360 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361 while (*pp != '\n') pp++;
362 fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363 lastmatchrestart = pp + 1;
364 }
365 hyphenpending = TRUE;
366 }
367 }
368
369
370
371 /*************************************************
372 * Grep an individual file *
373 *************************************************/
374
375 /* This is called from grep_or_recurse() below. It uses a buffer that is three
376 times the value of MBUFTHIRD. The matching point is never allowed to stray into
377 the top third of the buffer, thus keeping more of the file available for
378 context printing or for multiline scanning. For large files, the pointer will
379 be in the middle third most of the time, so the bottom third is available for
380 "before" context printing.
381
382 Arguments:
383 in the fopened FILE stream
384 printname the file name if it is to be printed for each match
385 or NULL if the file name is not to be printed
386 it cannot be NULL if filenames[_nomatch]_only is set
387
388 Returns: 0 if there was at least one match
389 1 otherwise (no matches)
390 */
391
392 static int
393 pcregrep(FILE *in, char *printname)
394 {
395 int rc = 1;
396 int linenumber = 1;
397 int lastmatchnumber = 0;
398 int count = 0;
399 int offsets[99];
400 char *lastmatchrestart = NULL;
401 char buffer[3*MBUFTHIRD];
402 char *ptr = buffer;
403 char *endptr;
404 size_t bufflength;
405 BOOL endhyphenpending = FALSE;
406
407 /* Do the first read into the start of the buffer and set up the pointer to
408 end of what we have. */
409
410 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411 endptr = buffer + bufflength;
412
413 /* Loop while the current pointer is not at the end of the file. For large
414 files, endptr will be at the end of the buffer when we are in the middle of the
415 file, but ptr will never get there, because as soon as it gets over 2/3 of the
416 way, the buffer is shifted left and re-filled. */
417
418 while (ptr < endptr)
419 {
420 int i;
421 BOOL match = FALSE;
422 char *t = ptr;
423 size_t length, linelength;
424
425 /* At this point, ptr is at the start of a line. We need to find the length
426 of the subject string to pass to pcre_exec(). In multiline mode, it is the
427 length remainder of the data in the buffer. Otherwise, it is the length of
428 the next line. After matching, we always advance by the length of the next
429 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430 that any match is constrained to be in the first line. */
431
432 linelength = 0;
433 while (t < endptr && *t++ != '\n') linelength++;
434 length = multiline? endptr - ptr : linelength;
435
436 /* Run through all the patterns until one matches. Note that we don't include
437 the final newline in the subject string. */
438
439 for (i = 0; !match && i < pattern_count; i++)
440 {
441 match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442 offsets, 99) >= 0;
443 }
444
445 /* If it's a match or a not-match (as required), print what's wanted. */
446
447 if (match != invert)
448 {
449 BOOL hyphenprinted = FALSE;
450
451 if (filenames_nomatch_only) return 1;
452
453 if (count_only) count++;
454
455 else if (filenames_only)
456 {
457 fprintf(stdout, "%s\n", printname);
458 return 0;
459 }
460
461 else if (quiet) return 0;
462
463 else
464 {
465 /* See if there is a requirement to print some "after" lines from a
466 previous match. We never print any overlaps. */
467
468 if (after_context > 0 && lastmatchnumber > 0)
469 {
470 int linecount = 0;
471 char *p = lastmatchrestart;
472
473 while (p < ptr && linecount < after_context)
474 {
475 while (*p != '\n') p++;
476 p++;
477 linecount++;
478 }
479
480 /* It is important to advance lastmatchrestart during this printing so
481 that it interacts correctly with any "before" printing below. */
482
483 while (lastmatchrestart < p)
484 {
485 char *pp = lastmatchrestart;
486 if (printname != NULL) fprintf(stdout, "%s-", printname);
487 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488 while (*pp != '\n') pp++;
489 fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490 lastmatchrestart = pp + 1;
491 }
492 if (lastmatchrestart != ptr) hyphenpending = TRUE;
493 }
494
495 /* If there were non-contiguous lines printed above, insert hyphens. */
496
497 if (hyphenpending)
498 {
499 fprintf(stdout, "--\n");
500 hyphenpending = FALSE;
501 hyphenprinted = TRUE;
502 }
503
504 /* See if there is a requirement to print some "before" lines for this
505 match. Again, don't print overlaps. */
506
507 if (before_context > 0)
508 {
509 int linecount = 0;
510 char *p = ptr;
511
512 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513 linecount++ < before_context)
514 {
515 p--;
516 while (p > buffer && p[-1] != '\n') p--;
517 }
518
519 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520 fprintf(stdout, "--\n");
521
522 while (p < ptr)
523 {
524 char *pp = p;
525 if (printname != NULL) fprintf(stdout, "%s-", printname);
526 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527 while (*pp != '\n') pp++;
528 fprintf(stdout, "%.*s", pp - p + 1, p);
529 p = pp + 1;
530 }
531 }
532
533 /* Now print the matching line(s); ensure we set hyphenpending at the end
534 of the file if any context lines are being output. */
535
536 if (after_context > 0 || before_context > 0)
537 endhyphenpending = TRUE;
538
539 if (printname != NULL) fprintf(stdout, "%s:", printname);
540 if (number) fprintf(stdout, "%d:", linenumber);
541
542 /* In multiline mode, we want to print to the end of the line in which
543 the end of the matched string is found, so we adjust linelength and the
544 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
545 start of the match will always be before the first \n character. */
546
547 if (multiline)
548 {
549 char *endmatch = ptr + offsets[1];
550 t = ptr;
551 while (t < endmatch) { if (*t++ == '\n') linenumber++; }
552 while (endmatch < endptr && *endmatch != '\n') endmatch++;
553 linelength = endmatch - ptr;
554 }
555
556 fprintf(stdout, "%.*s\n", linelength, ptr);
557 }
558
559 rc = 0; /* Had some success */
560
561 /* Remember where the last match happened for after_context. We remember
562 where we are about to restart, and that line's number. */
563
564 lastmatchrestart = ptr + linelength + 1;
565 lastmatchnumber = linenumber + 1;
566 }
567
568 /* Advance to after the newline and increment the line number. */
569
570 ptr += linelength + 1;
571 linenumber++;
572
573 /* If we haven't yet reached the end of the file (the buffer is full), and
574 the current point is in the top 1/3 of the buffer, slide the buffer down by
575 1/3 and refill it. Before we do this, if some unprinted "after" lines are
576 about to be lost, print them. */
577
578 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
579 {
580 if (after_context > 0 &&
581 lastmatchnumber > 0 &&
582 lastmatchrestart < buffer + MBUFTHIRD)
583 {
584 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
585 lastmatchnumber = 0;
586 }
587
588 /* Now do the shuffle */
589
590 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
591 ptr -= MBUFTHIRD;
592 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
593 endptr = buffer + bufflength;
594
595 /* Adjust any last match point */
596
597 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
598 }
599 } /* Loop through the whole file */
600
601 /* End of file; print final "after" lines if wanted; do_after_lines sets
602 hyphenpending if it prints something. */
603
604 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
605 hyphenpending |= endhyphenpending;
606
607 /* Print the file name if we are looking for those without matches and there
608 were none. If we found a match, we won't have got this far. */
609
610 if (filenames_nomatch_only)
611 {
612 fprintf(stdout, "%s\n", printname);
613 return 0;
614 }
615
616 /* Print the match count if wanted */
617
618 if (count_only)
619 {
620 if (printname != NULL) fprintf(stdout, "%s:", printname);
621 fprintf(stdout, "%d\n", count);
622 }
623
624 return rc;
625 }
626
627
628
629 /*************************************************
630 * Grep a file or recurse into a directory *
631 *************************************************/
632
633 /* Given a path name, if it's a directory, scan all the files if we are
634 recursing; if it's a file, grep it.
635
636 Arguments:
637 pathname the path to investigate
638 dir_recurse TRUE if recursing is wanted (-r)
639 show_filenames TRUE if file names are wanted for multiple files, except
640 for the only file at top level when not filenames_only
641 only_one_at_top TRUE if the path is the only one at toplevel
642
643 Returns: 0 if there was at least one match
644 1 if there were no matches
645 2 there was some kind of error
646
647 However, file opening failures are suppressed if "silent" is set.
648 */
649
650 static int
651 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
652 BOOL only_one_at_top)
653 {
654 int rc = 1;
655 int sep;
656 FILE *in;
657 char *printname;
658
659 /* If the file name is "-" we scan stdin */
660
661 if (strcmp(pathname, "-") == 0)
662 {
663 return pcregrep(stdin,
664 (filenames_only || filenames_nomatch_only ||
665 (show_filenames && !only_one_at_top))?
666 stdin_name : NULL);
667 }
668
669 /* If the file is a directory and we are recursing, scan each file within it,
670 subject to any include or exclude patterns that were set. The scanning code is
671 localized so it can be made system-specific. */
672
673 if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
674 {
675 char buffer[1024];
676 char *nextfile;
677 directory_type *dir = opendirectory(pathname);
678
679 if (dir == NULL)
680 {
681 if (!silent)
682 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
683 strerror(errno));
684 return 2;
685 }
686
687 while ((nextfile = readdirectory(dir)) != NULL)
688 {
689 int frc, blen;
690 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
691 blen = strlen(buffer);
692
693 if (exclude_compiled != NULL &&
694 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
695 continue;
696
697 if (include_compiled != NULL &&
698 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
699 continue;
700
701 frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
702 if (frc > 1) rc = frc;
703 else if (frc == 0 && rc == 1) rc = 0;
704 }
705
706 closedirectory(dir);
707 return rc;
708 }
709
710 /* If the file is not a directory, or we are not recursing, scan it. If this is
711 the first and only argument at top level, we don't show the file name (unless
712 we are only showing the file name). Otherwise, control is via the
713 show_filenames variable. */
714
715 in = fopen(pathname, "r");
716 if (in == NULL)
717 {
718 if (!silent)
719 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
720 strerror(errno));
721 return 2;
722 }
723
724 printname = (filenames_only || filenames_nomatch_only ||
725 (show_filenames && !only_one_at_top))? pathname : NULL;
726
727 rc = pcregrep(in, printname);
728
729 fclose(in);
730 return rc;
731 }
732
733
734
735
736 /*************************************************
737 * Usage function *
738 *************************************************/
739
/* Write the two-line usage reminder to stderr.

Argument:   rc   the value to hand back
Returns:    rc, unchanged, so that callers can write "exit(usage(2))" or
              "return usage(2)"
*/

static int
usage(int rc)
{
  fputs("Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n",
    stderr);
  fputs("Type `pcregrep --help' for more information.\n", stderr);
  return rc;
}
747
748
749
750
751 /*************************************************
752 * Help function *
753 *************************************************/
754
755 static void
756 help(void)
757 {
758 option_item *op;
759
760 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
761 printf("Search for PATTERN in each FILE or standard input.\n");
762 printf("PATTERN must be present if -f is not used.\n");
763 printf("\"-\" can be used as a file name to mean STDIN.\n");
764 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
765
766 printf("Options:\n");
767
768 for (op = optionlist; op->one_char != 0; op++)
769 {
770 int n;
771 char s[4];
772 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
773 printf(" %s --%s%n", s, op->long_name, &n);
774 n = 30 - n;
775 if (n < 1) n = 1;
776 printf("%.*s%s\n", n, " ", op->help_text);
777 }
778
779 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
780 printf("trailing white space is removed and blank lines are ignored.\n");
781 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
782
783 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
784 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
785 }
786
787
788
789
790 /*************************************************
791 * Handle a single-letter, no data option *
792 *************************************************/
793
794 static int
795 handle_option(int letter, int options)
796 {
797 switch(letter)
798 {
799 case -1: help(); exit(0);
800 case 'c': count_only = TRUE; break;
801 case 'h': filenames = FALSE; break;
802 case 'i': options |= PCRE_CASELESS; break;
803 case 'l': filenames_only = TRUE; break;
804 case 'L': filenames_nomatch_only = TRUE; break;
805 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
806 case 'n': number = TRUE; break;
807 case 'q': quiet = TRUE; break;
808 case 'r': recurse = TRUE; break;
809 case 's': silent = TRUE; break;
810 case 'u': options |= PCRE_UTF8; break;
811 case 'v': invert = TRUE; break;
812 case 'w': word_match = TRUE; break;
813 case 'x': whole_lines = TRUE; break;
814
815 case 'V':
816 fprintf(stderr, "pcregrep version %s using ", VERSION);
817 fprintf(stderr, "PCRE version %s\n", pcre_version());
818 exit(0);
819 break;
820
821 default:
822 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
823 exit(usage(2));
824 }
825
826 return options;
827 }
828
829
830
831
832 /*************************************************
833 * Main program *
834 *************************************************/
835
836 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
837
838 int
839 main(int argc, char **argv)
840 {
841 int i, j;
842 int rc = 1;
843 int options = 0;
844 int errptr;
845 const char *error;
846 BOOL only_one_at_top;
847
848 /* Process the options */
849
850 for (i = 1; i < argc; i++)
851 {
852 option_item *op = NULL;
853 char *option_data = (char *)""; /* default to keep compiler happy */
854 BOOL longop;
855 BOOL longopwasequals = FALSE;
856
857 if (argv[i][0] != '-') break;
858
859 /* If we hit an argument that is just "-", it may be a reference to STDIN,
860 but only if we have previously had -f to define the patterns. */
861
862 if (argv[i][1] == 0)
863 {
864 if (pattern_filename != NULL) break;
865 else exit(usage(2));
866 }
867
868 /* Handle a long name option, or -- to terminate the options */
869
870 if (argv[i][1] == '-')
871 {
872 char *arg = argv[i] + 2;
873 char *argequals = strchr(arg, '=');
874
875 if (*arg == 0) /* -- terminates options */
876 {
877 i++;
878 break; /* out of the options-handling loop */
879 }
880
881 longop = TRUE;
882
883 /* Some long options have data that follows after =, for example file=name.
884 Some options have variations in the long name spelling: specifically, we
885 allow "regexp" because GNU grep allows it, though I personally go along
886 with Jeff Friedl in preferring "regex" without the "p". These options are
887 entered in the table as "regex(p)". No option is in both these categories,
888 fortunately. */
889
890 for (op = optionlist; op->one_char != 0; op++)
891 {
892 char *opbra = strchr(op->long_name, '(');
893 char *equals = strchr(op->long_name, '=');
894 if (opbra == NULL) /* Not a (p) case */
895 {
896 if (equals == NULL) /* Not thing=data case */
897 {
898 if (strcmp(arg, op->long_name) == 0) break;
899 }
900 else /* Special case xxx=data */
901 {
902 int oplen = equals - op->long_name;
903 int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
904 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
905 {
906 option_data = arg + arglen;
907 if (*option_data == '=')
908 {
909 option_data++;
910 longopwasequals = TRUE;
911 }
912 break;
913 }
914 }
915 }
916 else /* Special case xxxx(p) */
917 {
918 char buff1[24];
919 char buff2[24];
920 int baselen = opbra - op->long_name;
921 sprintf(buff1, "%.*s", baselen, op->long_name);
922 sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
923 opbra + 1);
924 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
925 break;
926 }
927 }
928
929 if (op->one_char == 0)
930 {
931 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
932 exit(usage(2));
933 }
934 }
935
936 /* One-char options; many that have no data may be in a single argument; we
937 continue till we hit the last one or one that needs data. */
938
939 else
940 {
941 char *s = argv[i] + 1;
942 longop = FALSE;
943 while (*s != 0)
944 {
945 for (op = optionlist; op->one_char != 0; op++)
946 { if (*s == op->one_char) break; }
947 if (op->one_char == 0)
948 {
949 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
950 *s, argv[i]);
951 exit(usage(2));
952 }
953 if (op->type != OP_NODATA || s[1] == 0)
954 {
955 option_data = s+1;
956 break;
957 }
958 options = handle_option(*s++, options);
959 }
960 }
961
962 /* At this point we should have op pointing to a matched option */
963
964 if (op->type == OP_NODATA)
965 options = handle_option(op->one_char, options);
966 else
967 {
968 if (*option_data == 0)
969 {
970 if (i >= argc - 1 || longopwasequals)
971 {
972 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
973 exit(usage(2));
974 }
975 option_data = argv[++i];
976 }
977
978 if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
979 {
980 char *endptr;
981 int n = strtoul(option_data, &endptr, 10);
982 if (*endptr != 0)
983 {
984 if (longop)
985 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
986 option_data, op->long_name);
987 else
988 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
989 option_data, op->one_char);
990 exit(usage(2));
991 }
992 *((int *)op->dataptr) = n;
993 }
994 }
995 }
996
997 /* Options have been decoded. If -C was used, its value is used as a default
998 for -A and -B. */
999
1000 if (both_context > 0)
1001 {
1002 if (after_context == 0) after_context = both_context;
1003 if (before_context == 0) before_context = both_context;
1004 }
1005
1006 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1007 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1008
1009 if (pattern_list == NULL || hints_list == NULL)
1010 {
1011 fprintf(stderr, "pcregrep: malloc failed\n");
1012 return 2;
1013 }
1014
1015 /* Compile the regular expression(s). */
1016
1017 if (pattern_filename != NULL)
1018 {
1019 FILE *f = fopen(pattern_filename, "r");
1020 char buffer[MBUFTHIRD + 16];
1021 char *rdstart;
1022 int adjust = 0;
1023
1024 if (f == NULL)
1025 {
1026 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1027 strerror(errno));
1028 return 2;
1029 }
1030
1031 if (whole_lines)
1032 {
1033 strcpy(buffer, "^(?:");
1034 adjust = 4;
1035 }
1036 else if (word_match)
1037 {
1038 strcpy(buffer, "\\b");
1039 adjust = 2;
1040 }
1041
1042 rdstart = buffer + adjust;
1043 while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1044 {
1045 char *s = rdstart + (int)strlen(rdstart);
1046 if (pattern_count >= MAX_PATTERN_COUNT)
1047 {
1048 fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1049 MAX_PATTERN_COUNT);
1050 return 2;
1051 }
1052 while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1053 if (s == rdstart) continue;
1054 if (whole_lines) strcpy(s, ")$");
1055 else if (word_match)strcpy(s, "\\b");
1056 else *s = 0;
1057 pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1058 &errptr, NULL);
1059 if (pattern_list[pattern_count++] == NULL)
1060 {
1061 fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1062 pattern_count, errptr - adjust, error);
1063 return 2;
1064 }
1065 }
1066 fclose(f);
1067 }
1068
1069 /* If no file name, a single regex must be given inline. */
1070
1071 else
1072 {
1073 char buffer[MBUFTHIRD + 16];
1074 char *pat;
1075 int adjust = 0;
1076
1077 if (i >= argc) return usage(2);
1078
1079 if (whole_lines)
1080 {
1081 sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1082 pat = buffer;
1083 adjust = 4;
1084 }
1085 else if (word_match)
1086 {
1087 sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1088 pat = buffer;
1089 adjust = 2;
1090 }
1091 else pat = argv[i++];
1092
1093 pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1094
1095 if (pattern_list[0] == NULL)
1096 {
1097 fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1098 errptr - adjust, error);
1099 return 2;
1100 }
1101 pattern_count++;
1102 }
1103
1104 /* Study the regular expressions, as we will be running them many times */
1105
1106 for (j = 0; j < pattern_count; j++)
1107 {
1108 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1109 if (error != NULL)
1110 {
1111 char s[16];
1112 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1113 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1114 return 2;
1115 }
1116 }
1117
1118 /* If there are include or exclude patterns, compile them. */
1119
1120 if (exclude_pattern != NULL)
1121 {
1122 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1123 if (exclude_compiled == NULL)
1124 {
1125 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1126 errptr, error);
1127 return 2;
1128 }
1129 }
1130
1131 if (include_pattern != NULL)
1132 {
1133 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1134 if (include_compiled == NULL)
1135 {
1136 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1137 errptr, error);
1138 return 2;
1139 }
1140 }
1141
1142 /* If there are no further arguments, do the business on stdin and exit */
1143
1144 if (i >= argc) return pcregrep(stdin,
1145 (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1146
1147 /* Otherwise, work through the remaining arguments as files or directories.
1148 Pass in the fact that there is only one argument at top level - this suppresses
1149 the file name if the argument is not a directory and filenames_only is not set.
1150 */
1151
1152 only_one_at_top = (i == argc - 1);
1153
1154 for (; i < argc; i++)
1155 {
1156 int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1157 if (frc > 1) rc = frc;
1158 else if (frc == 0 && rc == 1) rc = 0;
1159 }
1160
1161 return rc;
1162 }
1163
1164 /* End of pcregrep */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12