/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (show annotations) (download)
Tue Jul 31 14:39:09 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 58796 byte(s)
Daniel's patch for config.h and Windows DLL declarations (not fully working).

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include <config.h>
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include <pcre.h>
59
60 #define FALSE 0
61 #define TRUE 1
62
63 typedef int BOOL;
64
65 #define MAX_PATTERN_COUNT 100
66
67 #if BUFSIZ > 8192
68 #define MBUFTHIRD BUFSIZ
69 #else
70 #define MBUFTHIRD 8192
71 #endif
72
73 /* Values for the "filenames" variable, which specifies options for file name
74 output. The order is important; it is assumed that a file name is wanted for
75 all values greater than FN_DEFAULT. */
76
77 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78
79 /* Actions for the -d and -D options */
80
81 enum { dee_READ, dee_SKIP, dee_RECURSE };
82 enum { DEE_READ, DEE_SKIP };
83
84 /* Actions for special processing options (flag bits) */
85
86 #define PO_WORD_MATCH 0x0001
87 #define PO_LINE_MATCH 0x0002
88 #define PO_FIXED_STRINGS 0x0004
89
90 /* Line ending types */
91
92 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL hyphenpending = FALSE;
146 static BOOL invert = FALSE;
147 static BOOL multiline = FALSE;
148 static BOOL number = FALSE;
149 static BOOL only_matching = FALSE;
150 static BOOL quiet = FALSE;
151 static BOOL silent = FALSE;
152 static BOOL utf8 = FALSE;
153
154 /* Structure for options and list of them */
155
156 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157 OP_PATLIST };
158
159 typedef struct option_item {
160 int type;
161 int one_char;
162 void *dataptr;
163 const char *long_name;
164 const char *help_text;
165 } option_item;
166
167 /* Options without a single-letter equivalent get a negative value. This can be
168 used to identify them. */
169
170 #define N_COLOUR (-1)
171 #define N_EXCLUDE (-2)
172 #define N_HELP (-3)
173 #define N_INCLUDE (-4)
174 #define N_LABEL (-5)
175 #define N_LOCALE (-6)
176 #define N_NULL (-7)
177
178 static option_item optionlist[] = {
179 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207 #ifdef JFRIEDL_DEBUG
208 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209 #endif
210 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216 { OP_NODATA, 0, NULL, NULL, NULL }
217 };
218
219 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221 that the combination of -w and -x has the same effect as -x on its own, so we
222 can treat them as the same. */
223
224 static const char *prefix[] = {
225 "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
226
227 static const char *suffix[] = {
228 "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
229
230 /* UTF-8 tables - used only when the newline setting is "any". */
231
232 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233
234 const char utf8_table4[] = {
235 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239
240
241
242 /*************************************************
243 * OS-specific functions *
244 *************************************************/
245
246 /* These functions are defined so that they can be made system specific,
247 although at present the only ones are for Unix, Win32, and for "no support". */
248
249
250 /************* Directory scanning in Unix ***********/
251
252 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 #include <sys/types.h>
254 #include <sys/stat.h>
255 #include <dirent.h>
256
257 typedef DIR directory_type;
258
/* Report whether a path names a directory (Unix version).

Argument:   filename    the path to examine
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() itself fails
*/

static int
isdirectory(char *filename)
{
struct stat info;
int result = 0;

/* A stat() failure is reported as "not a directory", in the expectation that
the later attempt to open the path as a file will fail. */

if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) result = '/';
return result;
}
267
268 static directory_type *
269 opendirectory(char *filename)
270 {
271 return opendir(filename);
272 }
273
274 static char *
275 readdirectory(directory_type *dir)
276 {
277 for (;;)
278 {
279 struct dirent *dent = readdir(dir);
280 if (dent == NULL) return NULL;
281 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282 return dent->d_name;
283 }
284 /* Control never reaches here */
285 }
286
287 static void
288 closedirectory(directory_type *dir)
289 {
290 closedir(dir);
291 }
292
293
294 /************* Test for regular file in Unix **********/
295
/* Report whether a path names a regular file (Unix version).

Argument:   filename    the path to examine
Returns:    non-zero if stat() reports a regular file, or if stat() fails;
            zero if stat() succeeds and reports anything else
*/

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) >= 0) return S_ISREG(info.st_mode);

/* stat() failed: claim "regular" in the expectation that the caller's attempt
to open the path as a file will then fail. */

return 1;
}
304
305
306 /************* Test stdout for being a terminal in Unix **********/
307
308 static BOOL
309 is_stdout_tty(void)
310 {
311 return isatty(fileno(stdout));
312 }
313
314
315 /************* Directory scanning in Win32 ***********/
316
317 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319 when it did not exist. */
320
321
322 #elif HAVE_WINDOWS_H
323
324 #ifndef STRICT
325 # define STRICT
326 #endif
327 #ifndef WIN32_LEAN_AND_MEAN
328 # define WIN32_LEAN_AND_MEAN
329 #endif
330 #ifndef INVALID_FILE_ATTRIBUTES
331 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332 #endif
333
334 #include <windows.h>
335
336 typedef struct directory_type
337 {
338 HANDLE handle;
339 BOOL first;
340 WIN32_FIND_DATA data;
341 } directory_type;
342
343 int
344 isdirectory(char *filename)
345 {
346 DWORD attr = GetFileAttributes(filename);
347 if (attr == INVALID_FILE_ATTRIBUTES)
348 return 0;
349 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350 }
351
352 directory_type *
353 opendirectory(char *filename)
354 {
355 size_t len;
356 char *pattern;
357 directory_type *dir;
358 DWORD err;
359 len = strlen(filename);
360 pattern = (char *) malloc(len + 3);
361 dir = (directory_type *) malloc(sizeof(*dir));
362 if ((pattern == NULL) || (dir == NULL))
363 {
364 fprintf(stderr, "pcregrep: malloc failed\n");
365 exit(2);
366 }
367 memcpy(pattern, filename, len);
368 memcpy(&(pattern[len]), "\\*", 3);
369 dir->handle = FindFirstFile(pattern, &(dir->data));
370 if (dir->handle != INVALID_HANDLE_VALUE)
371 {
372 free(pattern);
373 dir->first = TRUE;
374 return dir;
375 }
376 err = GetLastError();
377 free(pattern);
378 free(dir);
379 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380 return NULL;
381 }
382
383 char *
384 readdirectory(directory_type *dir)
385 {
386 for (;;)
387 {
388 if (!dir->first)
389 {
390 if (!FindNextFile(dir->handle, &(dir->data)))
391 return NULL;
392 }
393 else
394 {
395 dir->first = FALSE;
396 }
397 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398 return dir->data.cFileName;
399 }
400 #ifndef _MSC_VER
401 return NULL; /* Keep compiler happy; never executed */
402 #endif
403 }
404
405 void
406 closedirectory(directory_type *dir)
407 {
408 FindClose(dir->handle);
409 free(dir);
410 }
411
412
413 /************* Test for regular file in Win32 **********/
414
415 /* I don't know how to do this, or if it can be done; assume all paths are
416 regular if they are not directories. */
417
/* Report whether a path should be treated as a regular file (Win32 version).
Anything that isdirectory() does not recognize as a directory is assumed to be
regular.

Argument:   filename    the path to examine
Returns:    1 if isdirectory() returns zero for the path; 0 otherwise
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
422
423
424 /************* Test stdout for being a terminal in Win32 **********/
425
426 /* I don't know how to do this; assume never */
427
428 static BOOL
429 is_stdout_tty(void)
430 {
431 FALSE;
432 }
433
434
435 /************* Directory scanning when we can't do it ***********/
436
437 /* The type is void, and apart from isdirectory(), the functions do nothing. */
438
439 #else
440
/* Stand-ins used when no directory-scanning support is available. There is
nothing to scan, so the handle type is just void. */

typedef void directory_type;

/* No path is ever reported as a directory. */

int
isdirectory(char *filename)
{
return 0;
}

/* Opening a directory never succeeds. */

directory_type *
opendirectory(char *filename)
{
return NULL;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
return NULL;
}

/* Nothing was opened, so there is nothing to close. */

void
closedirectory(directory_type *dir)
{
}
447
448
449 /************* Test for regular when we can't do it **********/
450
451 /* Assume all files are regular. */
452
/* With no means of checking, every path is reported as a regular file.

Argument:   filename    the path (not examined)
Returns:    1 always
*/

int
isregfile(char *filename)
{
return 1;
}
454
455
456 /************* Test stdout for being a terminal when we can't do it **********/
457
458 static BOOL
459 is_stdout_tty(void)
460 {
461 return FALSE;
462 }
463
464
465 #endif
466
467
468
469 #ifndef HAVE_STRERROR
470 /*************************************************
471 * Provide strerror() for non-ANSI libraries *
472 *************************************************/
473
474 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475 in their libraries, but can provide the same facility by this simple
476 alternative function. */
477
/* The system's table of error messages and the number of entries in it. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist.

Argument:   n       an error number
Returns:    the table entry for n, or a fixed "unknown" text when n lies
            outside the range 0 to sys_nerr-1
*/

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
487 #endif /* HAVE_STRERROR */
488
489
490
491 /*************************************************
492 * Find end of line *
493 *************************************************/
494
495 /* The length of the endline sequence that is found is set via lenptr. This may
496 be zero at the very end of the file if there is no line-ending sequence there.
497
498 Arguments:
499 p current position in line
500 endptr end of available data
501 lenptr where to put the length of the eol sequence
502
503 Returns: pointer to the last byte of the line
504 */
505
506 static char *
507 end_of_line(char *p, char *endptr, int *lenptr)
508 {
509 switch(endlinetype)
510 {
511 default: /* Just in case */
512 case EL_LF:
513 while (p < endptr && *p != '\n') p++;
514 if (p < endptr)
515 {
516 *lenptr = 1;
517 return p + 1;
518 }
519 *lenptr = 0;
520 return endptr;
521
522 case EL_CR:
523 while (p < endptr && *p != '\r') p++;
524 if (p < endptr)
525 {
526 *lenptr = 1;
527 return p + 1;
528 }
529 *lenptr = 0;
530 return endptr;
531
532 case EL_CRLF:
533 for (;;)
534 {
535 while (p < endptr && *p != '\r') p++;
536 if (++p >= endptr)
537 {
538 *lenptr = 0;
539 return endptr;
540 }
541 if (*p == '\n')
542 {
543 *lenptr = 2;
544 return p + 1;
545 }
546 }
547 break;
548
549 case EL_ANYCRLF:
550 while (p < endptr)
551 {
552 int extra = 0;
553 register int c = *((unsigned char *)p);
554
555 if (utf8 && c >= 0xc0)
556 {
557 int gcii, gcss;
558 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559 gcss = 6*extra;
560 c = (c & utf8_table3[extra]) << gcss;
561 for (gcii = 1; gcii <= extra; gcii++)
562 {
563 gcss -= 6;
564 c |= (p[gcii] & 0x3f) << gcss;
565 }
566 }
567
568 p += 1 + extra;
569
570 switch (c)
571 {
572 case 0x0a: /* LF */
573 *lenptr = 1;
574 return p;
575
576 case 0x0d: /* CR */
577 if (p < endptr && *p == 0x0a)
578 {
579 *lenptr = 2;
580 p++;
581 }
582 else *lenptr = 1;
583 return p;
584
585 default:
586 break;
587 }
588 } /* End of loop for ANYCRLF case */
589
590 *lenptr = 0; /* Must have hit the end */
591 return endptr;
592
593 case EL_ANY:
594 while (p < endptr)
595 {
596 int extra = 0;
597 register int c = *((unsigned char *)p);
598
599 if (utf8 && c >= 0xc0)
600 {
601 int gcii, gcss;
602 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603 gcss = 6*extra;
604 c = (c & utf8_table3[extra]) << gcss;
605 for (gcii = 1; gcii <= extra; gcii++)
606 {
607 gcss -= 6;
608 c |= (p[gcii] & 0x3f) << gcss;
609 }
610 }
611
612 p += 1 + extra;
613
614 switch (c)
615 {
616 case 0x0a: /* LF */
617 case 0x0b: /* VT */
618 case 0x0c: /* FF */
619 *lenptr = 1;
620 return p;
621
622 case 0x0d: /* CR */
623 if (p < endptr && *p == 0x0a)
624 {
625 *lenptr = 2;
626 p++;
627 }
628 else *lenptr = 1;
629 return p;
630
631 case 0x85: /* NEL */
632 *lenptr = utf8? 2 : 1;
633 return p;
634
635 case 0x2028: /* LS */
636 case 0x2029: /* PS */
637 *lenptr = 3;
638 return p;
639
640 default:
641 break;
642 }
643 } /* End of loop for ANY case */
644
645 *lenptr = 0; /* Must have hit the end */
646 return endptr;
647 } /* End of overall switch */
648 }
649
650
651
652 /*************************************************
653 * Find start of previous line *
654 *************************************************/
655
656 /* This is called when looking back for before lines to print.
657
658 Arguments:
659 p start of the subsequent line
660 startptr start of available data
661
662 Returns: pointer to the start of the previous line
663 */
664
665 static char *
666 previous_line(char *p, char *startptr)
667 {
668 switch(endlinetype)
669 {
670 default: /* Just in case */
671 case EL_LF:
672 p--;
673 while (p > startptr && p[-1] != '\n') p--;
674 return p;
675
676 case EL_CR:
677 p--;
678 while (p > startptr && p[-1] != '\n') p--;
679 return p;
680
681 case EL_CRLF:
682 for (;;)
683 {
684 p -= 2;
685 while (p > startptr && p[-1] != '\n') p--;
686 if (p <= startptr + 1 || p[-2] == '\r') return p;
687 }
688 return p; /* But control should never get here */
689
690 case EL_ANY:
691 case EL_ANYCRLF:
692 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693 if (utf8) while ((*p & 0xc0) == 0x80) p--;
694
695 while (p > startptr)
696 {
697 register int c;
698 char *pp = p - 1;
699
700 if (utf8)
701 {
702 int extra = 0;
703 while ((*pp & 0xc0) == 0x80) pp--;
704 c = *((unsigned char *)pp);
705 if (c >= 0xc0)
706 {
707 int gcii, gcss;
708 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709 gcss = 6*extra;
710 c = (c & utf8_table3[extra]) << gcss;
711 for (gcii = 1; gcii <= extra; gcii++)
712 {
713 gcss -= 6;
714 c |= (pp[gcii] & 0x3f) << gcss;
715 }
716 }
717 }
718 else c = *((unsigned char *)pp);
719
720 if (endlinetype == EL_ANYCRLF) switch (c)
721 {
722 case 0x0a: /* LF */
723 case 0x0d: /* CR */
724 return p;
725
726 default:
727 break;
728 }
729
730 else switch (c)
731 {
732 case 0x0a: /* LF */
733 case 0x0b: /* VT */
734 case 0x0c: /* FF */
735 case 0x0d: /* CR */
736 case 0x85: /* NEL */
737 case 0x2028: /* LS */
738 case 0x2029: /* PS */
739 return p;
740
741 default:
742 break;
743 }
744
745 p = pp; /* Back one character */
746 } /* End of loop for ANY case */
747
748 return startptr; /* Hit start of data */
749 } /* End of overall switch */
750 }
751
752
753
754
755
756 /*************************************************
757 * Print the previous "after" lines *
758 *************************************************/
759
760 /* This is called if we are about to lose said lines because of buffer filling,
761 and at the end of the file. The data in the line is written using fwrite() so
762 that a binary zero does not terminate it.
763
764 Arguments:
765 lastmatchnumber the number of the last matching line, plus one
766 lastmatchrestart where we restarted after the last match
767 endptr end of available data
768 printname filename for printing
769
770 Returns: nothing
771 */
772
773 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774 char *endptr, char *printname)
775 {
776 if (after_context > 0 && lastmatchnumber > 0)
777 {
778 int count = 0;
779 while (lastmatchrestart < endptr && count++ < after_context)
780 {
781 int ellength;
782 char *pp = lastmatchrestart;
783 if (printname != NULL) fprintf(stdout, "%s-", printname);
784 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 pp = end_of_line(pp, endptr, &ellength);
786 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787 lastmatchrestart = pp;
788 }
789 hyphenpending = TRUE;
790 }
791 }
792
793
794
795 /*************************************************
796 * Grep an individual file *
797 *************************************************/
798
799 /* This is called from grep_or_recurse() below. It uses a buffer that is three
800 times the value of MBUFTHIRD. The matching point is never allowed to stray into
801 the top third of the buffer, thus keeping more of the file available for
802 context printing or for multiline scanning. For large files, the pointer will
803 be in the middle third most of the time, so the bottom third is available for
804 "before" context printing.
805
806 Arguments:
807 in the fopened FILE stream
808 printname the file name if it is to be printed for each match
809 or NULL if the file name is not to be printed
810 it cannot be NULL if filenames[_nomatch]_only is set
811
812 Returns: 0 if there was at least one match
813 1 otherwise (no matches)
814 */
815
816 static int
817 pcregrep(FILE *in, char *printname)
818 {
819 int rc = 1;
820 int linenumber = 1;
821 int lastmatchnumber = 0;
822 int count = 0;
823 int offsets[99];
824 char *lastmatchrestart = NULL;
825 char buffer[3*MBUFTHIRD];
826 char *ptr = buffer;
827 char *endptr;
828 size_t bufflength;
829 BOOL endhyphenpending = FALSE;
830
831 /* Do the first read into the start of the buffer and set up the pointer to
832 end of what we have. */
833
834 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835 endptr = buffer + bufflength;
836
837 /* Loop while the current pointer is not at the end of the file. For large
838 files, endptr will be at the end of the buffer when we are in the middle of the
839 file, but ptr will never get there, because as soon as it gets over 2/3 of the
840 way, the buffer is shifted left and re-filled. */
841
842 while (ptr < endptr)
843 {
844 int i, endlinelength;
845 int mrc = 0;
846 BOOL match = FALSE;
847 char *t = ptr;
848 size_t length, linelength;
849
850 /* At this point, ptr is at the start of a line. We need to find the length
851 of the subject string to pass to pcre_exec(). In multiline mode, it is the
852 length of the remainder of the data in the buffer. Otherwise, it is the length of
853 the next line. After matching, we always advance by the length of the next
854 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855 that any match is constrained to be in the first line. */
856
857 t = end_of_line(t, endptr, &endlinelength);
858 linelength = t - ptr - endlinelength;
859 length = multiline? (size_t)(endptr - ptr) : linelength;
860
861 /* Extra processing for Jeffrey Friedl's debugging. */
862
863 #ifdef JFRIEDL_DEBUG
864 if (jfriedl_XT || jfriedl_XR)
865 {
866 #include <sys/time.h>
867 #include <time.h>
868 struct timeval start_time, end_time;
869 struct timezone dummy;
870
871 if (jfriedl_XT)
872 {
873 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874 const char *orig = ptr;
875 ptr = malloc(newlen + 1);
876 if (!ptr) {
877 printf("out of memory");
878 exit(2);
879 }
880 endptr = ptr;
881 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882 for (i = 0; i < jfriedl_XT; i++) {
883 strncpy(endptr, orig, length);
884 endptr += length;
885 }
886 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887 length = newlen;
888 }
889
890 if (gettimeofday(&start_time, &dummy) != 0)
891 perror("bad gettimeofday");
892
893
894 for (i = 0; i < jfriedl_XR; i++)
895 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896
897 if (gettimeofday(&end_time, &dummy) != 0)
898 perror("bad gettimeofday");
899
900 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901 -
902 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903
904 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905 return 0;
906 }
907 #endif
908
909
910 /* Run through all the patterns until one matches. Note that we don't include
911 the final newline in the subject string. */
912
913 for (i = 0; i < pattern_count; i++)
914 {
915 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916 offsets, 99);
917 if (mrc >= 0) { match = TRUE; break; }
918 if (mrc != PCRE_ERROR_NOMATCH)
919 {
920 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922 fprintf(stderr, "this line:\n");
923 fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924 fprintf(stderr, "\n");
925 if (error_count == 0 &&
926 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927 {
928 fprintf(stderr, "pcregrep: error %d means that a resource limit "
929 "was exceeded\n", mrc);
930 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931 }
932 if (error_count++ > 20)
933 {
934 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935 exit(2);
936 }
937 match = invert; /* No more matching; don't show the line again */
938 break;
939 }
940 }
941
942 /* If it's a match or a not-match (as required), do what's wanted. */
943
944 if (match != invert)
945 {
946 BOOL hyphenprinted = FALSE;
947
948 /* We've failed if we want a file that doesn't have any matches. */
949
950 if (filenames == FN_NOMATCH_ONLY) return 1;
951
952 /* Just count if just counting is wanted. */
953
954 if (count_only) count++;
955
956 /* If all we want is a file name, there is no need to scan any more lines
957 in the file. */
958
959 else if (filenames == FN_ONLY)
960 {
961 fprintf(stdout, "%s\n", printname);
962 return 0;
963 }
964
965 /* Likewise, if all we want is a yes/no answer. */
966
967 else if (quiet) return 0;
968
969 /* The --only-matching option prints just the substring that matched, and
970 does not print any context. */
971
972 else if (only_matching)
973 {
974 if (printname != NULL) fprintf(stdout, "%s:", printname);
975 if (number) fprintf(stdout, "%d:", linenumber);
976 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977 fprintf(stdout, "\n");
978 }
979
980 /* This is the default case when none of the above options is set. We print
981 the matching line(s), possibly preceded and/or followed by other lines of
982 context. */
983
984 else
985 {
986 /* See if there is a requirement to print some "after" lines from a
987 previous match. We never print any overlaps. */
988
989 if (after_context > 0 && lastmatchnumber > 0)
990 {
991 int ellength;
992 int linecount = 0;
993 char *p = lastmatchrestart;
994
995 while (p < ptr && linecount < after_context)
996 {
997 p = end_of_line(p, ptr, &ellength);
998 linecount++;
999 }
1000
1001 /* It is important to advance lastmatchrestart during this printing so
1002 that it interacts correctly with any "before" printing below. Print
1003 each line's data using fwrite() in case there are binary zeroes. */
1004
1005 while (lastmatchrestart < p)
1006 {
1007 char *pp = lastmatchrestart;
1008 if (printname != NULL) fprintf(stdout, "%s-", printname);
1009 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 pp = end_of_line(pp, endptr, &ellength);
1011 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012 lastmatchrestart = pp;
1013 }
1014 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015 }
1016
1017 /* If there were non-contiguous lines printed above, insert hyphens. */
1018
1019 if (hyphenpending)
1020 {
1021 fprintf(stdout, "--\n");
1022 hyphenpending = FALSE;
1023 hyphenprinted = TRUE;
1024 }
1025
1026 /* See if there is a requirement to print some "before" lines for this
1027 match. Again, don't print overlaps. */
1028
1029 if (before_context > 0)
1030 {
1031 int linecount = 0;
1032 char *p = ptr;
1033
1034 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 linecount < before_context)
1036 {
1037 linecount++;
1038 p = previous_line(p, buffer);
1039 }
1040
1041 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042 fprintf(stdout, "--\n");
1043
1044 while (p < ptr)
1045 {
1046 int ellength;
1047 char *pp = p;
1048 if (printname != NULL) fprintf(stdout, "%s-", printname);
1049 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 pp = end_of_line(pp, endptr, &ellength);
1051 fwrite(p, 1, pp - p, stdout);
1052 p = pp;
1053 }
1054 }
1055
1056 /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 of the file if any context lines are being output. */
1058
1059 if (after_context > 0 || before_context > 0)
1060 endhyphenpending = TRUE;
1061
1062 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 if (number) fprintf(stdout, "%d:", linenumber);
1064
1065 /* In multiline mode, we want to print to the end of the line in which
1066 the end of the matched string is found, so we adjust linelength and the
1067 line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1068 start of the match will always be before the first newline sequence. */
1069
1070 if (multiline)
1071 {
1072 int ellength;
1073 char *endmatch = ptr + offsets[1];
1074 t = ptr;
1075 while (t < endmatch)
1076 {
1077 t = end_of_line(t, endptr, &ellength);
1078 if (t <= endmatch) linenumber++; else break;
1079 }
1080 endmatch = end_of_line(endmatch, endptr, &ellength);
1081 linelength = endmatch - ptr - ellength;
1082 }
1083
1084 /*** NOTE: Use only fwrite() to output the data line, so that binary
1085 zeroes are treated as just another data character. */
1086
1087 /* This extra option, for Jeffrey Friedl's debugging requirements,
1088 replaces the matched string, or a specific captured string if it exists,
1089 with X. When this happens, colouring is ignored. */
1090
1091 #ifdef JFRIEDL_DEBUG
1092 if (S_arg >= 0 && S_arg < mrc)
1093 {
1094 int first = S_arg * 2;
1095 int last = first + 1;
1096 fwrite(ptr, 1, offsets[first], stdout);
1097 fprintf(stdout, "X");
1098 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1099 }
1100 else
1101 #endif
1102
1103 /* We have to split the line(s) up if colouring. */
1104
1105 if (do_colour)
1106 {
1107 fwrite(ptr, 1, offsets[0], stdout);
1108 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1109 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1110 fprintf(stdout, "%c[00m", 0x1b);
1111 fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1112 }
1113 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1114 }
1115
1116 /* End of doing what has to be done for a match */
1117
1118 rc = 0; /* Had some success */
1119
1120 /* Remember where the last match happened for after_context. We remember
1121 where we are about to restart, and that line's number. */
1122
1123 lastmatchrestart = ptr + linelength + endlinelength;
1124 lastmatchnumber = linenumber + 1;
1125 }
1126
1127 /* Advance to after the newline and increment the line number. */
1128
1129 ptr += linelength + endlinelength;
1130 linenumber++;
1131
1132 /* If we haven't yet reached the end of the file (the buffer is full), and
1133 the current point is in the top 1/3 of the buffer, slide the buffer down by
1134 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1135 about to be lost, print them. */
1136
1137 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1138 {
1139 if (after_context > 0 &&
1140 lastmatchnumber > 0 &&
1141 lastmatchrestart < buffer + MBUFTHIRD)
1142 {
1143 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1144 lastmatchnumber = 0;
1145 }
1146
1147 /* Now do the shuffle */
1148
1149 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1150 ptr -= MBUFTHIRD;
1151 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1152 endptr = buffer + bufflength;
1153
1154 /* Adjust any last match point */
1155
1156 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1157 }
1158 } /* Loop through the whole file */
1159
1160 /* End of file; print final "after" lines if wanted; do_after_lines sets
1161 hyphenpending if it prints something. */
1162
1163 if (!only_matching && !count_only)
1164 {
1165 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1166 hyphenpending |= endhyphenpending;
1167 }
1168
1169 /* Print the file name if we are looking for those without matches and there
1170 were none. If we found a match, we won't have got this far. */
1171
1172 if (filenames == FN_NOMATCH_ONLY)
1173 {
1174 fprintf(stdout, "%s\n", printname);
1175 return 0;
1176 }
1177
1178 /* Print the match count if wanted */
1179
1180 if (count_only)
1181 {
1182 if (printname != NULL) fprintf(stdout, "%s:", printname);
1183 fprintf(stdout, "%d\n", count);
1184 }
1185
1186 return rc;
1187 }
1188
1189
1190
1191 /*************************************************
1192 * Grep a file or recurse into a directory *
1193 *************************************************/
1194
1195 /* Given a path name, if it's a directory, scan all the files if we are
1196 recursing; if it's a file, grep it.
1197
1198 Arguments:
1199 pathname the path to investigate
1200 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1201 only_one_at_top TRUE if the path is the only one at toplevel
1202
1203 Returns: 0 if there was at least one match
1204 1 if there were no matches
1205 2 there was some kind of error
1206
1207 However, file opening failures are suppressed if "silent" is set.
1208 */
1209
1210 static int
1211 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1212 {
1213 int rc = 1;
1214 int sep;
1215 FILE *in;
1216
1217 /* If the file name is "-" we scan stdin */
1218
1219 if (strcmp(pathname, "-") == 0)
1220 {
1221 return pcregrep(stdin,
1222 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1223 stdin_name : NULL);
1224 }
1225
1226
1227 /* If the file is a directory, skip if skipping or if we are recursing, scan
1228 each file within it, subject to any include or exclude patterns that were set.
1229 The scanning code is localized so it can be made system-specific. */
1230
1231 if ((sep = isdirectory(pathname)) != 0)
1232 {
1233 if (dee_action == dee_SKIP) return 1;
1234 if (dee_action == dee_RECURSE)
1235 {
1236 char buffer[1024];
1237 char *nextfile;
1238 directory_type *dir = opendirectory(pathname);
1239
1240 if (dir == NULL)
1241 {
1242 if (!silent)
1243 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1244 strerror(errno));
1245 return 2;
1246 }
1247
1248 while ((nextfile = readdirectory(dir)) != NULL)
1249 {
1250 int frc, blen;
1251 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1252 blen = strlen(buffer);
1253
1254 if (exclude_compiled != NULL &&
1255 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1256 continue;
1257
1258 if (include_compiled != NULL &&
1259 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1260 continue;
1261
1262 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1263 if (frc > 1) rc = frc;
1264 else if (frc == 0 && rc == 1) rc = 0;
1265 }
1266
1267 closedirectory(dir);
1268 return rc;
1269 }
1270 }
1271
1272 /* If the file is not a directory and not a regular file, skip it if that's
1273 been requested. */
1274
1275 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1276
1277 /* Control reaches here if we have a regular file, or if we have a directory
1278 and recursion or skipping was not requested, or if we have anything else and
1279 skipping was not requested. The scan proceeds. If this is the first and only
1280 argument at top level, we don't show the file name, unless we are only showing
1281 the file name, or the filename was forced (-H). */
1282
1283 in = fopen(pathname, "r");
1284 if (in == NULL)
1285 {
1286 if (!silent)
1287 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1288 strerror(errno));
1289 return 2;
1290 }
1291
1292 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1293 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1294
1295 fclose(in);
1296 return rc;
1297 }
1298
1299
1300
1301
1302 /*************************************************
1303 * Usage function *
1304 *************************************************/
1305
1306 static int
1307 usage(int rc)
1308 {
1309 option_item *op;
1310 fprintf(stderr, "Usage: pcregrep [-");
1311 for (op = optionlist; op->one_char != 0; op++)
1312 {
1313 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1314 }
1315 fprintf(stderr, "] [long options] [pattern] [files]\n");
1316 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1317 return rc;
1318 }
1319
1320
1321
1322
1323 /*************************************************
1324 * Help function *
1325 *************************************************/
1326
1327 static void
1328 help(void)
1329 {
1330 option_item *op;
1331
1332 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1333 printf("Search for PATTERN in each FILE or standard input.\n");
1334 printf("PATTERN must be present if neither -e nor -f is used.\n");
1335 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1336 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1337
1338 printf("Options:\n");
1339
1340 for (op = optionlist; op->one_char != 0; op++)
1341 {
1342 int n;
1343 char s[4];
1344 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1345 printf(" %s --%s%n", s, op->long_name, &n);
1346 n = 30 - n;
1347 if (n < 1) n = 1;
1348 printf("%.*s%s\n", n, " ", op->help_text);
1349 }
1350
1351 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1352 printf("trailing white space is removed and blank lines are ignored.\n");
1353 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1354
1355 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1356 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1357 }
1358
1359
1360
1361
1362 /*************************************************
1363 * Handle a single-letter, no data option *
1364 *************************************************/
1365
1366 static int
1367 handle_option(int letter, int options)
1368 {
1369 switch(letter)
1370 {
1371 case N_HELP: help(); exit(0);
1372 case 'c': count_only = TRUE; break;
1373 case 'F': process_options |= PO_FIXED_STRINGS; break;
1374 case 'H': filenames = FN_FORCE; break;
1375 case 'h': filenames = FN_NONE; break;
1376 case 'i': options |= PCRE_CASELESS; break;
1377 case 'l': filenames = FN_ONLY; break;
1378 case 'L': filenames = FN_NOMATCH_ONLY; break;
1379 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1380 case 'n': number = TRUE; break;
1381 case 'o': only_matching = TRUE; break;
1382 case 'q': quiet = TRUE; break;
1383 case 'r': dee_action = dee_RECURSE; break;
1384 case 's': silent = TRUE; break;
1385 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1386 case 'v': invert = TRUE; break;
1387 case 'w': process_options |= PO_WORD_MATCH; break;
1388 case 'x': process_options |= PO_LINE_MATCH; break;
1389
1390 case 'V':
1391 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1392 exit(0);
1393 break;
1394
1395 default:
1396 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1397 exit(usage(2));
1398 }
1399
1400 return options;
1401 }
1402
1403
1404
1405
1406 /*************************************************
1407 * Construct printed ordinal *
1408 *************************************************/
1409
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
"11th", "21st", and so on.

Argument:  n   the number
Returns:   a pointer to a static buffer holding the text; the buffer is
           overwritten by the next call

Numbers whose last two digits are 11, 12, or 13 take "th" even though their
last digit is 1, 2, or 3. Zero and negative numbers always take "th".
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits; the
extra four cover a minus sign, the two-letter suffix, and the terminator. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1428
1429
1430
1431 /*************************************************
1432 * Compile a single pattern *
1433 *************************************************/
1434
1435 /* When the -F option has been used, this is called for each substring.
1436 Otherwise it's called for each supplied pattern.
1437
1438 Arguments:
1439 pattern the pattern string
1440 options the PCRE options
1441 filename the file name, or NULL for a command-line pattern
1442 count 0 if this is the only command line pattern, or
1443 number of the command line pattern, or
1444 linenumber for a pattern from a file
1445
1446 Returns: TRUE on success, FALSE after an error
1447 */
1448
1449 static BOOL
1450 compile_single_pattern(char *pattern, int options, char *filename, int count)
1451 {
1452 char buffer[MBUFTHIRD + 16];
1453 const char *error;
1454 int errptr;
1455
1456 if (pattern_count >= MAX_PATTERN_COUNT)
1457 {
1458 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1459 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1460 return FALSE;
1461 }
1462
1463 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1464 suffix[process_options]);
1465 pattern_list[pattern_count] =
1466 pcre_compile(buffer, options, &error, &errptr, pcretables);
1467 if (pattern_list[pattern_count] != NULL)
1468 {
1469 pattern_count++;
1470 return TRUE;
1471 }
1472
1473 /* Handle compile errors */
1474
1475 errptr -= (int)strlen(prefix[process_options]);
1476 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1477
1478 if (filename == NULL)
1479 {
1480 if (count == 0)
1481 fprintf(stderr, "pcregrep: Error in command-line regex "
1482 "at offset %d: %s\n", errptr, error);
1483 else
1484 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1485 "at offset %d: %s\n", ordin(count), errptr, error);
1486 }
1487 else
1488 {
1489 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1490 "at offset %d: %s\n", count, filename, errptr, error);
1491 }
1492
1493 return FALSE;
1494 }
1495
1496
1497
1498 /*************************************************
1499 * Compile one supplied pattern *
1500 *************************************************/
1501
1502 /* When the -F option has been used, each string may be a list of strings,
1503 separated by line breaks. They will be matched literally.
1504
1505 Arguments:
1506 pattern the pattern string
1507 options the PCRE options
1508 filename the file name, or NULL for a command-line pattern
1509 count 0 if this is the only command line pattern, or
1510 number of the command line pattern, or
1511 linenumber for a pattern from a file
1512
1513 Returns: TRUE on success, FALSE after an error
1514 */
1515
1516 static BOOL
1517 compile_pattern(char *pattern, int options, char *filename, int count)
1518 {
1519 if ((process_options & PO_FIXED_STRINGS) != 0)
1520 {
1521 char *eop = pattern + strlen(pattern);
1522 char buffer[MBUFTHIRD];
1523 for(;;)
1524 {
1525 int ellength;
1526 char *p = end_of_line(pattern, eop, &ellength);
1527 if (ellength == 0)
1528 return compile_single_pattern(pattern, options, filename, count);
1529 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1530 pattern = p;
1531 if (!compile_single_pattern(buffer, options, filename, count))
1532 return FALSE;
1533 }
1534 }
1535 else return compile_single_pattern(pattern, options, filename, count);
1536 }
1537
1538
1539
1540 /*************************************************
1541 * Main program *
1542 *************************************************/
1543
1544 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1545
1546 int
1547 main(int argc, char **argv)
1548 {
1549 int i, j;
1550 int rc = 1;
1551 int pcre_options = 0;
1552 int cmd_pattern_count = 0;
1553 int hint_count = 0;
1554 int errptr;
1555 BOOL only_one_at_top;
1556 char *patterns[MAX_PATTERN_COUNT];
1557 const char *locale_from = "--locale";
1558 const char *error;
1559
1560 /* Set the default line ending value from the default in the PCRE library;
1561 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1562 */
1563
1564 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1565 switch(i)
1566 {
1567 default: newline = (char *)"lf"; break;
1568 case '\r': newline = (char *)"cr"; break;
1569 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1570 case -1: newline = (char *)"any"; break;
1571 case -2: newline = (char *)"anycrlf"; break;
1572 }
1573
1574 /* Process the options */
1575
1576 for (i = 1; i < argc; i++)
1577 {
1578 option_item *op = NULL;
1579 char *option_data = (char *)""; /* default to keep compiler happy */
1580 BOOL longop;
1581 BOOL longopwasequals = FALSE;
1582
1583 if (argv[i][0] != '-') break;
1584
1585 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1586 but only if we have previously had -e or -f to define the patterns. */
1587
1588 if (argv[i][1] == 0)
1589 {
1590 if (pattern_filename != NULL || pattern_count > 0) break;
1591 else exit(usage(2));
1592 }
1593
1594 /* Handle a long name option, or -- to terminate the options */
1595
1596 if (argv[i][1] == '-')
1597 {
1598 char *arg = argv[i] + 2;
1599 char *argequals = strchr(arg, '=');
1600
1601 if (*arg == 0) /* -- terminates options */
1602 {
1603 i++;
1604 break; /* out of the options-handling loop */
1605 }
1606
1607 longop = TRUE;
1608
1609 /* Some long options have data that follows after =, for example file=name.
1610 Some options have variations in the long name spelling: specifically, we
1611 allow "regexp" because GNU grep allows it, though I personally go along
1612 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1613 These options are entered in the table as "regex(p)". No option is in both
1614 these categories, fortunately. */
1615
1616 for (op = optionlist; op->one_char != 0; op++)
1617 {
1618 char *opbra = strchr(op->long_name, '(');
1619 char *equals = strchr(op->long_name, '=');
1620 if (opbra == NULL) /* Not a (p) case */
1621 {
1622 if (equals == NULL) /* Not thing=data case */
1623 {
1624 if (strcmp(arg, op->long_name) == 0) break;
1625 }
1626 else /* Special case xxx=data */
1627 {
1628 int oplen = equals - op->long_name;
1629 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1630 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1631 {
1632 option_data = arg + arglen;
1633 if (*option_data == '=')
1634 {
1635 option_data++;
1636 longopwasequals = TRUE;
1637 }
1638 break;
1639 }
1640 }
1641 }
1642 else /* Special case xxxx(p) */
1643 {
1644 char buff1[24];
1645 char buff2[24];
1646 int baselen = opbra - op->long_name;
1647 sprintf(buff1, "%.*s", baselen, op->long_name);
1648 sprintf(buff2, "%s%.*s", buff1,
1649 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1650 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1651 break;
1652 }
1653 }
1654
1655 if (op->one_char == 0)
1656 {
1657 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1658 exit(usage(2));
1659 }
1660 }
1661
1662
1663 /* Jeffrey Friedl's debugging harness uses these additional options which
1664 are not in the right form for putting in the option table because they use
1665 only one hyphen, yet are more than one character long. By putting them
1666 separately here, they will not get displayed as part of the help() output,
1667 but I don't think Jeffrey will care about that. */
1668
1669 #ifdef JFRIEDL_DEBUG
1670 else if (strcmp(argv[i], "-pre") == 0) {
1671 jfriedl_prefix = argv[++i];
1672 continue;
1673 } else if (strcmp(argv[i], "-post") == 0) {
1674 jfriedl_postfix = argv[++i];
1675 continue;
1676 } else if (strcmp(argv[i], "-XT") == 0) {
1677 sscanf(argv[++i], "%d", &jfriedl_XT);
1678 continue;
1679 } else if (strcmp(argv[i], "-XR") == 0) {
1680 sscanf(argv[++i], "%d", &jfriedl_XR);
1681 continue;
1682 }
1683 #endif
1684
1685
1686 /* One-char options; many that have no data may be in a single argument; we
1687 continue till we hit the last one or one that needs data. */
1688
1689 else
1690 {
1691 char *s = argv[i] + 1;
1692 longop = FALSE;
1693 while (*s != 0)
1694 {
1695 for (op = optionlist; op->one_char != 0; op++)
1696 { if (*s == op->one_char) break; }
1697 if (op->one_char == 0)
1698 {
1699 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1700 *s, argv[i]);
1701 exit(usage(2));
1702 }
1703 if (op->type != OP_NODATA || s[1] == 0)
1704 {
1705 option_data = s+1;
1706 break;
1707 }
1708 pcre_options = handle_option(*s++, pcre_options);
1709 }
1710 }
1711
1712 /* At this point we should have op pointing to a matched option. If the type
1713 is NO_DATA, it means that there is no data, and the option might set
1714 something in the PCRE options. */
1715
1716 if (op->type == OP_NODATA)
1717 {
1718 pcre_options = handle_option(op->one_char, pcre_options);
1719 continue;
1720 }
1721
1722 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1723 either has a value or defaults to something. It cannot have data in a
1724 separate item. At the moment, the only such options are "colo(u)r" and
1725 Jeffrey Friedl's special -S debugging option. */
1726
1727 if (*option_data == 0 &&
1728 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1729 {
1730 switch (op->one_char)
1731 {
1732 case N_COLOUR:
1733 colour_option = (char *)"auto";
1734 break;
1735 #ifdef JFRIEDL_DEBUG
1736 case 'S':
1737 S_arg = 0;
1738 break;
1739 #endif
1740 }
1741 continue;
1742 }
1743
1744 /* Otherwise, find the data string for the option. */
1745
1746 if (*option_data == 0)
1747 {
1748 if (i >= argc - 1 || longopwasequals)
1749 {
1750 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1751 exit(usage(2));
1752 }
1753 option_data = argv[++i];
1754 }
1755
1756 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1757 multiple times to create a list of patterns. */
1758
1759 if (op->type == OP_PATLIST)
1760 {
1761 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1762 {
1763 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1764 MAX_PATTERN_COUNT);
1765 return 2;
1766 }
1767 patterns[cmd_pattern_count++] = option_data;
1768 }
1769
1770 /* Otherwise, deal with single string or numeric data values. */
1771
1772 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1773 {
1774 *((char **)op->dataptr) = option_data;
1775 }
1776 else
1777 {
1778 char *endptr;
1779 int n = strtoul(option_data, &endptr, 10);
1780 if (*endptr != 0)
1781 {
1782 if (longop)
1783 {
1784 char *equals = strchr(op->long_name, '=');
1785 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1786 equals - op->long_name;
1787 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1788 option_data, nlen, op->long_name);
1789 }
1790 else
1791 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1792 option_data, op->one_char);
1793 exit(usage(2));
1794 }
1795 *((int *)op->dataptr) = n;
1796 }
1797 }
1798
1799 /* Options have been decoded. If -C was used, its value is used as a default
1800 for -A and -B. */
1801
1802 if (both_context > 0)
1803 {
1804 if (after_context == 0) after_context = both_context;
1805 if (before_context == 0) before_context = both_context;
1806 }
1807
1808 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1809 LC_ALL environment variable is set, and if so, use it. */
1810
1811 if (locale == NULL)
1812 {
1813 locale = getenv("LC_ALL");
1814 locale_from = "LCC_ALL";
1815 }
1816
1817 if (locale == NULL)
1818 {
1819 locale = getenv("LC_CTYPE");
1820 locale_from = "LC_CTYPE";
1821 }
1822
1823 /* If a locale has been provided, set it, and generate the tables the PCRE
1824 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1825
1826 if (locale != NULL)
1827 {
1828 if (setlocale(LC_CTYPE, locale) == NULL)
1829 {
1830 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1831 locale, locale_from);
1832 return 2;
1833 }
1834 pcretables = pcre_maketables();
1835 }
1836
1837 /* Sort out colouring */
1838
1839 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1840 {
1841 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1842 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1843 else
1844 {
1845 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1846 colour_option);
1847 return 2;
1848 }
1849 if (do_colour)
1850 {
1851 char *cs = getenv("PCREGREP_COLOUR");
1852 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1853 if (cs != NULL) colour_string = cs;
1854 }
1855 }
1856
1857 /* Interpret the newline type; the default settings are Unix-like. */
1858
1859 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1860 {
1861 pcre_options |= PCRE_NEWLINE_CR;
1862 endlinetype = EL_CR;
1863 }
1864 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1865 {
1866 pcre_options |= PCRE_NEWLINE_LF;
1867 endlinetype = EL_LF;
1868 }
1869 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1870 {
1871 pcre_options |= PCRE_NEWLINE_CRLF;
1872 endlinetype = EL_CRLF;
1873 }
1874 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1875 {
1876 pcre_options |= PCRE_NEWLINE_ANY;
1877 endlinetype = EL_ANY;
1878 }
1879 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1880 {
1881 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1882 endlinetype = EL_ANYCRLF;
1883 }
1884 else
1885 {
1886 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1887 return 2;
1888 }
1889
1890 /* Interpret the text values for -d and -D */
1891
1892 if (dee_option != NULL)
1893 {
1894 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1895 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1896 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1897 else
1898 {
1899 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1900 return 2;
1901 }
1902 }
1903
1904 if (DEE_option != NULL)
1905 {
1906 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1907 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1908 else
1909 {
1910 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1911 return 2;
1912 }
1913 }
1914
1915 /* Check the values for Jeffrey Friedl's debugging options. */
1916
1917 #ifdef JFRIEDL_DEBUG
1918 if (S_arg > 9)
1919 {
1920 fprintf(stderr, "pcregrep: bad value for -S option\n");
1921 return 2;
1922 }
1923 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1924 {
1925 if (jfriedl_XT == 0) jfriedl_XT = 1;
1926 if (jfriedl_XR == 0) jfriedl_XR = 1;
1927 }
1928 #endif
1929
1930 /* Get memory to store the pattern and hints lists. */
1931
1932 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1933 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1934
1935 if (pattern_list == NULL || hints_list == NULL)
1936 {
1937 fprintf(stderr, "pcregrep: malloc failed\n");
1938 goto EXIT2;
1939 }
1940
1941 /* If no patterns were provided by -e, and there is no file provided by -f,
1942 the first argument is the one and only pattern, and it must exist. */
1943
1944 if (cmd_pattern_count == 0 && pattern_filename == NULL)
1945 {
1946 if (i >= argc) return usage(2);
1947 patterns[cmd_pattern_count++] = argv[i++];
1948 }
1949
1950 /* Compile the patterns that were provided on the command line, either by
1951 multiple uses of -e or as a single unkeyed pattern. */
1952
1953 for (j = 0; j < cmd_pattern_count; j++)
1954 {
1955 if (!compile_pattern(patterns[j], pcre_options, NULL,
1956 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1957 goto EXIT2;
1958 }
1959
1960 /* Compile the regular expressions that are provided in a file. */
1961
1962 if (pattern_filename != NULL)
1963 {
1964 int linenumber = 0;
1965 FILE *f;
1966 char *filename;
1967 char buffer[MBUFTHIRD];
1968
1969 if (strcmp(pattern_filename, "-") == 0)
1970 {
1971 f = stdin;
1972 filename = stdin_name;
1973 }
1974 else
1975 {
1976 f = fopen(pattern_filename, "r");
1977 if (f == NULL)
1978 {
1979 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1980 strerror(errno));
1981 goto EXIT2;
1982 }
1983 filename = pattern_filename;
1984 }
1985
1986 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1987 {
1988 char *s = buffer + (int)strlen(buffer);
1989 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1990 *s = 0;
1991 linenumber++;
1992 if (buffer[0] == 0) continue; /* Skip blank lines */
1993 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1994 goto EXIT2;
1995 }
1996
1997 if (f != stdin) fclose(f);
1998 }
1999
2000 /* Study the regular expressions, as we will be running them many times */
2001
2002 for (j = 0; j < pattern_count; j++)
2003 {
2004 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2005 if (error != NULL)
2006 {
2007 char s[16];
2008 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2009 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2010 goto EXIT2;
2011 }
2012 hint_count++;
2013 }
2014
2015 /* If there are include or exclude patterns, compile them. */
2016
2017 if (exclude_pattern != NULL)
2018 {
2019 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2020 pcretables);
2021 if (exclude_compiled == NULL)
2022 {
2023 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2024 errptr, error);
2025 goto EXIT2;
2026 }
2027 }
2028
2029 if (include_pattern != NULL)
2030 {
2031 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2032 pcretables);
2033 if (include_compiled == NULL)
2034 {
2035 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2036 errptr, error);
2037 goto EXIT2;
2038 }
2039 }
2040
2041 /* If there are no further arguments, do the business on stdin and exit. */
2042
2043 if (i >= argc)
2044 {
2045 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2046 goto EXIT;
2047 }
2048
2049 /* Otherwise, work through the remaining arguments as files or directories.
2050 Pass in the fact that there is only one argument at top level - this suppresses
2051 the file name if the argument is not a directory and filenames are not
2052 otherwise forced. */
2053
2054 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2055
2056 for (; i < argc; i++)
2057 {
2058 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2059 only_one_at_top);
2060 if (frc > 1) rc = frc;
2061 else if (frc == 0 && rc == 1) rc = 0;
2062 }
2063
2064 EXIT:
2065 if (pattern_list != NULL)
2066 {
2067 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2068 free(pattern_list);
2069 }
2070 if (hints_list != NULL)
2071 {
2072 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2073 free(hints_list);
2074 }
2075 return rc;
2076
2077 EXIT2:
2078 rc = 2;
2079 goto EXIT;
2080 }
2081
2082 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12