/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Wed Dec 5 20:56:03 2007 UTC (6 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 61563 byte(s)
Add --line-offsets and --file-offsets to pcregrep.

1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40 #ifdef HAVE_CONFIG_H
41 #include "config.h"
42 #endif
43
44 #include <ctype.h>
45 #include <locale.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <stdlib.h>
49 #include <errno.h>
50
51 #include <sys/types.h>
52 #include <sys/stat.h>
53
54 #ifdef HAVE_UNISTD_H
55 #include <unistd.h>
56 #endif
57
58 #include "pcre.h"
59
/* Boolean support: BOOL is a plain int holding TRUE or FALSE. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Presumably the limit on the number of patterns that may be supplied; the
code that enforces it is not in this part of the file. */

#define MAX_PATTERN_COUNT 100

/* One third of the main file buffer (the buffer itself is 3*MBUFTHIRD bytes).
It is BUFSIZ when that exceeds 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The numerical order matters: a file name is wanted for every value
greater than FN_DEFAULT. */

enum {
  FN_NONE = 0,
  FN_DEFAULT = 1,
  FN_ONLY = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE = 4
};

/* Actions for the -d (directories) option */

enum {
  dee_READ = 0,
  dee_SKIP = 1,
  dee_RECURSE = 2
};

/* Actions for the -D (devices) option */

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Flag bits for the special pattern-processing options. They are combined in
process_options. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types, as held in endlinetype */

enum {
  EL_LF = 0,
  EL_CR = 1,
  EL_CRLF = 2,
  EL_ANY = 3,
  EL_ANYCRLF = 4
};
93
94
95
96 /*************************************************
97 * Global variables *
98 *************************************************/
99
100 /* Jeffrey Friedl has some debugging requirements that are not part of the
101 regular code. */
102
103 #ifdef JFRIEDL_DEBUG
104 static int S_arg = -1;
105 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107 static const char *jfriedl_prefix = "";
108 static const char *jfriedl_postfix = "";
109 #endif
110
111 static int endlinetype;
112
113 static char *colour_string = (char *)"1;31";
114 static char *colour_option = NULL;
115 static char *dee_option = NULL;
116 static char *DEE_option = NULL;
117 static char *newline = NULL;
118 static char *pattern_filename = NULL;
119 static char *stdin_name = (char *)"(standard input)";
120 static char *locale = NULL;
121
122 static const unsigned char *pcretables = NULL;
123
124 static int pattern_count = 0;
125 static pcre **pattern_list = NULL;
126 static pcre_extra **hints_list = NULL;
127
128 static char *include_pattern = NULL;
129 static char *exclude_pattern = NULL;
130
131 static pcre *include_compiled = NULL;
132 static pcre *exclude_compiled = NULL;
133
134 static int after_context = 0;
135 static int before_context = 0;
136 static int both_context = 0;
137 static int dee_action = dee_READ;
138 static int DEE_action = DEE_READ;
139 static int error_count = 0;
140 static int filenames = FN_DEFAULT;
141 static int process_options = 0;
142
143 static BOOL count_only = FALSE;
144 static BOOL do_colour = FALSE;
145 static BOOL file_offsets = FALSE;
146 static BOOL hyphenpending = FALSE;
147 static BOOL invert = FALSE;
148 static BOOL line_offsets = FALSE;
149 static BOOL multiline = FALSE;
150 static BOOL number = FALSE;
151 static BOOL only_matching = FALSE;
152 static BOOL quiet = FALSE;
153 static BOOL silent = FALSE;
154 static BOOL utf8 = FALSE;
155
156 /* Structure for options and list of them */
157
158 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
159 OP_PATLIST };
160
161 typedef struct option_item {
162 int type;
163 int one_char;
164 void *dataptr;
165 const char *long_name;
166 const char *help_text;
167 } option_item;
168
169 /* Options without a single-letter equivalent get a negative value. This can be
170 used to identify them. */
171
172 #define N_COLOUR (-1)
173 #define N_EXCLUDE (-2)
174 #define N_HELP (-3)
175 #define N_INCLUDE (-4)
176 #define N_LABEL (-5)
177 #define N_LOCALE (-6)
178 #define N_NULL (-7)
179 #define N_LOFFSETS (-8)
180 #define N_FOFFSETS (-9)
181
182 static option_item optionlist[] = {
183 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
184 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
185 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
186 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
187 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
188 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
189 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
190 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
191 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
192 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
193 { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
194 { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
195 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
196 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
197 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
198 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
199 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
200 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
201 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
202 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
203 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
204 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
205 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
206 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
208 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
209 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
210 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
211 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
212 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
213 #ifdef JFRIEDL_DEBUG
214 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
215 #endif
216 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
217 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
218 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
219 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
220 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
221 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
222 { OP_NODATA, 0, NULL, NULL, NULL }
223 };
224
/* Text to put before and after each pattern, indexed by the low three bits of
process_options: -w sets the 1 bit, -x the 2 bit, and -F the 4 bit. Giving -w
and -x together has the same effect as -x alone, which is why entries 2 and 3
(and 6 and 7) are identical. */

static const char *prefix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  "^(?:",      /* 2: -x */
  "^(?:",      /* 3: -w -x */
  "\\Q",       /* 4: -F */
  "\\b\\Q",    /* 5: -F -w */
  "^(?:\\Q",   /* 6: -F -x */
  "^(?:\\Q"    /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",          /* 0: no special processing */
  "\\b",       /* 1: -w */
  ")$",        /* 2: -x */
  ")$",        /* 3: -w -x */
  "\\E",       /* 4: -F */
  "\\E\\b",    /* 5: -F -w */
  "\\E)$",     /* 6: -F -x */
  "\\E)$"      /* 7: -F -w -x */
};

/* UTF-8 tables, used when decoding characters while looking for line endings
of type "any" or "anycrlf". */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes that follow it. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* Number of additional bytes in a character, indexed by the low six bits of
its first byte (which is 0xc0 or greater). */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xc0 - 0xcf */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    /* 0xd0 - 0xdf */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    /* 0xe0 - 0xef */
  3,3,3,3,3,3,3,3,                    /* 0xf0 - 0xf7 */
  4,4,4,4,                            /* 0xf8 - 0xfb */
  5,5,5,5                             /* 0xfc - 0xff */
};
245
246
247
248 /*************************************************
249 * OS-specific functions *
250 *************************************************/
251
252 /* These functions are defined so that they can be made system specific,
253 although at present the only ones are for Unix, Win32, and for "no support". */
254
255
256 /************* Directory scanning in Unix ***********/
257
258 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259 #include <sys/types.h>
260 #include <sys/stat.h>
261 #include <dirent.h>
262
/* On Unix a directory scan is simply a DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() fails (in the expectation
            that opening the path as a file will then fail too)
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0 && (info.st_mode & S_IFMT) == S_IFDIR)
    return '/';
  return 0;
}
273
274 static directory_type *
275 opendirectory(char *filename)
276 {
277 return opendir(filename);
278 }
279
280 static char *
281 readdirectory(directory_type *dir)
282 {
283 for (;;)
284 {
285 struct dirent *dent = readdir(dir);
286 if (dent == NULL) return NULL;
287 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288 return dent->d_name;
289 }
290 /* Control never reaches here */
291 }
292
293 static void
294 closedirectory(directory_type *dir)
295 {
296 closedir(dir);
297 }
298
299
300 /************* Test for regular file in Unix **********/
301
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() fails (in the expectation that opening the path as a
            file will then fail and be reported);
            otherwise non-zero only if stat() reports a regular file
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return (info.st_mode & S_IFMT) == S_IFREG;
  return 1;
}
310
311
312 /************* Test stdout for being a terminal in Unix **********/
313
314 static BOOL
315 is_stdout_tty(void)
316 {
317 return isatty(fileno(stdout));
318 }
319
320
321 /************* Directory scanning in Win32 ***********/
322
323 /* I (Philip Hazel) have no means of testing this code. It was contributed by
324 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325 when it did not exist. */
326
327
328 #elif HAVE_WINDOWS_H
329
330 #ifndef STRICT
331 # define STRICT
332 #endif
333 #ifndef WIN32_LEAN_AND_MEAN
334 # define WIN32_LEAN_AND_MEAN
335 #endif
336 #ifndef INVALID_FILE_ATTRIBUTES
337 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
338 #endif
339
340 #include <windows.h>
341
342 typedef struct directory_type
343 {
344 HANDLE handle;
345 BOOL first;
346 WIN32_FIND_DATA data;
347 } directory_type;
348
349 int
350 isdirectory(char *filename)
351 {
352 DWORD attr = GetFileAttributes(filename);
353 if (attr == INVALID_FILE_ATTRIBUTES)
354 return 0;
355 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
356 }
357
358 directory_type *
359 opendirectory(char *filename)
360 {
361 size_t len;
362 char *pattern;
363 directory_type *dir;
364 DWORD err;
365 len = strlen(filename);
366 pattern = (char *) malloc(len + 3);
367 dir = (directory_type *) malloc(sizeof(*dir));
368 if ((pattern == NULL) || (dir == NULL))
369 {
370 fprintf(stderr, "pcregrep: malloc failed\n");
371 exit(2);
372 }
373 memcpy(pattern, filename, len);
374 memcpy(&(pattern[len]), "\\*", 3);
375 dir->handle = FindFirstFile(pattern, &(dir->data));
376 if (dir->handle != INVALID_HANDLE_VALUE)
377 {
378 free(pattern);
379 dir->first = TRUE;
380 return dir;
381 }
382 err = GetLastError();
383 free(pattern);
384 free(dir);
385 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
386 return NULL;
387 }
388
389 char *
390 readdirectory(directory_type *dir)
391 {
392 for (;;)
393 {
394 if (!dir->first)
395 {
396 if (!FindNextFile(dir->handle, &(dir->data)))
397 return NULL;
398 }
399 else
400 {
401 dir->first = FALSE;
402 }
403 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
404 return dir->data.cFileName;
405 }
406 #ifndef _MSC_VER
407 return NULL; /* Keep compiler happy; never executed */
408 #endif
409 }
410
411 void
412 closedirectory(directory_type *dir)
413 {
414 FindClose(dir->handle);
415 free(dir);
416 }
417
418
419 /************* Test for regular file in Win32 **********/
420
421 /* I don't know how to do this, or if it can be done; assume all paths are
422 regular if they are not directories. */
423
/* Test whether a path names a regular file. Anything that isdirectory() does
not report as a directory is assumed to be regular.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory, 0 if it is
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
428
429
430 /************* Test stdout for being a terminal in Win32 **********/
431
432 /* I don't know how to do this; assume never */
433
434 static BOOL
435 is_stdout_tty(void)
436 {
437 FALSE;
438 }
439
440
441 /************* Directory scanning when we can't do it ***********/
442
443 /* The type is void, and apart from isdirectory(), the functions do nothing. */
444
445 #else
446
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  return 0;
}

/* No directory can be opened. */

directory_type *
opendirectory(char *filename)
{
  return (directory_type*)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  return (char*)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
}
453
454
455 /************* Test for regular when we can't do it **********/
456
457 /* Assume all files are regular. */
458
int
isregfile(char *filename)
{
  return 1;    /* No way of telling, so every file counts as regular */
}
460
461
462 /************* Test stdout for being a terminal when we can't do it **********/
463
464 static BOOL
465 is_stdout_tty(void)
466 {
467 return FALSE;
468 }
469
470
471 #endif
472
473
474
475 #ifndef HAVE_STRERROR
476 /*************************************************
477 * Provide strerror() for non-ANSI libraries *
478 *************************************************/
479
480 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
481 in their libraries, but can provide the same facility by this simple
482 alternative function. */
483
extern char *sys_errlist[];   /* the system's table of error messages */
extern int sys_nerr;          /* number of entries in that table */

/* Replacement strerror().

Argument:   n   an error number
Returns:    the entry for n in sys_errlist[], or a fixed "unknown" message
            when n lies outside the table
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr) return sys_errlist[n];
  return "unknown error number";
}
493 #endif /* HAVE_STRERROR */
494
495
496
497 /*************************************************
498 * Find end of line *
499 *************************************************/
500
501 /* The length of the endline sequence that is found is set via lenptr. This may
502 be zero at the very end of the file if there is no line-ending sequence there.
503
504 Arguments:
505 p current position in line
506 endptr end of available data
507 lenptr where to put the length of the eol sequence
508
509 Returns: pointer to the last byte of the line
510 */
511
512 static char *
513 end_of_line(char *p, char *endptr, int *lenptr)
514 {
515 switch(endlinetype)
516 {
517 default: /* Just in case */
518 case EL_LF:
519 while (p < endptr && *p != '\n') p++;
520 if (p < endptr)
521 {
522 *lenptr = 1;
523 return p + 1;
524 }
525 *lenptr = 0;
526 return endptr;
527
528 case EL_CR:
529 while (p < endptr && *p != '\r') p++;
530 if (p < endptr)
531 {
532 *lenptr = 1;
533 return p + 1;
534 }
535 *lenptr = 0;
536 return endptr;
537
538 case EL_CRLF:
539 for (;;)
540 {
541 while (p < endptr && *p != '\r') p++;
542 if (++p >= endptr)
543 {
544 *lenptr = 0;
545 return endptr;
546 }
547 if (*p == '\n')
548 {
549 *lenptr = 2;
550 return p + 1;
551 }
552 }
553 break;
554
555 case EL_ANYCRLF:
556 while (p < endptr)
557 {
558 int extra = 0;
559 register int c = *((unsigned char *)p);
560
561 if (utf8 && c >= 0xc0)
562 {
563 int gcii, gcss;
564 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
565 gcss = 6*extra;
566 c = (c & utf8_table3[extra]) << gcss;
567 for (gcii = 1; gcii <= extra; gcii++)
568 {
569 gcss -= 6;
570 c |= (p[gcii] & 0x3f) << gcss;
571 }
572 }
573
574 p += 1 + extra;
575
576 switch (c)
577 {
578 case 0x0a: /* LF */
579 *lenptr = 1;
580 return p;
581
582 case 0x0d: /* CR */
583 if (p < endptr && *p == 0x0a)
584 {
585 *lenptr = 2;
586 p++;
587 }
588 else *lenptr = 1;
589 return p;
590
591 default:
592 break;
593 }
594 } /* End of loop for ANYCRLF case */
595
596 *lenptr = 0; /* Must have hit the end */
597 return endptr;
598
599 case EL_ANY:
600 while (p < endptr)
601 {
602 int extra = 0;
603 register int c = *((unsigned char *)p);
604
605 if (utf8 && c >= 0xc0)
606 {
607 int gcii, gcss;
608 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
609 gcss = 6*extra;
610 c = (c & utf8_table3[extra]) << gcss;
611 for (gcii = 1; gcii <= extra; gcii++)
612 {
613 gcss -= 6;
614 c |= (p[gcii] & 0x3f) << gcss;
615 }
616 }
617
618 p += 1 + extra;
619
620 switch (c)
621 {
622 case 0x0a: /* LF */
623 case 0x0b: /* VT */
624 case 0x0c: /* FF */
625 *lenptr = 1;
626 return p;
627
628 case 0x0d: /* CR */
629 if (p < endptr && *p == 0x0a)
630 {
631 *lenptr = 2;
632 p++;
633 }
634 else *lenptr = 1;
635 return p;
636
637 case 0x85: /* NEL */
638 *lenptr = utf8? 2 : 1;
639 return p;
640
641 case 0x2028: /* LS */
642 case 0x2029: /* PS */
643 *lenptr = 3;
644 return p;
645
646 default:
647 break;
648 }
649 } /* End of loop for ANY case */
650
651 *lenptr = 0; /* Must have hit the end */
652 return endptr;
653 } /* End of overall switch */
654 }
655
656
657
658 /*************************************************
659 * Find start of previous line *
660 *************************************************/
661
662 /* This is called when looking back for before lines to print.
663
664 Arguments:
665 p start of the subsequent line
666 startptr start of available data
667
668 Returns: pointer to the start of the previous line
669 */
670
671 static char *
672 previous_line(char *p, char *startptr)
673 {
674 switch(endlinetype)
675 {
676 default: /* Just in case */
677 case EL_LF:
678 p--;
679 while (p > startptr && p[-1] != '\n') p--;
680 return p;
681
682 case EL_CR:
683 p--;
684 while (p > startptr && p[-1] != '\n') p--;
685 return p;
686
687 case EL_CRLF:
688 for (;;)
689 {
690 p -= 2;
691 while (p > startptr && p[-1] != '\n') p--;
692 if (p <= startptr + 1 || p[-2] == '\r') return p;
693 }
694 return p; /* But control should never get here */
695
696 case EL_ANY:
697 case EL_ANYCRLF:
698 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
699 if (utf8) while ((*p & 0xc0) == 0x80) p--;
700
701 while (p > startptr)
702 {
703 register int c;
704 char *pp = p - 1;
705
706 if (utf8)
707 {
708 int extra = 0;
709 while ((*pp & 0xc0) == 0x80) pp--;
710 c = *((unsigned char *)pp);
711 if (c >= 0xc0)
712 {
713 int gcii, gcss;
714 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
715 gcss = 6*extra;
716 c = (c & utf8_table3[extra]) << gcss;
717 for (gcii = 1; gcii <= extra; gcii++)
718 {
719 gcss -= 6;
720 c |= (pp[gcii] & 0x3f) << gcss;
721 }
722 }
723 }
724 else c = *((unsigned char *)pp);
725
726 if (endlinetype == EL_ANYCRLF) switch (c)
727 {
728 case 0x0a: /* LF */
729 case 0x0d: /* CR */
730 return p;
731
732 default:
733 break;
734 }
735
736 else switch (c)
737 {
738 case 0x0a: /* LF */
739 case 0x0b: /* VT */
740 case 0x0c: /* FF */
741 case 0x0d: /* CR */
742 case 0x85: /* NEL */
743 case 0x2028: /* LS */
744 case 0x2029: /* PS */
745 return p;
746
747 default:
748 break;
749 }
750
751 p = pp; /* Back one character */
752 } /* End of loop for ANY case */
753
754 return startptr; /* Hit start of data */
755 } /* End of overall switch */
756 }
757
758
759
760
761
762 /*************************************************
763 * Print the previous "after" lines *
764 *************************************************/
765
766 /* This is called if we are about to lose said lines because of buffer filling,
767 and at the end of the file. The data in the line is written using fwrite() so
768 that a binary zero does not terminate it.
769
770 Arguments:
771 lastmatchnumber the number of the last matching line, plus one
772 lastmatchrestart where we restarted after the last match
773 endptr end of available data
774 printname filename for printing
775
776 Returns: nothing
777 */
778
779 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
780 char *endptr, char *printname)
781 {
782 if (after_context > 0 && lastmatchnumber > 0)
783 {
784 int count = 0;
785 while (lastmatchrestart < endptr && count++ < after_context)
786 {
787 int ellength;
788 char *pp = lastmatchrestart;
789 if (printname != NULL) fprintf(stdout, "%s-", printname);
790 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
791 pp = end_of_line(pp, endptr, &ellength);
792 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
793 lastmatchrestart = pp;
794 }
795 hyphenpending = TRUE;
796 }
797 }
798
799
800
801 /*************************************************
802 * Grep an individual file *
803 *************************************************/
804
805 /* This is called from grep_or_recurse() below. It uses a buffer that is three
806 times the value of MBUFTHIRD. The matching point is never allowed to stray into
807 the top third of the buffer, thus keeping more of the file available for
808 context printing or for multiline scanning. For large files, the pointer will
809 be in the middle third most of the time, so the bottom third is available for
810 "before" context printing.
811
812 Arguments:
813 in the fopened FILE stream
814 printname the file name if it is to be printed for each match
815 or NULL if the file name is not to be printed
816 it cannot be NULL if filenames[_nomatch]_only is set
817
818 Returns: 0 if there was at least one match
819 1 otherwise (no matches)
820 */
821
822 static int
823 pcregrep(FILE *in, char *printname)
824 {
825 int rc = 1;
826 int linenumber = 1;
827 int lastmatchnumber = 0;
828 int count = 0;
829 int filepos = 0;
830 int offsets[99];
831 char *lastmatchrestart = NULL;
832 char buffer[3*MBUFTHIRD];
833 char *ptr = buffer;
834 char *endptr;
835 size_t bufflength;
836 BOOL endhyphenpending = FALSE;
837
838 /* Do the first read into the start of the buffer and set up the pointer to
839 end of what we have. */
840
841 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
842 endptr = buffer + bufflength;
843
844 /* Loop while the current pointer is not at the end of the file. For large
845 files, endptr will be at the end of the buffer when we are in the middle of the
846 file, but ptr will never get there, because as soon as it gets over 2/3 of the
847 way, the buffer is shifted left and re-filled. */
848
849 while (ptr < endptr)
850 {
851 int i, endlinelength;
852 int mrc = 0;
853 BOOL match = FALSE;
854 char *matchptr = ptr;
855 char *t = ptr;
856 size_t length, linelength;
857
858 /* At this point, ptr is at the start of a line. We need to find the length
859 of the subject string to pass to pcre_exec(). In multiline mode, it is the
860 length remainder of the data in the buffer. Otherwise, it is the length of
861 the next line. After matching, we always advance by the length of the next
862 line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
863 that any match is constrained to be in the first line. */
864
865 t = end_of_line(t, endptr, &endlinelength);
866 linelength = t - ptr - endlinelength;
867 length = multiline? (size_t)(endptr - ptr) : linelength;
868
869 /* Extra processing for Jeffrey Friedl's debugging. */
870
871 #ifdef JFRIEDL_DEBUG
872 if (jfriedl_XT || jfriedl_XR)
873 {
874 #include <sys/time.h>
875 #include <time.h>
876 struct timeval start_time, end_time;
877 struct timezone dummy;
878
879 if (jfriedl_XT)
880 {
881 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
882 const char *orig = ptr;
883 ptr = malloc(newlen + 1);
884 if (!ptr) {
885 printf("out of memory");
886 exit(2);
887 }
888 endptr = ptr;
889 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
890 for (i = 0; i < jfriedl_XT; i++) {
891 strncpy(endptr, orig, length);
892 endptr += length;
893 }
894 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
895 length = newlen;
896 }
897
898 if (gettimeofday(&start_time, &dummy) != 0)
899 perror("bad gettimeofday");
900
901
902 for (i = 0; i < jfriedl_XR; i++)
903 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
904
905 if (gettimeofday(&end_time, &dummy) != 0)
906 perror("bad gettimeofday");
907
908 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
909 -
910 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
911
912 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
913 return 0;
914 }
915 #endif
916
917 /* We come back here after a match when the -o option (only_matching) is set,
918 in order to find any further matches in the same line. */
919
920 ONLY_MATCHING_RESTART:
921
922 /* Run through all the patterns until one matches. Note that we don't include
923 the final newline in the subject string. */
924
925 for (i = 0; i < pattern_count; i++)
926 {
927 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
928 offsets, 99);
929 if (mrc >= 0) { match = TRUE; break; }
930 if (mrc != PCRE_ERROR_NOMATCH)
931 {
932 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
933 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
934 fprintf(stderr, "this line:\n");
935 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
936 fprintf(stderr, "\n");
937 if (error_count == 0 &&
938 (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
939 {
940 fprintf(stderr, "pcregrep: error %d means that a resource limit "
941 "was exceeded\n", mrc);
942 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
943 }
944 if (error_count++ > 20)
945 {
946 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
947 exit(2);
948 }
949 match = invert; /* No more matching; don't show the line again */
950 break;
951 }
952 }
953
954 /* If it's a match or a not-match (as required), do what's wanted. */
955
956 if (match != invert)
957 {
958 BOOL hyphenprinted = FALSE;
959
960 /* We've failed if we want a file that doesn't have any matches. */
961
962 if (filenames == FN_NOMATCH_ONLY) return 1;
963
964 /* Just count if just counting is wanted. */
965
966 if (count_only) count++;
967
968 /* If all we want is a file name, there is no need to scan any more lines
969 in the file. */
970
971 else if (filenames == FN_ONLY)
972 {
973 fprintf(stdout, "%s\n", printname);
974 return 0;
975 }
976
977 /* Likewise, if all we want is a yes/no answer. */
978
979 else if (quiet) return 0;
980
981 /* The --only-matching option prints just the substring that matched, and
982 the --file-offsets and --line-offsets options output offsets for the
983 matching substring (they both force --only-matching). None of these options
984 prints any context. Afterwards, adjust the start and length, and then jump
985 back to look for further matches in the same line. If we are in invert
986 mode, however, nothing is printed - this could be still useful because the
987 return code is set. */
988
989 else if (only_matching)
990 {
991 if (!invert)
992 {
993 if (printname != NULL) fprintf(stdout, "%s:", printname);
994 if (number) fprintf(stdout, "%d:", linenumber);
995 if (line_offsets)
996 fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
997 offsets[1] - offsets[0]);
998 else if (file_offsets)
999 fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1000 offsets[1] - offsets[0]);
1001 else
1002 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1003 fprintf(stdout, "\n");
1004 matchptr += offsets[1];
1005 length -= offsets[1];
1006 match = FALSE;
1007 goto ONLY_MATCHING_RESTART;
1008 }
1009 }
1010
1011 /* This is the default case when none of the above options is set. We print
1012 the matching lines(s), possibly preceded and/or followed by other lines of
1013 context. */
1014
1015 else
1016 {
1017 /* See if there is a requirement to print some "after" lines from a
1018 previous match. We never print any overlaps. */
1019
1020 if (after_context > 0 && lastmatchnumber > 0)
1021 {
1022 int ellength;
1023 int linecount = 0;
1024 char *p = lastmatchrestart;
1025
1026 while (p < ptr && linecount < after_context)
1027 {
1028 p = end_of_line(p, ptr, &ellength);
1029 linecount++;
1030 }
1031
1032 /* It is important to advance lastmatchrestart during this printing so
1033 that it interacts correctly with any "before" printing below. Print
1034 each line's data using fwrite() in case there are binary zeroes. */
1035
1036 while (lastmatchrestart < p)
1037 {
1038 char *pp = lastmatchrestart;
1039 if (printname != NULL) fprintf(stdout, "%s-", printname);
1040 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1041 pp = end_of_line(pp, endptr, &ellength);
1042 fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1043 lastmatchrestart = pp;
1044 }
1045 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1046 }
1047
1048 /* If there were non-contiguous lines printed above, insert hyphens. */
1049
1050 if (hyphenpending)
1051 {
1052 fprintf(stdout, "--\n");
1053 hyphenpending = FALSE;
1054 hyphenprinted = TRUE;
1055 }
1056
1057 /* See if there is a requirement to print some "before" lines for this
1058 match. Again, don't print overlaps. */
1059
1060 if (before_context > 0)
1061 {
1062 int linecount = 0;
1063 char *p = ptr;
1064
1065 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1066 linecount < before_context)
1067 {
1068 linecount++;
1069 p = previous_line(p, buffer);
1070 }
1071
1072 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1073 fprintf(stdout, "--\n");
1074
1075 while (p < ptr)
1076 {
1077 int ellength;
1078 char *pp = p;
1079 if (printname != NULL) fprintf(stdout, "%s-", printname);
1080 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1081 pp = end_of_line(pp, endptr, &ellength);
1082 fwrite(p, 1, pp - p, stdout);
1083 p = pp;
1084 }
1085 }
1086
1087 /* Now print the matching line(s); ensure we set hyphenpending at the end
1088 of the file if any context lines are being output. */
1089
1090 if (after_context > 0 || before_context > 0)
1091 endhyphenpending = TRUE;
1092
1093 if (printname != NULL) fprintf(stdout, "%s:", printname);
1094 if (number) fprintf(stdout, "%d:", linenumber);
1095
1096 /* In multiline mode, we want to print to the end of the line in which
1097 the end of the matched string is found, so we adjust linelength and the
1098 line number appropriately, but only when there actually was a match
1099 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1100 the match will always be before the first newline sequence. */
1101
1102 if (multiline)
1103 {
1104 int ellength;
1105 char *endmatch = ptr;
1106 if (!invert)
1107 {
1108 endmatch += offsets[1];
1109 t = ptr;
1110 while (t < endmatch)
1111 {
1112 t = end_of_line(t, endptr, &ellength);
1113 if (t <= endmatch) linenumber++; else break;
1114 }
1115 }
1116 endmatch = end_of_line(endmatch, endptr, &ellength);
1117 linelength = endmatch - ptr - ellength;
1118 }
1119
1120 /*** NOTE: Use only fwrite() to output the data line, so that binary
1121 zeroes are treated as just another data character. */
1122
1123 /* This extra option, for Jeffrey Friedl's debugging requirements,
1124 replaces the matched string, or a specific captured string if it exists,
1125 with X. When this happens, colouring is ignored. */
1126
1127 #ifdef JFRIEDL_DEBUG
1128 if (S_arg >= 0 && S_arg < mrc)
1129 {
1130 int first = S_arg * 2;
1131 int last = first + 1;
1132 fwrite(ptr, 1, offsets[first], stdout);
1133 fprintf(stdout, "X");
1134 fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1135 }
1136 else
1137 #endif
1138
1139 /* We have to split the line(s) up if colouring. */
1140
1141 if (do_colour)
1142 {
1143 fwrite(ptr, 1, offsets[0], stdout);
1144 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1145 fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1146 fprintf(stdout, "%c[00m", 0x1b);
1147 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1148 stdout);
1149 }
1150 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1151 }
1152
1153 /* End of doing what has to be done for a match */
1154
1155 rc = 0; /* Had some success */
1156
1157 /* Remember where the last match happened for after_context. We remember
1158 where we are about to restart, and that line's number. */
1159
1160 lastmatchrestart = ptr + linelength + endlinelength;
1161 lastmatchnumber = linenumber + 1;
1162 }
1163
1164 /* For a match in multiline inverted mode (which of course did not cause
1165 anything to be printed), we have to move on to the end of the match before
1166 proceeding. */
1167
1168 if (multiline && invert && match)
1169 {
1170 int ellength;
1171 char *endmatch = ptr + offsets[1];
1172 t = ptr;
1173 while (t < endmatch)
1174 {
1175 t = end_of_line(t, endptr, &ellength);
1176 if (t <= endmatch) linenumber++; else break;
1177 }
1178 endmatch = end_of_line(endmatch, endptr, &ellength);
1179 linelength = endmatch - ptr - ellength;
1180 }
1181
1182 /* Advance to after the newline and increment the line number. The file
1183 offset to the current line is maintained in filepos. */
1184
1185 ptr += linelength + endlinelength;
1186 filepos += linelength + endlinelength;
1187 linenumber++;
1188
1189 /* If we haven't yet reached the end of the file (the buffer is full), and
1190 the current point is in the top 1/3 of the buffer, slide the buffer down by
1191 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1192 about to be lost, print them. */
1193
1194 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1195 {
1196 if (after_context > 0 &&
1197 lastmatchnumber > 0 &&
1198 lastmatchrestart < buffer + MBUFTHIRD)
1199 {
1200 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1201 lastmatchnumber = 0;
1202 }
1203
1204 /* Now do the shuffle */
1205
1206 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1207 ptr -= MBUFTHIRD;
1208 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1209 endptr = buffer + bufflength;
1210
1211 /* Adjust any last match point */
1212
1213 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1214 }
1215 } /* Loop through the whole file */
1216
1217 /* End of file; print final "after" lines if wanted; do_after_lines sets
1218 hyphenpending if it prints something. */
1219
1220 if (!only_matching && !count_only)
1221 {
1222 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1223 hyphenpending |= endhyphenpending;
1224 }
1225
1226 /* Print the file name if we are looking for those without matches and there
1227 were none. If we found a match, we won't have got this far. */
1228
1229 if (filenames == FN_NOMATCH_ONLY)
1230 {
1231 fprintf(stdout, "%s\n", printname);
1232 return 0;
1233 }
1234
1235 /* Print the match count if wanted */
1236
1237 if (count_only)
1238 {
1239 if (printname != NULL) fprintf(stdout, "%s:", printname);
1240 fprintf(stdout, "%d\n", count);
1241 }
1242
1243 return rc;
1244 }
1245
1246
1247
1248 /*************************************************
1249 * Grep a file or recurse into a directory *
1250 *************************************************/
1251
1252 /* Given a path name, if it's a directory, scan all the files if we are
1253 recursing; if it's a file, grep it.
1254
1255 Arguments:
1256 pathname the path to investigate
1257 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1258 only_one_at_top TRUE if the path is the only one at toplevel
1259
1260 Returns: 0 if there was at least one match
1261 1 if there were no matches
1262 2 there was some kind of error
1263
1264 However, file opening failures are suppressed if "silent" is set.
1265 */
1266
1267 static int
1268 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1269 {
1270 int rc = 1;
1271 int sep;
1272 FILE *in;
1273
1274 /* If the file name is "-" we scan stdin */
1275
1276 if (strcmp(pathname, "-") == 0)
1277 {
1278 return pcregrep(stdin,
1279 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1280 stdin_name : NULL);
1281 }
1282
1283
1284 /* If the file is a directory, skip if skipping or if we are recursing, scan
1285 each file within it, subject to any include or exclude patterns that were set.
1286 The scanning code is localized so it can be made system-specific. */
1287
1288 if ((sep = isdirectory(pathname)) != 0)
1289 {
1290 if (dee_action == dee_SKIP) return 1;
1291 if (dee_action == dee_RECURSE)
1292 {
1293 char buffer[1024];
1294 char *nextfile;
1295 directory_type *dir = opendirectory(pathname);
1296
1297 if (dir == NULL)
1298 {
1299 if (!silent)
1300 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1301 strerror(errno));
1302 return 2;
1303 }
1304
1305 while ((nextfile = readdirectory(dir)) != NULL)
1306 {
1307 int frc, blen;
1308 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1309 blen = strlen(buffer);
1310
1311 if (exclude_compiled != NULL &&
1312 pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1313 continue;
1314
1315 if (include_compiled != NULL &&
1316 pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1317 continue;
1318
1319 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1320 if (frc > 1) rc = frc;
1321 else if (frc == 0 && rc == 1) rc = 0;
1322 }
1323
1324 closedirectory(dir);
1325 return rc;
1326 }
1327 }
1328
1329 /* If the file is not a directory and not a regular file, skip it if that's
1330 been requested. */
1331
1332 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1333
1334 /* Control reaches here if we have a regular file, or if we have a directory
1335 and recursion or skipping was not requested, or if we have anything else and
1336 skipping was not requested. The scan proceeds. If this is the first and only
1337 argument at top level, we don't show the file name, unless we are only showing
1338 the file name, or the filename was forced (-H). */
1339
1340 in = fopen(pathname, "r");
1341 if (in == NULL)
1342 {
1343 if (!silent)
1344 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1345 strerror(errno));
1346 return 2;
1347 }
1348
1349 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1350 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1351
1352 fclose(in);
1353 return rc;
1354 }
1355
1356
1357
1358
1359 /*************************************************
1360 * Usage function *
1361 *************************************************/
1362
1363 static int
1364 usage(int rc)
1365 {
1366 option_item *op;
1367 fprintf(stderr, "Usage: pcregrep [-");
1368 for (op = optionlist; op->one_char != 0; op++)
1369 {
1370 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1371 }
1372 fprintf(stderr, "] [long options] [pattern] [files]\n");
1373 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1374 "options.\n");
1375 return rc;
1376 }
1377
1378
1379
1380
1381 /*************************************************
1382 * Help function *
1383 *************************************************/
1384
1385 static void
1386 help(void)
1387 {
1388 option_item *op;
1389
1390 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1391 printf("Search for PATTERN in each FILE or standard input.\n");
1392 printf("PATTERN must be present if neither -e nor -f is used.\n");
1393 printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1394 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1395
1396 printf("Options:\n");
1397
1398 for (op = optionlist; op->one_char != 0; op++)
1399 {
1400 int n;
1401 char s[4];
1402 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1403 printf(" %s --%s%n", s, op->long_name, &n);
1404 n = 30 - n;
1405 if (n < 1) n = 1;
1406 printf("%.*s%s\n", n, " ", op->help_text);
1407 }
1408
1409 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1410 printf("trailing white space is removed and blank lines are ignored.\n");
1411 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1412
1413 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1414 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1415 }
1416
1417
1418
1419
1420 /*************************************************
1421 * Handle a single-letter, no data option *
1422 *************************************************/
1423
1424 static int
1425 handle_option(int letter, int options)
1426 {
1427 switch(letter)
1428 {
1429 case N_FOFFSETS: file_offsets = TRUE; break;
1430 case N_HELP: help(); exit(0);
1431 case N_LOFFSETS: line_offsets = number = TRUE; break;
1432 case 'c': count_only = TRUE; break;
1433 case 'F': process_options |= PO_FIXED_STRINGS; break;
1434 case 'H': filenames = FN_FORCE; break;
1435 case 'h': filenames = FN_NONE; break;
1436 case 'i': options |= PCRE_CASELESS; break;
1437 case 'l': filenames = FN_ONLY; break;
1438 case 'L': filenames = FN_NOMATCH_ONLY; break;
1439 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1440 case 'n': number = TRUE; break;
1441 case 'o': only_matching = TRUE; break;
1442 case 'q': quiet = TRUE; break;
1443 case 'r': dee_action = dee_RECURSE; break;
1444 case 's': silent = TRUE; break;
1445 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1446 case 'v': invert = TRUE; break;
1447 case 'w': process_options |= PO_WORD_MATCH; break;
1448 case 'x': process_options |= PO_LINE_MATCH; break;
1449
1450 case 'V':
1451 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1452 exit(0);
1453 break;
1454
1455 default:
1456 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1457 exit(usage(2));
1458 }
1459
1460 return options;
1461 }
1462
1463
1464
1465
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The ending is chosen
from the last two digits of the magnitude of the number, so that 11, 12 and 13
(and 111, 112, ...) get "th" and not "st", "nd" or "rd".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
              the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of any
int; the extra four are for a sign, the two-letter ending, and the zero. */

static char buffer[3*sizeof(int) + 4];
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

if (lasttwo < 0) lasttwo = -lasttwo;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

p += sprintf(p, "%d", n);
strcpy(p, ending);
return buffer;
}
1488
1489
1490
1491 /*************************************************
1492 * Compile a single pattern *
1493 *************************************************/
1494
1495 /* When the -F option has been used, this is called for each substring.
1496 Otherwise it's called for each supplied pattern.
1497
1498 Arguments:
1499 pattern the pattern string
1500 options the PCRE options
1501 filename the file name, or NULL for a command-line pattern
1502 count 0 if this is the only command line pattern, or
1503 number of the command line pattern, or
1504 linenumber for a pattern from a file
1505
1506 Returns: TRUE on success, FALSE after an error
1507 */
1508
1509 static BOOL
1510 compile_single_pattern(char *pattern, int options, char *filename, int count)
1511 {
1512 char buffer[MBUFTHIRD + 16];
1513 const char *error;
1514 int errptr;
1515
1516 if (pattern_count >= MAX_PATTERN_COUNT)
1517 {
1518 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1519 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1520 return FALSE;
1521 }
1522
1523 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1524 suffix[process_options]);
1525 pattern_list[pattern_count] =
1526 pcre_compile(buffer, options, &error, &errptr, pcretables);
1527 if (pattern_list[pattern_count] != NULL)
1528 {
1529 pattern_count++;
1530 return TRUE;
1531 }
1532
1533 /* Handle compile errors */
1534
1535 errptr -= (int)strlen(prefix[process_options]);
1536 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1537
1538 if (filename == NULL)
1539 {
1540 if (count == 0)
1541 fprintf(stderr, "pcregrep: Error in command-line regex "
1542 "at offset %d: %s\n", errptr, error);
1543 else
1544 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1545 "at offset %d: %s\n", ordin(count), errptr, error);
1546 }
1547 else
1548 {
1549 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1550 "at offset %d: %s\n", count, filename, errptr, error);
1551 }
1552
1553 return FALSE;
1554 }
1555
1556
1557
1558 /*************************************************
1559 * Compile one supplied pattern *
1560 *************************************************/
1561
1562 /* When the -F option has been used, each string may be a list of strings,
1563 separated by line breaks. They will be matched literally.
1564
1565 Arguments:
1566 pattern the pattern string
1567 options the PCRE options
1568 filename the file name, or NULL for a command-line pattern
1569 count 0 if this is the only command line pattern, or
1570 number of the command line pattern, or
1571 linenumber for a pattern from a file
1572
1573 Returns: TRUE on success, FALSE after an error
1574 */
1575
1576 static BOOL
1577 compile_pattern(char *pattern, int options, char *filename, int count)
1578 {
1579 if ((process_options & PO_FIXED_STRINGS) != 0)
1580 {
1581 char *eop = pattern + strlen(pattern);
1582 char buffer[MBUFTHIRD];
1583 for(;;)
1584 {
1585 int ellength;
1586 char *p = end_of_line(pattern, eop, &ellength);
1587 if (ellength == 0)
1588 return compile_single_pattern(pattern, options, filename, count);
1589 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1590 pattern = p;
1591 if (!compile_single_pattern(buffer, options, filename, count))
1592 return FALSE;
1593 }
1594 }
1595 else return compile_single_pattern(pattern, options, filename, count);
1596 }
1597
1598
1599
1600 /*************************************************
1601 * Main program *
1602 *************************************************/
1603
1604 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1605
1606 int
1607 main(int argc, char **argv)
1608 {
1609 int i, j;
1610 int rc = 1;
1611 int pcre_options = 0;
1612 int cmd_pattern_count = 0;
1613 int hint_count = 0;
1614 int errptr;
1615 BOOL only_one_at_top;
1616 char *patterns[MAX_PATTERN_COUNT];
1617 const char *locale_from = "--locale";
1618 const char *error;
1619
1620 /* Set the default line ending value from the default in the PCRE library;
1621 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1622 */
1623
1624 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1625 switch(i)
1626 {
1627 default: newline = (char *)"lf"; break;
1628 case '\r': newline = (char *)"cr"; break;
1629 case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1630 case -1: newline = (char *)"any"; break;
1631 case -2: newline = (char *)"anycrlf"; break;
1632 }
1633
1634 /* Process the options */
1635
1636 for (i = 1; i < argc; i++)
1637 {
1638 option_item *op = NULL;
1639 char *option_data = (char *)""; /* default to keep compiler happy */
1640 BOOL longop;
1641 BOOL longopwasequals = FALSE;
1642
1643 if (argv[i][0] != '-') break;
1644
1645 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1646 but only if we have previously had -e or -f to define the patterns. */
1647
1648 if (argv[i][1] == 0)
1649 {
1650 if (pattern_filename != NULL || pattern_count > 0) break;
1651 else exit(usage(2));
1652 }
1653
1654 /* Handle a long name option, or -- to terminate the options */
1655
1656 if (argv[i][1] == '-')
1657 {
1658 char *arg = argv[i] + 2;
1659 char *argequals = strchr(arg, '=');
1660
1661 if (*arg == 0) /* -- terminates options */
1662 {
1663 i++;
1664 break; /* out of the options-handling loop */
1665 }
1666
1667 longop = TRUE;
1668
1669 /* Some long options have data that follows after =, for example file=name.
1670 Some options have variations in the long name spelling: specifically, we
1671 allow "regexp" because GNU grep allows it, though I personally go along
1672 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1673 These options are entered in the table as "regex(p)". No option is in both
1674 these categories, fortunately. */
1675
1676 for (op = optionlist; op->one_char != 0; op++)
1677 {
1678 char *opbra = strchr(op->long_name, '(');
1679 char *equals = strchr(op->long_name, '=');
1680 if (opbra == NULL) /* Not a (p) case */
1681 {
1682 if (equals == NULL) /* Not thing=data case */
1683 {
1684 if (strcmp(arg, op->long_name) == 0) break;
1685 }
1686 else /* Special case xxx=data */
1687 {
1688 int oplen = equals - op->long_name;
1689 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1690 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1691 {
1692 option_data = arg + arglen;
1693 if (*option_data == '=')
1694 {
1695 option_data++;
1696 longopwasequals = TRUE;
1697 }
1698 break;
1699 }
1700 }
1701 }
1702 else /* Special case xxxx(p) */
1703 {
1704 char buff1[24];
1705 char buff2[24];
1706 int baselen = opbra - op->long_name;
1707 sprintf(buff1, "%.*s", baselen, op->long_name);
1708 sprintf(buff2, "%s%.*s", buff1,
1709 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1710 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1711 break;
1712 }
1713 }
1714
1715 if (op->one_char == 0)
1716 {
1717 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1718 exit(usage(2));
1719 }
1720 }
1721
1722
1723 /* Jeffrey Friedl's debugging harness uses these additional options which
1724 are not in the right form for putting in the option table because they use
1725 only one hyphen, yet are more than one character long. By putting them
1726 separately here, they will not get displayed as part of the help() output,
1727 but I don't think Jeffrey will care about that. */
1728
1729 #ifdef JFRIEDL_DEBUG
1730 else if (strcmp(argv[i], "-pre") == 0) {
1731 jfriedl_prefix = argv[++i];
1732 continue;
1733 } else if (strcmp(argv[i], "-post") == 0) {
1734 jfriedl_postfix = argv[++i];
1735 continue;
1736 } else if (strcmp(argv[i], "-XT") == 0) {
1737 sscanf(argv[++i], "%d", &jfriedl_XT);
1738 continue;
1739 } else if (strcmp(argv[i], "-XR") == 0) {
1740 sscanf(argv[++i], "%d", &jfriedl_XR);
1741 continue;
1742 }
1743 #endif
1744
1745
1746 /* One-char options; many that have no data may be in a single argument; we
1747 continue till we hit the last one or one that needs data. */
1748
1749 else
1750 {
1751 char *s = argv[i] + 1;
1752 longop = FALSE;
1753 while (*s != 0)
1754 {
1755 for (op = optionlist; op->one_char != 0; op++)
1756 { if (*s == op->one_char) break; }
1757 if (op->one_char == 0)
1758 {
1759 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1760 *s, argv[i]);
1761 exit(usage(2));
1762 }
1763 if (op->type != OP_NODATA || s[1] == 0)
1764 {
1765 option_data = s+1;
1766 break;
1767 }
1768 pcre_options = handle_option(*s++, pcre_options);
1769 }
1770 }
1771
1772 /* At this point we should have op pointing to a matched option. If the type
1773 is NO_DATA, it means that there is no data, and the option might set
1774 something in the PCRE options. */
1775
1776 if (op->type == OP_NODATA)
1777 {
1778 pcre_options = handle_option(op->one_char, pcre_options);
1779 continue;
1780 }
1781
1782 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1783 either has a value or defaults to something. It cannot have data in a
1784 separate item. At the moment, the only such options are "colo(u)r" and
1785 Jeffrey Friedl's special -S debugging option. */
1786
1787 if (*option_data == 0 &&
1788 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1789 {
1790 switch (op->one_char)
1791 {
1792 case N_COLOUR:
1793 colour_option = (char *)"auto";
1794 break;
1795 #ifdef JFRIEDL_DEBUG
1796 case 'S':
1797 S_arg = 0;
1798 break;
1799 #endif
1800 }
1801 continue;
1802 }
1803
1804 /* Otherwise, find the data string for the option. */
1805
1806 if (*option_data == 0)
1807 {
1808 if (i >= argc - 1 || longopwasequals)
1809 {
1810 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1811 exit(usage(2));
1812 }
1813 option_data = argv[++i];
1814 }
1815
1816 /* If the option type is OP_PATLIST, it's the -e option, which can be called
1817 multiple times to create a list of patterns. */
1818
1819 if (op->type == OP_PATLIST)
1820 {
1821 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1822 {
1823 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1824 MAX_PATTERN_COUNT);
1825 return 2;
1826 }
1827 patterns[cmd_pattern_count++] = option_data;
1828 }
1829
1830 /* Otherwise, deal with single string or numeric data values. */
1831
1832 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1833 {
1834 *((char **)op->dataptr) = option_data;
1835 }
1836 else
1837 {
1838 char *endptr;
1839 int n = strtoul(option_data, &endptr, 10);
1840 if (*endptr != 0)
1841 {
1842 if (longop)
1843 {
1844 char *equals = strchr(op->long_name, '=');
1845 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1846 equals - op->long_name;
1847 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1848 option_data, nlen, op->long_name);
1849 }
1850 else
1851 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1852 option_data, op->one_char);
1853 exit(usage(2));
1854 }
1855 *((int *)op->dataptr) = n;
1856 }
1857 }
1858
1859 /* Options have been decoded. If -C was used, its value is used as a default
1860 for -A and -B. */
1861
1862 if (both_context > 0)
1863 {
1864 if (after_context == 0) after_context = both_context;
1865 if (before_context == 0) before_context = both_context;
1866 }
1867
1868 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1869 However, the latter two set the only_matching flag. */
1870
1871 if ((only_matching && (file_offsets || line_offsets)) ||
1872 (file_offsets && line_offsets))
1873 {
1874 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1875 "and/or --line-offsets\n");
1876 exit(usage(2));
1877 }
1878
1879 if (file_offsets || line_offsets) only_matching = TRUE;
1880
1881 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1882 LC_ALL environment variable is set, and if so, use it. */
1883
1884 if (locale == NULL)
1885 {
1886 locale = getenv("LC_ALL");
1887 locale_from = "LCC_ALL";
1888 }
1889
1890 if (locale == NULL)
1891 {
1892 locale = getenv("LC_CTYPE");
1893 locale_from = "LC_CTYPE";
1894 }
1895
1896 /* If a locale has been provided, set it, and generate the tables the PCRE
1897 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1898
1899 if (locale != NULL)
1900 {
1901 if (setlocale(LC_CTYPE, locale) == NULL)
1902 {
1903 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1904 locale, locale_from);
1905 return 2;
1906 }
1907 pcretables = pcre_maketables();
1908 }
1909
1910 /* Sort out colouring */
1911
1912 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1913 {
1914 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1915 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1916 else
1917 {
1918 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1919 colour_option);
1920 return 2;
1921 }
1922 if (do_colour)
1923 {
1924 char *cs = getenv("PCREGREP_COLOUR");
1925 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1926 if (cs != NULL) colour_string = cs;
1927 }
1928 }
1929
1930 /* Interpret the newline type; the default settings are Unix-like. */
1931
1932 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1933 {
1934 pcre_options |= PCRE_NEWLINE_CR;
1935 endlinetype = EL_CR;
1936 }
1937 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1938 {
1939 pcre_options |= PCRE_NEWLINE_LF;
1940 endlinetype = EL_LF;
1941 }
1942 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1943 {
1944 pcre_options |= PCRE_NEWLINE_CRLF;
1945 endlinetype = EL_CRLF;
1946 }
1947 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1948 {
1949 pcre_options |= PCRE_NEWLINE_ANY;
1950 endlinetype = EL_ANY;
1951 }
1952 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1953 {
1954 pcre_options |= PCRE_NEWLINE_ANYCRLF;
1955 endlinetype = EL_ANYCRLF;
1956 }
1957 else
1958 {
1959 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1960 return 2;
1961 }
1962
1963 /* Interpret the text values for -d and -D */
1964
1965 if (dee_option != NULL)
1966 {
1967 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1968 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1969 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1970 else
1971 {
1972 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1973 return 2;
1974 }
1975 }
1976
1977 if (DEE_option != NULL)
1978 {
1979 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1980 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1981 else
1982 {
1983 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1984 return 2;
1985 }
1986 }
1987
1988 /* Check the values for Jeffrey Friedl's debugging options. */
1989
1990 #ifdef JFRIEDL_DEBUG
1991 if (S_arg > 9)
1992 {
1993 fprintf(stderr, "pcregrep: bad value for -S option\n");
1994 return 2;
1995 }
1996 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1997 {
1998 if (jfriedl_XT == 0) jfriedl_XT = 1;
1999 if (jfriedl_XR == 0) jfriedl_XR = 1;
2000 }
2001 #endif
2002
2003 /* Get memory to store the pattern and hints lists. */
2004
2005 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2006 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2007
2008 if (pattern_list == NULL || hints_list == NULL)
2009 {
2010 fprintf(stderr, "pcregrep: malloc failed\n");
2011 goto EXIT2;
2012 }
2013
2014 /* If no patterns were provided by -e, and there is no file provided by -f,
2015 the first argument is the one and only pattern, and it must exist. */
2016
2017 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2018 {
2019 if (i >= argc) return usage(2);
2020 patterns[cmd_pattern_count++] = argv[i++];
2021 }
2022
2023 /* Compile the patterns that were provided on the command line, either by
2024 multiple uses of -e or as a single unkeyed pattern. */
2025
2026 for (j = 0; j < cmd_pattern_count; j++)
2027 {
2028 if (!compile_pattern(patterns[j], pcre_options, NULL,
2029 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2030 goto EXIT2;
2031 }
2032
2033 /* Compile the regular expressions that are provided in a file. */
2034
2035 if (pattern_filename != NULL)
2036 {
2037 int linenumber = 0;
2038 FILE *f;
2039 char *filename;
2040 char buffer[MBUFTHIRD];
2041
2042 if (strcmp(pattern_filename, "-") == 0)
2043 {
2044 f = stdin;
2045 filename = stdin_name;
2046 }
2047 else
2048 {
2049 f = fopen(pattern_filename, "r");
2050 if (f == NULL)
2051 {
2052 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2053 strerror(errno));
2054 goto EXIT2;
2055 }
2056 filename = pattern_filename;
2057 }
2058
2059 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2060 {
2061 char *s = buffer + (int)strlen(buffer);
2062 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2063 *s = 0;
2064 linenumber++;
2065 if (buffer[0] == 0) continue; /* Skip blank lines */
2066 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2067 goto EXIT2;
2068 }
2069
2070 if (f != stdin) fclose(f);
2071 }
2072
2073 /* Study the regular expressions, as we will be running them many times */
2074
2075 for (j = 0; j < pattern_count; j++)
2076 {
2077 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2078 if (error != NULL)
2079 {
2080 char s[16];
2081 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2082 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2083 goto EXIT2;
2084 }
2085 hint_count++;
2086 }
2087
2088 /* If there are include or exclude patterns, compile them. */
2089
2090 if (exclude_pattern != NULL)
2091 {
2092 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2093 pcretables);
2094 if (exclude_compiled == NULL)
2095 {
2096 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2097 errptr, error);
2098 goto EXIT2;
2099 }
2100 }
2101
2102 if (include_pattern != NULL)
2103 {
2104 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2105 pcretables);
2106 if (include_compiled == NULL)
2107 {
2108 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2109 errptr, error);
2110 goto EXIT2;
2111 }
2112 }
2113
2114 /* If there are no further arguments, do the business on stdin and exit. */
2115
2116 if (i >= argc)
2117 {
2118 rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2119 goto EXIT;
2120 }
2121
2122 /* Otherwise, work through the remaining arguments as files or directories.
2123 Pass in the fact that there is only one argument at top level - this suppresses
2124 the file name if the argument is not a directory and filenames are not
2125 otherwise forced. */
2126
2127 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2128
2129 for (; i < argc; i++)
2130 {
2131 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2132 only_one_at_top);
2133 if (frc > 1) rc = frc;
2134 else if (frc == 0 && rc == 1) rc = 0;
2135 }
2136
2137 EXIT:
2138 if (pattern_list != NULL)
2139 {
2140 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2141 free(pattern_list);
2142 }
2143 if (hints_list != NULL)
2144 {
2145 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2146 free(hints_list);
2147 }
2148 return rc;
2149
2150 EXIT2:
2151 rc = 2;
2152 goto EXIT;
2153 }
2154
2155 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12