/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 279 - (hide annotations) (download)
Tue Dec 4 20:01:43 2007 UTC (6 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 60113 byte(s)
Fix -o bugs in pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 236 #include "pcre.h"
59 nigel 49
/* Home-grown boolean type and its two values. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define MAX_PATTERN_COUNT 100

/* MBUFTHIRD is one third of the size of the main file buffer: BUFSIZ, but
never less than 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Settings of the "filenames" variable, which controls file name output. The
numeric order matters: every value above FN_DEFAULT is taken to mean that a
file name is wanted. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_ONLY         = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* Actions selected by the -d option (dee_*) and by the -D option (DEE_*). */

enum { dee_READ = 0, dee_SKIP = 1, dee_RECURSE = 2 };
enum { DEE_READ = 0, DEE_SKIP = 1 };

/* Flag bits for the special processing options. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* The recognised kinds of line ending. */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
103     #ifdef JFRIEDL_DEBUG
104     static int S_arg = -1;
105 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107     static const char *jfriedl_prefix = "";
108     static const char *jfriedl_postfix = "";
109 nigel 87 #endif
110    
111 nigel 93 static int endlinetype;
112 nigel 91
113 nigel 87 static char *colour_string = (char *)"1;31";
114     static char *colour_option = NULL;
115     static char *dee_option = NULL;
116     static char *DEE_option = NULL;
117 nigel 91 static char *newline = NULL;
118 nigel 53 static char *pattern_filename = NULL;
119 nigel 77 static char *stdin_name = (char *)"(standard input)";
120 nigel 87 static char *locale = NULL;
121    
122     static const unsigned char *pcretables = NULL;
123    
124 nigel 53 static int pattern_count = 0;
125 ph10 121 static pcre **pattern_list = NULL;
126     static pcre_extra **hints_list = NULL;
127 nigel 49
128 nigel 77 static char *include_pattern = NULL;
129     static char *exclude_pattern = NULL;
130    
131     static pcre *include_compiled = NULL;
132     static pcre *exclude_compiled = NULL;
133    
134     static int after_context = 0;
135     static int before_context = 0;
136     static int both_context = 0;
137 nigel 87 static int dee_action = dee_READ;
138     static int DEE_action = DEE_READ;
139     static int error_count = 0;
140     static int filenames = FN_DEFAULT;
141     static int process_options = 0;
142 nigel 77
143 nigel 49 static BOOL count_only = FALSE;
144 nigel 87 static BOOL do_colour = FALSE;
145 nigel 77 static BOOL hyphenpending = FALSE;
146 nigel 49 static BOOL invert = FALSE;
147 nigel 77 static BOOL multiline = FALSE;
148 nigel 49 static BOOL number = FALSE;
149 nigel 87 static BOOL only_matching = FALSE;
150 nigel 77 static BOOL quiet = FALSE;
151 nigel 49 static BOOL silent = FALSE;
152 nigel 93 static BOOL utf8 = FALSE;
153 nigel 49
154 nigel 53 /* Structure for options and list of them */
155 nigel 49
156 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157     OP_PATLIST };
158 nigel 77
159 nigel 53 typedef struct option_item {
160 nigel 77 int type;
161 nigel 53 int one_char;
162 nigel 77 void *dataptr;
163 nigel 67 const char *long_name;
164     const char *help_text;
165 nigel 53 } option_item;
166 nigel 49
167 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
168     used to identify them. */
169    
170     #define N_COLOUR (-1)
171     #define N_EXCLUDE (-2)
172     #define N_HELP (-3)
173     #define N_INCLUDE (-4)
174     #define N_LABEL (-5)
175     #define N_LOCALE (-6)
176     #define N_NULL (-7)
177    
178 nigel 53 static option_item optionlist[] = {
179 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 ph10 149 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207     #ifdef JFRIEDL_DEBUG
208     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209     #endif
210     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216     { OP_NODATA, 0, NULL, NULL, NULL }
217 nigel 53 };
218    
/* Text to put before and after each pattern, selected by the -w, -x, and -F
options, which set the 1, 2, and 4 bits of process_options respectively. Giving
-w together with -x has the same effect as -x alone, which is why entries 2 and
3 (and likewise 6 and 7) are identical. */

static const char *prefix[] = {
  "",
  "\\b",
  "^(?:",
  "^(?:",
  "\\Q",
  "\\b\\Q",
  "^(?:\\Q",
  "^(?:\\Q"
};

static const char *suffix[] = {
  "",
  "\\b",
  ")$",
  ")$",
  "\\E",
  "\\E\\b",
  "\\E)$",
  "\\E)$"
};

/* UTF-8 decoding tables, used by the line scanning code for the "any" and
"anycrlf" newline settings. utf8_table3 is indexed by the number of additional
bytes and gives the mask for the data bits of the first byte. utf8_table4 is
indexed by the bottom six bits of a first byte that is 0xc0 or above, and gives
the number of additional bytes. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,5,5,5,5
};
239    
240    
241    
242 nigel 53 /*************************************************
243 nigel 87 * OS-specific functions *
244 nigel 53 *************************************************/
245    
246     /* These functions are defined so that they can be made system specific,
247 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
248 nigel 53
249    
250     /************* Directory scanning in Unix ***********/
251    
252 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 nigel 53 #include <sys/types.h>
254     #include <sys/stat.h>
255     #include <dirent.h>
256    
/* On Unix a directory scan is just the C library's DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() fails, in the expectation
            that a later attempt to open the path as a file will fail
*/

static int
isdirectory(char *filename)
{
  struct stat info;
  int result = 0;

  if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode))
    result = '/';

  return result;
}
267    
268 nigel 67 static directory_type *
269 nigel 53 opendirectory(char *filename)
270     {
271     return opendir(filename);
272     }
273    
274 nigel 67 static char *
275 nigel 53 readdirectory(directory_type *dir)
276     {
277     for (;;)
278     {
279     struct dirent *dent = readdir(dir);
280     if (dent == NULL) return NULL;
281     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282     return dent->d_name;
283     }
284 ph10 151 /* Control never reaches here */
285 nigel 53 }
286    
287 nigel 67 static void
288 nigel 53 closedirectory(directory_type *dir)
289     {
290     closedir(dir);
291     }
292    
293    
294 nigel 87 /************* Test for regular file in Unix **********/
295    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, zero if it reports
            anything else; also 1 if stat() itself fails, in the
            expectation that opening the path as a file will then fail
*/

static int
isregfile(char *filename)
{
  struct stat info;

  if (stat(filename, &info) >= 0)
    return S_ISREG(info.st_mode);

  return 1;
}
304    
305    
306     /************* Test stdout for being a terminal in Unix **********/
307    
308     static BOOL
309     is_stdout_tty(void)
310     {
311     return isatty(fileno(stdout));
312     }
313    
314    
315 nigel 63 /************* Directory scanning in Win32 ***********/
316 nigel 53
317 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319     when it did not exist. */
320 nigel 53
321 nigel 63
322 ph10 97 #elif HAVE_WINDOWS_H
323 nigel 63
324     #ifndef STRICT
325     # define STRICT
326     #endif
327     #ifndef WIN32_LEAN_AND_MEAN
328     # define WIN32_LEAN_AND_MEAN
329     #endif
330 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
331     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332     #endif
333    
334 nigel 63 #include <windows.h>
335    
336     typedef struct directory_type
337     {
338     HANDLE handle;
339     BOOL first;
340     WIN32_FIND_DATA data;
341     } directory_type;
342    
343     int
344     isdirectory(char *filename)
345     {
346     DWORD attr = GetFileAttributes(filename);
347     if (attr == INVALID_FILE_ATTRIBUTES)
348     return 0;
349     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350     }
351    
352     directory_type *
353     opendirectory(char *filename)
354     {
355     size_t len;
356     char *pattern;
357     directory_type *dir;
358     DWORD err;
359     len = strlen(filename);
360     pattern = (char *) malloc(len + 3);
361     dir = (directory_type *) malloc(sizeof(*dir));
362     if ((pattern == NULL) || (dir == NULL))
363     {
364     fprintf(stderr, "pcregrep: malloc failed\n");
365     exit(2);
366     }
367     memcpy(pattern, filename, len);
368     memcpy(&(pattern[len]), "\\*", 3);
369     dir->handle = FindFirstFile(pattern, &(dir->data));
370     if (dir->handle != INVALID_HANDLE_VALUE)
371     {
372     free(pattern);
373     dir->first = TRUE;
374     return dir;
375     }
376     err = GetLastError();
377     free(pattern);
378     free(dir);
379     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380     return NULL;
381     }
382    
383     char *
384     readdirectory(directory_type *dir)
385     {
386     for (;;)
387     {
388     if (!dir->first)
389     {
390     if (!FindNextFile(dir->handle, &(dir->data)))
391     return NULL;
392     }
393     else
394     {
395     dir->first = FALSE;
396     }
397     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398     return dir->data.cFileName;
399     }
400     #ifndef _MSC_VER
401     return NULL; /* Keep compiler happy; never executed */
402     #endif
403     }
404    
405     void
406     closedirectory(directory_type *dir)
407     {
408     FindClose(dir->handle);
409     free(dir);
410     }
411    
412    
413 nigel 87 /************* Test for regular file in Win32 **********/
414    
415     /* I don't know how to do this, or if it can be done; assume all paths are
416     regular if they are not directories. */
417    
/* There is no test for a regular file here: any path that isdirectory() does
not report as a directory is assumed to be regular.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
422    
423    
424     /************* Test stdout for being a terminal in Win32 **********/
425    
426     /* I don't know how to do this; assume never */
427    
428     static BOOL
429     is_stdout_tty(void)
430     {
431     FALSE;
432     }
433    
434    
435 nigel 53 /************* Directory scanning when we can't do it ***********/
436    
437     /* The type is void, and apart from isdirectory(), the functions do nothing. */
438    
439 nigel 63 #else
440    
/* Without directory support there is nothing behind a scan, so its type is
void. Nothing is ever reported as a directory, no scan can be opened, and
reading or closing a scan does nothing. */

typedef void directory_type;

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return NULL;
}

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return NULL;
}

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
447    
448 nigel 87
449     /************* Test for regular when we can't do it **********/
450    
451     /* Assume all files are regular. */
452    
/* With no way of testing, every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
454    
455    
456     /************* Test stdout for being a terminal when we can't do it **********/
457    
458     static BOOL
459     is_stdout_tty(void)
460     {
461     return FALSE;
462     }
463    
464    
465 nigel 53 #endif
466    
467    
468    
469 ph10 137 #ifndef HAVE_STRERROR
470 nigel 49 /*************************************************
471     * Provide strerror() for non-ANSI libraries *
472     *************************************************/
473    
474     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475     in their libraries, but can provide the same facility by this simple
476     alternative function. */
477    
/* The error message table and its size, as supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Stand-in for strerror(): look the error number up in sys_errlist, giving a
fixed text for any number that lies outside the table. */

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
487     #endif /* HAVE_STRERROR */
488    
489    
490    
491     /*************************************************
492 nigel 93 * Find end of line *
493     *************************************************/
494    
495     /* The length of the endline sequence that is found is set via lenptr. This may
496     be zero at the very end of the file if there is no line-ending sequence there.
497    
498     Arguments:
499     p current position in line
500     endptr end of available data
501     lenptr where to put the length of the eol sequence
502    
503     Returns: pointer to the last byte of the line
504     */
505    
506     static char *
507     end_of_line(char *p, char *endptr, int *lenptr)
508     {
509     switch(endlinetype)
510     {
511     default: /* Just in case */
512     case EL_LF:
513     while (p < endptr && *p != '\n') p++;
514     if (p < endptr)
515     {
516     *lenptr = 1;
517     return p + 1;
518     }
519     *lenptr = 0;
520     return endptr;
521    
522     case EL_CR:
523     while (p < endptr && *p != '\r') p++;
524     if (p < endptr)
525     {
526     *lenptr = 1;
527     return p + 1;
528     }
529     *lenptr = 0;
530     return endptr;
531    
532     case EL_CRLF:
533     for (;;)
534     {
535     while (p < endptr && *p != '\r') p++;
536     if (++p >= endptr)
537     {
538     *lenptr = 0;
539     return endptr;
540     }
541     if (*p == '\n')
542     {
543     *lenptr = 2;
544     return p + 1;
545     }
546     }
547     break;
548    
549 ph10 149 case EL_ANYCRLF:
550     while (p < endptr)
551     {
552     int extra = 0;
553     register int c = *((unsigned char *)p);
554    
555     if (utf8 && c >= 0xc0)
556     {
557     int gcii, gcss;
558     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559     gcss = 6*extra;
560     c = (c & utf8_table3[extra]) << gcss;
561     for (gcii = 1; gcii <= extra; gcii++)
562     {
563     gcss -= 6;
564     c |= (p[gcii] & 0x3f) << gcss;
565     }
566     }
567    
568     p += 1 + extra;
569    
570     switch (c)
571     {
572     case 0x0a: /* LF */
573     *lenptr = 1;
574     return p;
575    
576     case 0x0d: /* CR */
577     if (p < endptr && *p == 0x0a)
578     {
579     *lenptr = 2;
580     p++;
581     }
582     else *lenptr = 1;
583     return p;
584 ph10 150
585 ph10 149 default:
586     break;
587     }
588     } /* End of loop for ANYCRLF case */
589 ph10 150
590 ph10 149 *lenptr = 0; /* Must have hit the end */
591     return endptr;
592    
593 nigel 93 case EL_ANY:
594     while (p < endptr)
595     {
596     int extra = 0;
597     register int c = *((unsigned char *)p);
598    
599     if (utf8 && c >= 0xc0)
600     {
601     int gcii, gcss;
602     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603     gcss = 6*extra;
604     c = (c & utf8_table3[extra]) << gcss;
605     for (gcii = 1; gcii <= extra; gcii++)
606     {
607     gcss -= 6;
608     c |= (p[gcii] & 0x3f) << gcss;
609     }
610     }
611    
612     p += 1 + extra;
613    
614     switch (c)
615     {
616     case 0x0a: /* LF */
617     case 0x0b: /* VT */
618     case 0x0c: /* FF */
619     *lenptr = 1;
620     return p;
621    
622     case 0x0d: /* CR */
623     if (p < endptr && *p == 0x0a)
624     {
625     *lenptr = 2;
626     p++;
627     }
628     else *lenptr = 1;
629     return p;
630    
631     case 0x85: /* NEL */
632     *lenptr = utf8? 2 : 1;
633     return p;
634    
635     case 0x2028: /* LS */
636     case 0x2029: /* PS */
637     *lenptr = 3;
638     return p;
639    
640     default:
641     break;
642     }
643     } /* End of loop for ANY case */
644    
645     *lenptr = 0; /* Must have hit the end */
646     return endptr;
647     } /* End of overall switch */
648     }
649    
650    
651    
652     /*************************************************
653     * Find start of previous line *
654     *************************************************/
655    
656     /* This is called when looking back for before lines to print.
657    
658     Arguments:
659     p start of the subsequent line
660     startptr start of available data
661    
662     Returns: pointer to the start of the previous line
663     */
664    
665     static char *
666     previous_line(char *p, char *startptr)
667     {
668     switch(endlinetype)
669     {
670     default: /* Just in case */
671     case EL_LF:
672     p--;
673     while (p > startptr && p[-1] != '\n') p--;
674     return p;
675    
676     case EL_CR:
677     p--;
678     while (p > startptr && p[-1] != '\n') p--;
679     return p;
680    
681     case EL_CRLF:
682     for (;;)
683     {
684     p -= 2;
685     while (p > startptr && p[-1] != '\n') p--;
686     if (p <= startptr + 1 || p[-2] == '\r') return p;
687     }
688     return p; /* But control should never get here */
689    
690     case EL_ANY:
691 ph10 150 case EL_ANYCRLF:
692 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693     if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695     while (p > startptr)
696     {
697     register int c;
698     char *pp = p - 1;
699    
700     if (utf8)
701     {
702     int extra = 0;
703     while ((*pp & 0xc0) == 0x80) pp--;
704     c = *((unsigned char *)pp);
705     if (c >= 0xc0)
706     {
707     int gcii, gcss;
708     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709     gcss = 6*extra;
710     c = (c & utf8_table3[extra]) << gcss;
711     for (gcii = 1; gcii <= extra; gcii++)
712     {
713     gcss -= 6;
714     c |= (pp[gcii] & 0x3f) << gcss;
715     }
716     }
717     }
718     else c = *((unsigned char *)pp);
719    
720 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
721 nigel 93 {
722     case 0x0a: /* LF */
723 ph10 149 case 0x0d: /* CR */
724     return p;
725 ph10 150
726 ph10 149 default:
727     break;
728 ph10 150 }
729 ph10 149
730     else switch (c)
731     {
732     case 0x0a: /* LF */
733 nigel 93 case 0x0b: /* VT */
734     case 0x0c: /* FF */
735     case 0x0d: /* CR */
736     case 0x85: /* NEL */
737     case 0x2028: /* LS */
738     case 0x2029: /* PS */
739     return p;
740    
741     default:
742     break;
743     }
744    
745     p = pp; /* Back one character */
746     } /* End of loop for ANY case */
747    
748     return startptr; /* Hit start of data */
749     } /* End of overall switch */
750     }
751    
752    
753    
754    
755    
756     /*************************************************
757 nigel 77 * Print the previous "after" lines *
758 nigel 49 *************************************************/
759    
760 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
761 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
762     that a binary zero does not terminate it.
763 nigel 77
764     Arguments:
765     lastmatchnumber the number of the last matching line, plus one
766     lastmatchrestart where we restarted after the last match
767     endptr end of available data
768     printname filename for printing
769    
770     Returns: nothing
771     */
772    
773     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774     char *endptr, char *printname)
775     {
776     if (after_context > 0 && lastmatchnumber > 0)
777     {
778     int count = 0;
779     while (lastmatchrestart < endptr && count++ < after_context)
780     {
781 nigel 93 int ellength;
782 nigel 77 char *pp = lastmatchrestart;
783     if (printname != NULL) fprintf(stdout, "%s-", printname);
784     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 nigel 93 pp = end_of_line(pp, endptr, &ellength);
786     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787     lastmatchrestart = pp;
788 nigel 77 }
789     hyphenpending = TRUE;
790     }
791     }
792    
793    
794    
795     /*************************************************
796     * Grep an individual file *
797     *************************************************/
798    
799     /* This is called from grep_or_recurse() below. It uses a buffer that is three
800     times the value of MBUFTHIRD. The matching point is never allowed to stray into
801     the top third of the buffer, thus keeping more of the file available for
802     context printing or for multiline scanning. For large files, the pointer will
803     be in the middle third most of the time, so the bottom third is available for
804     "before" context printing.
805    
806     Arguments:
807     in the fopened FILE stream
808     printname the file name if it is to be printed for each match
809     or NULL if the file name is not to be printed
810     it cannot be NULL if filenames[_nomatch]_only is set
811    
812     Returns: 0 if there was at least one match
813     1 otherwise (no matches)
814     */
815    
816 nigel 49 static int
817 nigel 77 pcregrep(FILE *in, char *printname)
818 nigel 49 {
819     int rc = 1;
820 nigel 77 int linenumber = 1;
821     int lastmatchnumber = 0;
822 nigel 49 int count = 0;
823     int offsets[99];
824 nigel 77 char *lastmatchrestart = NULL;
825     char buffer[3*MBUFTHIRD];
826     char *ptr = buffer;
827     char *endptr;
828     size_t bufflength;
829     BOOL endhyphenpending = FALSE;
830 nigel 49
831 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
832     end of what we have. */
833    
834     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835     endptr = buffer + bufflength;
836    
837     /* Loop while the current pointer is not at the end of the file. For large
838     files, endptr will be at the end of the buffer when we are in the middle of the
839     file, but ptr will never get there, because as soon as it gets over 2/3 of the
840     way, the buffer is shifted left and re-filled. */
841    
842     while (ptr < endptr)
843 nigel 49 {
844 nigel 93 int i, endlinelength;
845 nigel 87 int mrc = 0;
846 nigel 53 BOOL match = FALSE;
847 ph10 279 char *matchptr = ptr;
848 nigel 77 char *t = ptr;
849     size_t length, linelength;
850 nigel 49
851 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
852     of the subject string to pass to pcre_exec(). In multiline mode, it is the
853     length remainder of the data in the buffer. Otherwise, it is the length of
854     the next line. After matching, we always advance by the length of the next
855     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
856     that any match is constrained to be in the first line. */
857    
858 nigel 93 t = end_of_line(t, endptr, &endlinelength);
859     linelength = t - ptr - endlinelength;
860 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
861 nigel 77
862 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
863    
864     #ifdef JFRIEDL_DEBUG
865     if (jfriedl_XT || jfriedl_XR)
866     {
867     #include <sys/time.h>
868     #include <time.h>
869     struct timeval start_time, end_time;
870     struct timezone dummy;
871    
872     if (jfriedl_XT)
873     {
874     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
875     const char *orig = ptr;
876     ptr = malloc(newlen + 1);
877     if (!ptr) {
878     printf("out of memory");
879     exit(2);
880     }
881     endptr = ptr;
882     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
883     for (i = 0; i < jfriedl_XT; i++) {
884     strncpy(endptr, orig, length);
885     endptr += length;
886     }
887     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
888     length = newlen;
889     }
890    
891     if (gettimeofday(&start_time, &dummy) != 0)
892     perror("bad gettimeofday");
893    
894    
895     for (i = 0; i < jfriedl_XR; i++)
896     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
897    
898     if (gettimeofday(&end_time, &dummy) != 0)
899     perror("bad gettimeofday");
900    
901     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
902     -
903     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
904    
905     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
906     return 0;
907     }
908     #endif
909    
910 ph10 279 /* We come back here after a match when the -o option (only_matching) is set,
911     in order to find any further matches in the same line. */
912    
913     ONLY_MATCHING_RESTART:
914 nigel 89
915 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
916     the final newline in the subject string. */
917    
918 nigel 87 for (i = 0; i < pattern_count; i++)
919 nigel 53 {
920 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
921 nigel 87 offsets, 99);
922     if (mrc >= 0) { match = TRUE; break; }
923     if (mrc != PCRE_ERROR_NOMATCH)
924     {
925     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
926     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
927     fprintf(stderr, "this line:\n");
928 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
929 nigel 87 fprintf(stderr, "\n");
930     if (error_count == 0 &&
931     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
932     {
933     fprintf(stderr, "pcregrep: error %d means that a resource limit "
934     "was exceeded\n", mrc);
935     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
936     }
937     if (error_count++ > 20)
938     {
939     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
940     exit(2);
941     }
942     match = invert; /* No more matching; don't show the line again */
943     break;
944     }
945 nigel 53 }
946 nigel 49
947 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
948 nigel 77
949 nigel 49 if (match != invert)
950     {
951 nigel 77 BOOL hyphenprinted = FALSE;
952    
953 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
954 nigel 77
955 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
956    
957     /* Just count if just counting is wanted. */
958    
959 nigel 49 if (count_only) count++;
960    
961 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
962     in the file. */
963    
964     else if (filenames == FN_ONLY)
965 nigel 49 {
966 nigel 77 fprintf(stdout, "%s\n", printname);
967 nigel 49 return 0;
968     }
969    
970 nigel 87 /* Likewise, if all we want is a yes/no answer. */
971    
972 nigel 77 else if (quiet) return 0;
973 nigel 49
974 nigel 87 /* The --only-matching option prints just the substring that matched, and
975 ph10 279 does not print any context. Afterwards, adjust the start and length, and
976     then jump back to look for further matches in the same line. If we are in
977     invert mode, however, nothing is printed - this could be useful still
978     because the return code is set. */
979 nigel 87
980     else if (only_matching)
981     {
982 ph10 279 if (!invert)
983     {
984     if (printname != NULL) fprintf(stdout, "%s:", printname);
985     if (number) fprintf(stdout, "%d:", linenumber);
986     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
987     fprintf(stdout, "\n");
988     matchptr += offsets[1];
989     length -= offsets[1];
990     match = FALSE;
991     goto ONLY_MATCHING_RESTART;
992     }
993 nigel 87 }
994    
995     /* This is the default case when none of the above options is set. We print
996     the matching line(s), possibly preceded and/or followed by other lines of
997     context. */
998    
999 nigel 49 else
1000     {
1001 nigel 77 /* See if there is a requirement to print some "after" lines from a
1002     previous match. We never print any overlaps. */
1003    
1004     if (after_context > 0 && lastmatchnumber > 0)
1005     {
1006 nigel 93 int ellength;
1007 nigel 77 int linecount = 0;
1008     char *p = lastmatchrestart;
1009    
1010     while (p < ptr && linecount < after_context)
1011     {
1012 nigel 93 p = end_of_line(p, ptr, &ellength);
1013 nigel 77 linecount++;
1014     }
1015    
1016     /* It is important to advance lastmatchrestart during this printing so
1017 nigel 87 that it interacts correctly with any "before" printing below. Print
1018     each line's data using fwrite() in case there are binary zeroes. */
1019 nigel 77
1020     while (lastmatchrestart < p)
1021     {
1022     char *pp = lastmatchrestart;
1023     if (printname != NULL) fprintf(stdout, "%s-", printname);
1024     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1025 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1026     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1027     lastmatchrestart = pp;
1028 nigel 77 }
1029     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1030     }
1031    
1032     /* If there were non-contiguous lines printed above, insert hyphens. */
1033    
1034     if (hyphenpending)
1035     {
1036     fprintf(stdout, "--\n");
1037     hyphenpending = FALSE;
1038     hyphenprinted = TRUE;
1039     }
1040    
1041     /* See if there is a requirement to print some "before" lines for this
1042     match. Again, don't print overlaps. */
1043    
1044     if (before_context > 0)
1045     {
1046     int linecount = 0;
1047     char *p = ptr;
1048    
1049     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1050 nigel 87 linecount < before_context)
1051 nigel 77 {
1052 nigel 87 linecount++;
1053 nigel 93 p = previous_line(p, buffer);
1054 nigel 77 }
1055    
1056     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1057     fprintf(stdout, "--\n");
1058    
1059     while (p < ptr)
1060     {
1061 nigel 93 int ellength;
1062 nigel 77 char *pp = p;
1063     if (printname != NULL) fprintf(stdout, "%s-", printname);
1064     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1065 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1066     fwrite(p, 1, pp - p, stdout);
1067     p = pp;
1068 nigel 77 }
1069     }
1070    
1071     /* Now print the matching line(s); ensure we set hyphenpending at the end
1072 nigel 85 of the file if any context lines are being output. */
1073 nigel 77
1074 nigel 85 if (after_context > 0 || before_context > 0)
1075     endhyphenpending = TRUE;
1076    
1077 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1078 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1079 nigel 77
1080     /* In multiline mode, we want to print to the end of the line in which
1081     the end of the matched string is found, so we adjust linelength and the
1082 ph10 222 line number appropriately, but only when there actually was a match
1083     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1084     the match will always be before the first newline sequence. */
1085 nigel 77
1086     if (multiline)
1087     {
1088 nigel 93 int ellength;
1089 ph10 222 char *endmatch = ptr;
1090     if (!invert)
1091 nigel 93 {
1092 ph10 222 endmatch += offsets[1];
1093     t = ptr;
1094     while (t < endmatch)
1095     {
1096     t = end_of_line(t, endptr, &ellength);
1097     if (t <= endmatch) linenumber++; else break;
1098     }
1099 nigel 93 }
1100     endmatch = end_of_line(endmatch, endptr, &ellength);
1101     linelength = endmatch - ptr - ellength;
1102 nigel 77 }
1103    
1104 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1105     zeroes are treated as just another data character. */
1106    
1107     /* This extra option, for Jeffrey Friedl's debugging requirements,
1108     replaces the matched string, or a specific captured string if it exists,
1109     with X. When this happens, colouring is ignored. */
1110    
1111     #ifdef JFRIEDL_DEBUG
1112     if (S_arg >= 0 && S_arg < mrc)
1113     {
1114     int first = S_arg * 2;
1115     int last = first + 1;
1116     fwrite(ptr, 1, offsets[first], stdout);
1117     fprintf(stdout, "X");
1118     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1119     }
1120     else
1121     #endif
1122    
1123     /* We have to split the line(s) up if colouring. */
1124    
1125     if (do_colour)
1126     {
1127     fwrite(ptr, 1, offsets[0], stdout);
1128     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1129     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1130     fprintf(stdout, "%c[00m", 0x1b);
1131 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1132 ph10 239 stdout);
1133 nigel 87 }
1134 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1135 nigel 49 }
1136    
1137 nigel 87 /* End of doing what has to be done for a match */
1138    
1139 nigel 77 rc = 0; /* Had some success */
1140    
1141     /* Remember where the last match happened for after_context. We remember
1142     where we are about to restart, and that line's number. */
1143    
1144 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1145 nigel 77 lastmatchnumber = linenumber + 1;
1146 nigel 49 }
1147 nigel 77
1148 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1149     anything to be printed), we have to move on to the end of the match before
1150     proceeding. */
1151    
1152     if (multiline && invert && match)
1153     {
1154     int ellength;
1155     char *endmatch = ptr + offsets[1];
1156     t = ptr;
1157     while (t < endmatch)
1158     {
1159     t = end_of_line(t, endptr, &ellength);
1160     if (t <= endmatch) linenumber++; else break;
1161     }
1162     endmatch = end_of_line(endmatch, endptr, &ellength);
1163     linelength = endmatch - ptr - ellength;
1164     }
1165    
1166 nigel 77 /* Advance to after the newline and increment the line number. */
1167    
1168 nigel 93 ptr += linelength + endlinelength;
1169 nigel 77 linenumber++;
1170    
1171     /* If we haven't yet reached the end of the file (the buffer is full), and
1172     the current point is in the top 1/3 of the buffer, slide the buffer down by
1173     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1174     about to be lost, print them. */
1175    
1176     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1177     {
1178     if (after_context > 0 &&
1179     lastmatchnumber > 0 &&
1180     lastmatchrestart < buffer + MBUFTHIRD)
1181     {
1182     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1183     lastmatchnumber = 0;
1184     }
1185    
1186     /* Now do the shuffle */
1187    
1188     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1189     ptr -= MBUFTHIRD;
1190     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1191     endptr = buffer + bufflength;
1192    
1193     /* Adjust any last match point */
1194    
1195     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1196     }
1197     } /* Loop through the whole file */
1198    
1199     /* End of file; print final "after" lines if wanted; do_after_lines sets
1200     hyphenpending if it prints something. */
1201    
1202 nigel 87 if (!only_matching && !count_only)
1203     {
1204     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1205     hyphenpending |= endhyphenpending;
1206     }
1207 nigel 77
1208     /* Print the file name if we are looking for those without matches and there
1209     were none. If we found a match, we won't have got this far. */
1210    
1211 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1212 nigel 77 {
1213     fprintf(stdout, "%s\n", printname);
1214     return 0;
1215 nigel 49 }
1216    
1217 nigel 77 /* Print the match count if wanted */
1218    
1219 nigel 49 if (count_only)
1220     {
1221 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1222 nigel 49 fprintf(stdout, "%d\n", count);
1223     }
1224    
1225     return rc;
1226     }
1227    
1228    
1229    
1230     /*************************************************
1231 nigel 53 * Grep a file or recurse into a directory *
1232     *************************************************/
1233    
1234 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1235     recursing; if it's a file, grep it.
1236    
1237     Arguments:
1238     pathname the path to investigate
1239 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1240 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1241    
1242     Returns: 0 if there was at least one match
1243     1 if there were no matches
1244     2 there was some kind of error
1245    
1246     However, file opening failures are suppressed if "silent" is set.
1247     */
1248    
1249 nigel 53 static int
1250 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1251 nigel 53 {
1252     int rc = 1;
1253     int sep;
1254     FILE *in;
1255    
1256 nigel 77 /* If the file name is "-" we scan stdin */
1257 nigel 53
1258 nigel 77 if (strcmp(pathname, "-") == 0)
1259 nigel 53 {
1260 nigel 77 return pcregrep(stdin,
1261 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1262 nigel 77 stdin_name : NULL);
1263     }
1264    
1265    
1266 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1267     each file within it, subject to any include or exclude patterns that were set.
1268     The scanning code is localized so it can be made system-specific. */
1269    
1270     if ((sep = isdirectory(pathname)) != 0)
1271 nigel 77 {
1272 nigel 87 if (dee_action == dee_SKIP) return 1;
1273     if (dee_action == dee_RECURSE)
1274 nigel 53 {
1275 nigel 87 char buffer[1024];
1276     char *nextfile;
1277     directory_type *dir = opendirectory(pathname);
1278 nigel 53
1279 nigel 87 if (dir == NULL)
1280     {
1281     if (!silent)
1282     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1283     strerror(errno));
1284     return 2;
1285     }
1286 nigel 77
1287 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1288     {
1289     int frc, blen;
1290     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1291     blen = strlen(buffer);
1292 nigel 77
1293 nigel 87 if (exclude_compiled != NULL &&
1294     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1295     continue;
1296 nigel 77
1297 nigel 87 if (include_compiled != NULL &&
1298     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1299     continue;
1300    
1301     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1302     if (frc > 1) rc = frc;
1303     else if (frc == 0 && rc == 1) rc = 0;
1304     }
1305    
1306     closedirectory(dir);
1307     return rc;
1308 nigel 53 }
1309     }
1310    
1311 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1312     been requested. */
1313 nigel 53
1314 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1315    
1316     /* Control reaches here if we have a regular file, or if we have a directory
1317     and recursion or skipping was not requested, or if we have anything else and
1318     skipping was not requested. The scan proceeds. If this is the first and only
1319     argument at top level, we don't show the file name, unless we are only showing
1320     the file name, or the filename was forced (-H). */
1321    
1322 nigel 77 in = fopen(pathname, "r");
1323 nigel 53 if (in == NULL)
1324     {
1325 nigel 77 if (!silent)
1326     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1327     strerror(errno));
1328 nigel 53 return 2;
1329     }
1330    
1331 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1332     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1333 nigel 77
1334 nigel 53 fclose(in);
1335     return rc;
1336     }
1337    
1338    
1339    
1340    
1341     /*************************************************
1342 nigel 49 * Usage function *
1343     *************************************************/
1344    
1345     static int
1346     usage(int rc)
1347     {
1348 nigel 87 option_item *op;
1349     fprintf(stderr, "Usage: pcregrep [-");
1350     for (op = optionlist; op->one_char != 0; op++)
1351     {
1352     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1353     }
1354     fprintf(stderr, "] [long options] [pattern] [files]\n");
1355 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1356 nigel 49 return rc;
1357     }
1358    
1359    
1360    
1361    
1362     /*************************************************
1363 nigel 53 * Help function *
1364     *************************************************/
1365    
1366     static void
1367     help(void)
1368     {
1369     option_item *op;
1370    
1371 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1372 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1373 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1374     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1375 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1376    
1377     printf("Options:\n");
1378    
1379     for (op = optionlist; op->one_char != 0; op++)
1380     {
1381     int n;
1382     char s[4];
1383     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1384     printf(" %s --%s%n", s, op->long_name, &n);
1385     n = 30 - n;
1386     if (n < 1) n = 1;
1387     printf("%.*s%s\n", n, " ", op->help_text);
1388     }
1389    
1390 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1391     printf("trailing white space is removed and blank lines are ignored.\n");
1392     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1393 nigel 53
1394 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1395 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1396     }
1397    
1398    
1399    
1400    
1401     /*************************************************
1402 nigel 77 * Handle a single-letter, no data option *
1403 nigel 53 *************************************************/
1404    
1405     static int
1406     handle_option(int letter, int options)
1407     {
1408     switch(letter)
1409     {
1410 nigel 87 case N_HELP: help(); exit(0);
1411 nigel 53 case 'c': count_only = TRUE; break;
1412 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1413     case 'H': filenames = FN_FORCE; break;
1414     case 'h': filenames = FN_NONE; break;
1415 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1416 nigel 87 case 'l': filenames = FN_ONLY; break;
1417     case 'L': filenames = FN_NOMATCH_ONLY; break;
1418 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1419 nigel 53 case 'n': number = TRUE; break;
1420 nigel 87 case 'o': only_matching = TRUE; break;
1421 nigel 77 case 'q': quiet = TRUE; break;
1422 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1423 nigel 53 case 's': silent = TRUE; break;
1424 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1425 nigel 53 case 'v': invert = TRUE; break;
1426 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1427     case 'x': process_options |= PO_LINE_MATCH; break;
1428 nigel 53
1429     case 'V':
1430 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1431 nigel 53 exit(0);
1432     break;
1433    
1434     default:
1435     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1436     exit(usage(2));
1437     }
1438    
1439     return options;
1440     }
1441    
1442    
1443    
1444    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "2nd", "3rd", "4th", "11th", etc.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[32];   /* Ample for the digits of any int plus the ending */
const char *ending = "th";

/* The endings "st", "nd" and "rd" depend on the last digit, but numbers whose
last two digits are 11, 12 or 13 take "th" all the same. */

if ((n / 10) % 10 != 1)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1467    
1468    
1469    
1470     /*************************************************
1471     * Compile a single pattern *
1472     *************************************************/
1473    
1474     /* When the -F option has been used, this is called for each substring.
1475     Otherwise it's called for each supplied pattern.
1476    
1477     Arguments:
1478     pattern the pattern string
1479     options the PCRE options
1480     filename the file name, or NULL for a command-line pattern
1481     count 0 if this is the only command line pattern, or
1482     number of the command line pattern, or
1483     linenumber for a pattern from a file
1484    
1485     Returns: TRUE on success, FALSE after an error
1486     */
1487    
1488     static BOOL
1489     compile_single_pattern(char *pattern, int options, char *filename, int count)
1490     {
1491     char buffer[MBUFTHIRD + 16];
1492     const char *error;
1493     int errptr;
1494    
1495     if (pattern_count >= MAX_PATTERN_COUNT)
1496     {
1497     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1498     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1499     return FALSE;
1500     }
1501    
1502     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1503     suffix[process_options]);
1504     pattern_list[pattern_count] =
1505     pcre_compile(buffer, options, &error, &errptr, pcretables);
1506 ph10 142 if (pattern_list[pattern_count] != NULL)
1507 ph10 141 {
1508 ph10 142 pattern_count++;
1509 ph10 141 return TRUE;
1510 ph10 142 }
1511 nigel 87
1512     /* Handle compile errors */
1513    
1514     errptr -= (int)strlen(prefix[process_options]);
1515     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1516    
1517     if (filename == NULL)
1518     {
1519     if (count == 0)
1520     fprintf(stderr, "pcregrep: Error in command-line regex "
1521     "at offset %d: %s\n", errptr, error);
1522     else
1523     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1524     "at offset %d: %s\n", ordin(count), errptr, error);
1525     }
1526     else
1527     {
1528     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1529     "at offset %d: %s\n", count, filename, errptr, error);
1530     }
1531    
1532     return FALSE;
1533     }
1534    
1535    
1536    
1537     /*************************************************
1538     * Compile one supplied pattern *
1539     *************************************************/
1540    
1541     /* When the -F option has been used, each string may be a list of strings,
1542 nigel 91 separated by line breaks. They will be matched literally.
1543 nigel 87
1544     Arguments:
1545     pattern the pattern string
1546     options the PCRE options
1547     filename the file name, or NULL for a command-line pattern
1548     count 0 if this is the only command line pattern, or
1549     number of the command line pattern, or
1550     linenumber for a pattern from a file
1551    
1552     Returns: TRUE on success, FALSE after an error
1553     */
1554    
1555     static BOOL
1556     compile_pattern(char *pattern, int options, char *filename, int count)
1557     {
1558     if ((process_options & PO_FIXED_STRINGS) != 0)
1559     {
1560 nigel 93 char *eop = pattern + strlen(pattern);
1561 nigel 87 char buffer[MBUFTHIRD];
1562     for(;;)
1563     {
1564 nigel 93 int ellength;
1565     char *p = end_of_line(pattern, eop, &ellength);
1566     if (ellength == 0)
1567 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1568 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1569 nigel 93 pattern = p;
1570 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1571     return FALSE;
1572     }
1573     }
1574     else return compile_single_pattern(pattern, options, filename, count);
1575     }
1576    
1577    
1578    
1579     /*************************************************
1580 nigel 49 * Main program *
1581     *************************************************/
1582    
1583 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1584    
1585 nigel 49 int
1586     main(int argc, char **argv)
1587     {
1588 nigel 53 int i, j;
1589 nigel 49 int rc = 1;
1590 nigel 87 int pcre_options = 0;
1591     int cmd_pattern_count = 0;
1592 ph10 141 int hint_count = 0;
1593 nigel 49 int errptr;
1594 nigel 87 BOOL only_one_at_top;
1595     char *patterns[MAX_PATTERN_COUNT];
1596     const char *locale_from = "--locale";
1597 nigel 49 const char *error;
1598    
1599 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1600     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1601     */
1602 nigel 91
1603     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1604     switch(i)
1605     {
1606     default: newline = (char *)"lf"; break;
1607     case '\r': newline = (char *)"cr"; break;
1608     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1609 nigel 93 case -1: newline = (char *)"any"; break;
1610 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1611 nigel 91 }
1612    
1613 nigel 49 /* Process the options */
1614    
1615     for (i = 1; i < argc; i++)
1616     {
1617 nigel 77 option_item *op = NULL;
1618     char *option_data = (char *)""; /* default to keep compiler happy */
1619     BOOL longop;
1620     BOOL longopwasequals = FALSE;
1621    
1622 nigel 49 if (argv[i][0] != '-') break;
1623 nigel 53
1624 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1625 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1626 nigel 63
1627 nigel 77 if (argv[i][1] == 0)
1628     {
1629 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1630 nigel 77 else exit(usage(2));
1631     }
1632 nigel 63
1633 nigel 77 /* Handle a long name option, or -- to terminate the options */
1634 nigel 53
1635     if (argv[i][1] == '-')
1636 nigel 49 {
1637 nigel 77 char *arg = argv[i] + 2;
1638     char *argequals = strchr(arg, '=');
1639 nigel 53
1640 nigel 77 if (*arg == 0) /* -- terminates options */
1641 nigel 49 {
1642 nigel 77 i++;
1643     break; /* out of the options-handling loop */
1644 nigel 53 }
1645 nigel 49
1646 nigel 77 longop = TRUE;
1647    
1648     /* Some long options have data that follows after =, for example file=name.
1649     Some options have variations in the long name spelling: specifically, we
1650     allow "regexp" because GNU grep allows it, though I personally go along
1651 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1652     These options are entered in the table as "regex(p)". No option is in both
1653     these categories, fortunately. */
1654 nigel 77
1655 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1656     {
1657 nigel 77 char *opbra = strchr(op->long_name, '(');
1658     char *equals = strchr(op->long_name, '=');
1659     if (opbra == NULL) /* Not a (p) case */
1660 nigel 53 {
1661 nigel 77 if (equals == NULL) /* Not thing=data case */
1662     {
1663     if (strcmp(arg, op->long_name) == 0) break;
1664     }
1665     else /* Special case xxx=data */
1666     {
1667     int oplen = equals - op->long_name;
1668 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1669 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1670     {
1671     option_data = arg + arglen;
1672     if (*option_data == '=')
1673     {
1674     option_data++;
1675     longopwasequals = TRUE;
1676     }
1677     break;
1678     }
1679     }
1680 nigel 53 }
1681 nigel 77 else /* Special case xxxx(p) */
1682     {
1683     char buff1[24];
1684     char buff2[24];
1685     int baselen = opbra - op->long_name;
1686     sprintf(buff1, "%.*s", baselen, op->long_name);
1687 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1688 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1689 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1690     break;
1691     }
1692 nigel 53 }
1693 nigel 77
1694 nigel 53 if (op->one_char == 0)
1695     {
1696     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1697     exit(usage(2));
1698     }
1699     }
1700 nigel 49
1701 nigel 89
1702     /* Jeffrey Friedl's debugging harness uses these additional options which
1703     are not in the right form for putting in the option table because they use
1704     only one hyphen, yet are more than one character long. By putting them
1705     separately here, they will not get displayed as part of the help() output,
1706     but I don't think Jeffrey will care about that. */
1707    
1708     #ifdef JFRIEDL_DEBUG
1709     else if (strcmp(argv[i], "-pre") == 0) {
1710     jfriedl_prefix = argv[++i];
1711     continue;
1712     } else if (strcmp(argv[i], "-post") == 0) {
1713     jfriedl_postfix = argv[++i];
1714     continue;
1715     } else if (strcmp(argv[i], "-XT") == 0) {
1716     sscanf(argv[++i], "%d", &jfriedl_XT);
1717     continue;
1718     } else if (strcmp(argv[i], "-XR") == 0) {
1719     sscanf(argv[++i], "%d", &jfriedl_XR);
1720     continue;
1721     }
1722     #endif
1723    
1724    
1725 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1726     continue till we hit the last one or one that needs data. */
1727 nigel 53
1728     else
1729     {
1730     char *s = argv[i] + 1;
1731 nigel 77 longop = FALSE;
1732 nigel 53 while (*s != 0)
1733     {
1734 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1735     { if (*s == op->one_char) break; }
1736     if (op->one_char == 0)
1737 nigel 53 {
1738 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1739     *s, argv[i]);
1740     exit(usage(2));
1741     }
1742     if (op->type != OP_NODATA || s[1] == 0)
1743     {
1744     option_data = s+1;
1745 nigel 53 break;
1746     }
1747 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1748 nigel 49 }
1749     }
1750 nigel 77
1751 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1752     is OP_NODATA, it means that there is no data, and the option might set
1753     something in the PCRE options. */
1754 nigel 77
1755     if (op->type == OP_NODATA)
1756     {
1757 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1758     continue;
1759     }
1760    
1761     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1762     either has a value or defaults to something. It cannot have data in a
1763     separate item. At the moment, the only such options are "colo(u)r" and
1764 nigel 89 Jeffrey Friedl's special -S debugging option. */
1765 nigel 87
1766     if (*option_data == 0 &&
1767     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1768     {
1769     switch (op->one_char)
1770 nigel 77 {
1771 nigel 87 case N_COLOUR:
1772     colour_option = (char *)"auto";
1773     break;
1774     #ifdef JFRIEDL_DEBUG
1775     case 'S':
1776     S_arg = 0;
1777     break;
1778     #endif
1779 nigel 77 }
1780 nigel 87 continue;
1781     }
1782 nigel 77
1783 nigel 87 /* Otherwise, find the data string for the option. */
1784    
1785     if (*option_data == 0)
1786     {
1787     if (i >= argc - 1 || longopwasequals)
1788 nigel 77 {
1789 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1790     exit(usage(2));
1791     }
1792     option_data = argv[++i];
1793     }
1794    
1795     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1796     multiple times to create a list of patterns. */
1797    
1798     if (op->type == OP_PATLIST)
1799     {
1800     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1801     {
1802     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1803     MAX_PATTERN_COUNT);
1804     return 2;
1805     }
1806     patterns[cmd_pattern_count++] = option_data;
1807     }
1808    
1809     /* Otherwise, deal with single string or numeric data values. */
1810    
1811     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1812     {
1813     *((char **)op->dataptr) = option_data;
1814     }
1815     else
1816     {
1817     char *endptr;
1818     int n = strtoul(option_data, &endptr, 10);
1819     if (*endptr != 0)
1820     {
1821     if (longop)
1822 nigel 77 {
1823 nigel 87 char *equals = strchr(op->long_name, '=');
1824     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1825     equals - op->long_name;
1826     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1827     option_data, nlen, op->long_name);
1828 nigel 77 }
1829 nigel 87 else
1830     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1831     option_data, op->one_char);
1832     exit(usage(2));
1833 nigel 77 }
1834 nigel 87 *((int *)op->dataptr) = n;
1835 nigel 77 }
1836 nigel 49 }
1837    
1838 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1839     for -A and -B. */
1840    
1841     if (both_context > 0)
1842     {
1843     if (after_context == 0) after_context = both_context;
1844     if (before_context == 0) before_context = both_context;
1845     }
1846    
1847 nigel 87 /* If a locale has not been provided as an option, use the value of the LC_ALL
1848     environment variable if it is set; failing that, use LC_CTYPE if it is set. */
1849 nigel 49
1850 nigel 87 if (locale == NULL)
1851 nigel 53 {
1852 nigel 87 locale = getenv("LC_ALL");
1853     locale_from = "LCC_ALL";
1854 nigel 53 }
1855 nigel 49
1856 nigel 87 if (locale == NULL)
1857     {
1858     locale = getenv("LC_CTYPE");
1859     locale_from = "LC_CTYPE";
1860     }
1861 nigel 49
1862 nigel 87 /* If a locale has been provided, set it, and generate the tables that PCRE
1863     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1864    
1865     if (locale != NULL)
1866 nigel 49 {
1867 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1868 nigel 53 {
1869 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1870     locale, locale_from);
1871 nigel 53 return 2;
1872     }
1873 nigel 87 pcretables = pcre_maketables();
1874     }
1875 nigel 77
1876 nigel 87 /* Sort out colouring */
1877    
1878     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1879     {
1880     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1881     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1882     else
1883 nigel 53 {
1884 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1885     colour_option);
1886     return 2;
1887 nigel 77 }
1888 nigel 87 if (do_colour)
1889 nigel 77 {
1890 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1891     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1892     if (cs != NULL) colour_string = cs;
1893 nigel 77 }
1894 nigel 87 }
1895 nigel 77
1896 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1897    
1898     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1899     {
1900     pcre_options |= PCRE_NEWLINE_CR;
1901 nigel 93 endlinetype = EL_CR;
1902 nigel 91 }
1903     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1904     {
1905     pcre_options |= PCRE_NEWLINE_LF;
1906 nigel 93 endlinetype = EL_LF;
1907 nigel 91 }
1908     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1909     {
1910     pcre_options |= PCRE_NEWLINE_CRLF;
1911 nigel 93 endlinetype = EL_CRLF;
1912 nigel 91 }
1913 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1914     {
1915     pcre_options |= PCRE_NEWLINE_ANY;
1916     endlinetype = EL_ANY;
1917     }
1918 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1919     {
1920     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1921     endlinetype = EL_ANYCRLF;
1922     }
1923 nigel 91 else
1924     {
1925     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1926     return 2;
1927     }
1928    
1929 nigel 87 /* Interpret the text values for -d and -D */
1930    
1931     if (dee_option != NULL)
1932     {
1933     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1934     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1935     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1936     else
1937 nigel 77 {
1938 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1939     return 2;
1940 nigel 53 }
1941 nigel 49 }
1942    
1943 nigel 87 if (DEE_option != NULL)
1944     {
1945     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1946     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1947     else
1948     {
1949     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1950     return 2;
1951     }
1952     }
1953 nigel 49
1954 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1955 nigel 87
1956     #ifdef JFRIEDL_DEBUG
1957     if (S_arg > 9)
1958 nigel 49 {
1959 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1960     return 2;
1961     }
1962 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1963     {
1964     if (jfriedl_XT == 0) jfriedl_XT = 1;
1965     if (jfriedl_XR == 0) jfriedl_XR = 1;
1966     }
1967 nigel 87 #endif
1968 nigel 77
1969 nigel 87 /* Get memory to store the pattern and hints lists. */
1970    
1971     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1972     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1973    
1974     if (pattern_list == NULL || hints_list == NULL)
1975     {
1976     fprintf(stderr, "pcregrep: malloc failed\n");
1977 ph10 123 goto EXIT2;
1978 nigel 87 }
1979    
1980     /* If no patterns were provided by -e, and there is no file provided by -f,
1981     the first argument is the one and only pattern, and it must exist. */
1982    
1983     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1984     {
1985 nigel 63 if (i >= argc) return usage(2);
1986 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1987     }
1988 nigel 77
1989 nigel 87 /* Compile the patterns that were provided on the command line, either by
1990     multiple uses of -e or as a single unkeyed pattern. */
1991    
1992     for (j = 0; j < cmd_pattern_count; j++)
1993     {
1994     if (!compile_pattern(patterns[j], pcre_options, NULL,
1995     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1996 ph10 123 goto EXIT2;
1997 nigel 87 }
1998    
1999     /* Compile the regular expressions that are provided in a file. */
2000    
2001     if (pattern_filename != NULL)
2002     {
2003     int linenumber = 0;
2004     FILE *f;
2005     char *filename;
2006     char buffer[MBUFTHIRD];
2007    
2008     if (strcmp(pattern_filename, "-") == 0)
2009 nigel 77 {
2010 nigel 87 f = stdin;
2011     filename = stdin_name;
2012 nigel 77 }
2013 nigel 87 else
2014 nigel 77 {
2015 nigel 87 f = fopen(pattern_filename, "r");
2016     if (f == NULL)
2017     {
2018     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2019     strerror(errno));
2020 ph10 123 goto EXIT2;
2021 nigel 87 }
2022     filename = pattern_filename;
2023 nigel 77 }
2024    
2025 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2026 nigel 53 {
2027 nigel 87 char *s = buffer + (int)strlen(buffer);
2028     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2029     *s = 0;
2030     linenumber++;
2031     if (buffer[0] == 0) continue; /* Skip blank lines */
2032     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2033 ph10 121 goto EXIT2;
2034 nigel 53 }
2035 nigel 87
2036     if (f != stdin) fclose(f);
2037 nigel 49 }
2038    
2039 nigel 77 /* Study the regular expressions, as we will be running them many times */
2040 nigel 53
2041     for (j = 0; j < pattern_count; j++)
2042     {
2043     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2044     if (error != NULL)
2045     {
2046     char s[16];
2047     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2048     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2049 ph10 121 goto EXIT2;
2050 nigel 53 }
2051 ph10 142 hint_count++;
2052 nigel 53 }
2053    
2054 nigel 77 /* If there are include or exclude patterns, compile them. */
2055    
2056     if (exclude_pattern != NULL)
2057     {
2058 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2059     pcretables);
2060 nigel 77 if (exclude_compiled == NULL)
2061     {
2062     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2063     errptr, error);
2064 ph10 121 goto EXIT2;
2065 nigel 77 }
2066     }
2067    
2068     if (include_pattern != NULL)
2069     {
2070 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2071     pcretables);
2072 nigel 77 if (include_compiled == NULL)
2073     {
2074     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2075     errptr, error);
2076 ph10 121 goto EXIT2;
2077 nigel 77 }
2078     }
2079    
2080 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2081 nigel 49
2082 nigel 87 if (i >= argc)
2083 ph10 121 {
2084     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2085     goto EXIT;
2086 ph10 123 }
2087 nigel 49
2088 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2089     Pass in the fact that there is only one argument at top level - this suppresses
2090 nigel 87 the file name if the argument is not a directory and filenames are not
2091     otherwise forced. */
2092 nigel 49
2093 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2094 nigel 49
2095     for (; i < argc; i++)
2096     {
2097 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2098     only_one_at_top);
2099 nigel 77 if (frc > 1) rc = frc;
2100     else if (frc == 0 && rc == 1) rc = 0;
2101 nigel 49 }
2102    
2103 ph10 121 EXIT:
2104     if (pattern_list != NULL)
2105     {
2106 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2107 ph10 121 free(pattern_list);
2108 ph10 123 }
2109 ph10 121 if (hints_list != NULL)
2110     {
2111 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2112 ph10 121 free(hints_list);
2113 ph10 123 }
2114 nigel 49 return rc;
2115 ph10 121
2116     EXIT2:
2117     rc = 2;
2118     goto EXIT;
2119 nigel 49 }
2120    
2121 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12