/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 236 - (hide annotations) (download)
Tue Sep 11 12:57:06 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 59466 byte(s)
<config.h> => "config.h" and also some cases of <pcre.h>.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 236 #include "pcre.h"
59 nigel 49
/* Boolean support: BOOL is a plain int holding FALSE or TRUE. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define MAX_PATTERN_COUNT 100

/* MBUFTHIRD is one third of the size of the main file buffer (the buffer is
declared as 3*MBUFTHIRD bytes). It is the larger of BUFSIZ and 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
103     #ifdef JFRIEDL_DEBUG
104     static int S_arg = -1;
105 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107     static const char *jfriedl_prefix = "";
108     static const char *jfriedl_postfix = "";
109 nigel 87 #endif
110    
111 nigel 93 static int endlinetype;
112 nigel 91
113 nigel 87 static char *colour_string = (char *)"1;31";
114     static char *colour_option = NULL;
115     static char *dee_option = NULL;
116     static char *DEE_option = NULL;
117 nigel 91 static char *newline = NULL;
118 nigel 53 static char *pattern_filename = NULL;
119 nigel 77 static char *stdin_name = (char *)"(standard input)";
120 nigel 87 static char *locale = NULL;
121    
122     static const unsigned char *pcretables = NULL;
123    
124 nigel 53 static int pattern_count = 0;
125 ph10 121 static pcre **pattern_list = NULL;
126     static pcre_extra **hints_list = NULL;
127 nigel 49
128 nigel 77 static char *include_pattern = NULL;
129     static char *exclude_pattern = NULL;
130    
131     static pcre *include_compiled = NULL;
132     static pcre *exclude_compiled = NULL;
133    
134     static int after_context = 0;
135     static int before_context = 0;
136     static int both_context = 0;
137 nigel 87 static int dee_action = dee_READ;
138     static int DEE_action = DEE_READ;
139     static int error_count = 0;
140     static int filenames = FN_DEFAULT;
141     static int process_options = 0;
142 nigel 77
143 nigel 49 static BOOL count_only = FALSE;
144 nigel 87 static BOOL do_colour = FALSE;
145 nigel 77 static BOOL hyphenpending = FALSE;
146 nigel 49 static BOOL invert = FALSE;
147 nigel 77 static BOOL multiline = FALSE;
148 nigel 49 static BOOL number = FALSE;
149 nigel 87 static BOOL only_matching = FALSE;
150 nigel 77 static BOOL quiet = FALSE;
151 nigel 49 static BOOL silent = FALSE;
152 nigel 93 static BOOL utf8 = FALSE;
153 nigel 49
/* Structure for options and list of them */

/* Kinds of data that an option can take; stored in option_item.type. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* one of the OP_xxx values above */
  int one_char;           /* single-letter form, or a negative N_xxx code */
  void *dataptr;          /* address of the variable for the data, or NULL */
  const char *long_name;  /* long option name, optionally with "=something" */
  const char *help_text;  /* one-line description */
} option_item;
166 nigel 49
167 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
168     used to identify them. */
169    
170     #define N_COLOUR (-1)
171     #define N_EXCLUDE (-2)
172     #define N_HELP (-3)
173     #define N_INCLUDE (-4)
174     #define N_LABEL (-5)
175     #define N_LOCALE (-6)
176     #define N_NULL (-7)
177    
178 nigel 53 static option_item optionlist[] = {
179 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 ph10 149 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207     #ifdef JFRIEDL_DEBUG
208     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209     #endif
210     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216     { OP_NODATA, 0, NULL, NULL, NULL }
217 nigel 53 };
218    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. The tables are indexed by the process_options
value, which is why entries 2 and 3, and entries 6 and 7, are identical. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };

/* UTF-8 tables - used when decoding characters while looking for line endings
under the "any" and "anycrlf" newline settings. */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Number of additional bytes, indexed by the bottom 6 bits of a first byte
that is 0xc0 or greater. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242 nigel 53 /*************************************************
243 nigel 87 * OS-specific functions *
244 nigel 53 *************************************************/
245    
246     /* These functions are defined so that they can be made system specific,
247 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
248 nigel 53
249    
250     /************* Directory scanning in Unix ***********/
251    
252 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 nigel 53 #include <sys/types.h>
254     #include <sys/stat.h>
255     #include <dirent.h>
256    
257     typedef DIR directory_type;
258    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory; otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;   /* In the expectation that opening as a file will fail */

  return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
267    
268 nigel 67 static directory_type *
269 nigel 53 opendirectory(char *filename)
270     {
271     return opendir(filename);
272     }
273    
274 nigel 67 static char *
275 nigel 53 readdirectory(directory_type *dir)
276     {
277     for (;;)
278     {
279     struct dirent *dent = readdir(dir);
280     if (dent == NULL) return NULL;
281     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282     return dent->d_name;
283     }
284 ph10 151 /* Control never reaches here */
285 nigel 53 }
286    
287 nigel 67 static void
288 nigel 53 closedirectory(directory_type *dir)
289     {
290     closedir(dir);
291     }
292    
293    
294 nigel 87 /************* Test for regular file in Unix **********/
295    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    1 if stat() fails (the path is then treated as regular so that the
              error shows up when it is opened) or reports a regular file;
              otherwise 0
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;   /* In the expectation that opening as a file will fail */

  return S_ISREG(statbuf.st_mode) != 0;
}
304    
305    
306     /************* Test stdout for being a terminal in Unix **********/
307    
308     static BOOL
309     is_stdout_tty(void)
310     {
311     return isatty(fileno(stdout));
312     }
313    
314    
315 nigel 63 /************* Directory scanning in Win32 ***********/
316 nigel 53
317 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319     when it did not exist. */
320 nigel 53
321 nigel 63
322 ph10 97 #elif HAVE_WINDOWS_H
323 nigel 63
324     #ifndef STRICT
325     # define STRICT
326     #endif
327     #ifndef WIN32_LEAN_AND_MEAN
328     # define WIN32_LEAN_AND_MEAN
329     #endif
330 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
331     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332     #endif
333    
334 nigel 63 #include <windows.h>
335    
336     typedef struct directory_type
337     {
338     HANDLE handle;
339     BOOL first;
340     WIN32_FIND_DATA data;
341     } directory_type;
342    
343     int
344     isdirectory(char *filename)
345     {
346     DWORD attr = GetFileAttributes(filename);
347     if (attr == INVALID_FILE_ATTRIBUTES)
348     return 0;
349     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350     }
351    
352     directory_type *
353     opendirectory(char *filename)
354     {
355     size_t len;
356     char *pattern;
357     directory_type *dir;
358     DWORD err;
359     len = strlen(filename);
360     pattern = (char *) malloc(len + 3);
361     dir = (directory_type *) malloc(sizeof(*dir));
362     if ((pattern == NULL) || (dir == NULL))
363     {
364     fprintf(stderr, "pcregrep: malloc failed\n");
365     exit(2);
366     }
367     memcpy(pattern, filename, len);
368     memcpy(&(pattern[len]), "\\*", 3);
369     dir->handle = FindFirstFile(pattern, &(dir->data));
370     if (dir->handle != INVALID_HANDLE_VALUE)
371     {
372     free(pattern);
373     dir->first = TRUE;
374     return dir;
375     }
376     err = GetLastError();
377     free(pattern);
378     free(dir);
379     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380     return NULL;
381     }
382    
383     char *
384     readdirectory(directory_type *dir)
385     {
386     for (;;)
387     {
388     if (!dir->first)
389     {
390     if (!FindNextFile(dir->handle, &(dir->data)))
391     return NULL;
392     }
393     else
394     {
395     dir->first = FALSE;
396     }
397     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398     return dir->data.cFileName;
399     }
400     #ifndef _MSC_VER
401     return NULL; /* Keep compiler happy; never executed */
402     #endif
403     }
404    
405     void
406     closedirectory(directory_type *dir)
407     {
408     FindClose(dir->handle);
409     free(dir);
410     }
411    
412    
413 nigel 87 /************* Test for regular file in Win32 **********/
414    
415     /* I don't know how to do this, or if it can be done; assume all paths are
416     regular if they are not directories. */
417    
/* Win32 version: every path that isdirectory() does not report as a directory
is treated as a regular file. Returns 1 for "regular", 0 for a directory. */

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
422    
423    
424     /************* Test stdout for being a terminal in Win32 **********/
425    
426     /* I don't know how to do this; assume never */
427    
428     static BOOL
429     is_stdout_tty(void)
430     {
431     FALSE;
432     }
433    
434    
435 nigel 53 /************* Directory scanning when we can't do it ***********/
436    
437     /* The type is void, and apart from isdirectory(), the functions do nothing. */
438    
439 nigel 63 #else
440    
typedef void directory_type;

/* Stubs for systems with no directory support: nothing is ever reported as a
directory, so the other three functions have nothing to do. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return (directory_type*)0; }
char *readdirectory(directory_type *dir) { (void)dir; return (char*)0; }
void closedirectory(directory_type *dir) { (void)dir; }
447    
448 nigel 87
449     /************* Test for regular file when we can't do it **********/
450    
451     /* Assume all files are regular. */
452    
int isregfile(char *filename) { (void)filename; return 1; }
454    
455    
456     /************* Test stdout for being a terminal when we can't do it **********/
457    
458     static BOOL
459     is_stdout_tty(void)
460     {
461     return FALSE;
462     }
463    
464    
465 nigel 53 #endif
466    
467    
468    
469 ph10 137 #ifndef HAVE_STRERROR
470 nigel 49 /*************************************************
471     * Provide strerror() for non-ANSI libraries *
472     *************************************************/
473    
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function.

Argument:   n   an error number
Returns:    the sys_errlist[] entry for n, or a fixed "unknown" text if n is
              negative or not less than sys_nerr
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
487     #endif /* HAVE_STRERROR */
488    
489    
490    
491     /*************************************************
492 nigel 93 * Find end of line *
493     *************************************************/
494    
495     /* The length of the endline sequence that is found is set via lenptr. This may
496     be zero at the very end of the file if there is no line-ending sequence there.
497    
498     Arguments:
499     p current position in line
500     endptr end of available data
501     lenptr where to put the length of the eol sequence
502    
503     Returns: pointer to the last byte of the line
504     */
505    
506     static char *
507     end_of_line(char *p, char *endptr, int *lenptr)
508     {
509     switch(endlinetype)
510     {
511     default: /* Just in case */
512     case EL_LF:
513     while (p < endptr && *p != '\n') p++;
514     if (p < endptr)
515     {
516     *lenptr = 1;
517     return p + 1;
518     }
519     *lenptr = 0;
520     return endptr;
521    
522     case EL_CR:
523     while (p < endptr && *p != '\r') p++;
524     if (p < endptr)
525     {
526     *lenptr = 1;
527     return p + 1;
528     }
529     *lenptr = 0;
530     return endptr;
531    
532     case EL_CRLF:
533     for (;;)
534     {
535     while (p < endptr && *p != '\r') p++;
536     if (++p >= endptr)
537     {
538     *lenptr = 0;
539     return endptr;
540     }
541     if (*p == '\n')
542     {
543     *lenptr = 2;
544     return p + 1;
545     }
546     }
547     break;
548    
549 ph10 149 case EL_ANYCRLF:
550     while (p < endptr)
551     {
552     int extra = 0;
553     register int c = *((unsigned char *)p);
554    
555     if (utf8 && c >= 0xc0)
556     {
557     int gcii, gcss;
558     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559     gcss = 6*extra;
560     c = (c & utf8_table3[extra]) << gcss;
561     for (gcii = 1; gcii <= extra; gcii++)
562     {
563     gcss -= 6;
564     c |= (p[gcii] & 0x3f) << gcss;
565     }
566     }
567    
568     p += 1 + extra;
569    
570     switch (c)
571     {
572     case 0x0a: /* LF */
573     *lenptr = 1;
574     return p;
575    
576     case 0x0d: /* CR */
577     if (p < endptr && *p == 0x0a)
578     {
579     *lenptr = 2;
580     p++;
581     }
582     else *lenptr = 1;
583     return p;
584 ph10 150
585 ph10 149 default:
586     break;
587     }
588     } /* End of loop for ANYCRLF case */
589 ph10 150
590 ph10 149 *lenptr = 0; /* Must have hit the end */
591     return endptr;
592    
593 nigel 93 case EL_ANY:
594     while (p < endptr)
595     {
596     int extra = 0;
597     register int c = *((unsigned char *)p);
598    
599     if (utf8 && c >= 0xc0)
600     {
601     int gcii, gcss;
602     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603     gcss = 6*extra;
604     c = (c & utf8_table3[extra]) << gcss;
605     for (gcii = 1; gcii <= extra; gcii++)
606     {
607     gcss -= 6;
608     c |= (p[gcii] & 0x3f) << gcss;
609     }
610     }
611    
612     p += 1 + extra;
613    
614     switch (c)
615     {
616     case 0x0a: /* LF */
617     case 0x0b: /* VT */
618     case 0x0c: /* FF */
619     *lenptr = 1;
620     return p;
621    
622     case 0x0d: /* CR */
623     if (p < endptr && *p == 0x0a)
624     {
625     *lenptr = 2;
626     p++;
627     }
628     else *lenptr = 1;
629     return p;
630    
631     case 0x85: /* NEL */
632     *lenptr = utf8? 2 : 1;
633     return p;
634    
635     case 0x2028: /* LS */
636     case 0x2029: /* PS */
637     *lenptr = 3;
638     return p;
639    
640     default:
641     break;
642     }
643     } /* End of loop for ANY case */
644    
645     *lenptr = 0; /* Must have hit the end */
646     return endptr;
647     } /* End of overall switch */
648     }
649    
650    
651    
652     /*************************************************
653     * Find start of previous line *
654     *************************************************/
655    
656     /* This is called when looking back for before lines to print.
657    
658     Arguments:
659     p start of the subsequent line
660     startptr start of available data
661    
662     Returns: pointer to the start of the previous line
663     */
664    
665     static char *
666     previous_line(char *p, char *startptr)
667     {
668     switch(endlinetype)
669     {
670     default: /* Just in case */
671     case EL_LF:
672     p--;
673     while (p > startptr && p[-1] != '\n') p--;
674     return p;
675    
676     case EL_CR:
677     p--;
678     while (p > startptr && p[-1] != '\n') p--;
679     return p;
680    
681     case EL_CRLF:
682     for (;;)
683     {
684     p -= 2;
685     while (p > startptr && p[-1] != '\n') p--;
686     if (p <= startptr + 1 || p[-2] == '\r') return p;
687     }
688     return p; /* But control should never get here */
689    
690     case EL_ANY:
691 ph10 150 case EL_ANYCRLF:
692 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693     if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695     while (p > startptr)
696     {
697     register int c;
698     char *pp = p - 1;
699    
700     if (utf8)
701     {
702     int extra = 0;
703     while ((*pp & 0xc0) == 0x80) pp--;
704     c = *((unsigned char *)pp);
705     if (c >= 0xc0)
706     {
707     int gcii, gcss;
708     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709     gcss = 6*extra;
710     c = (c & utf8_table3[extra]) << gcss;
711     for (gcii = 1; gcii <= extra; gcii++)
712     {
713     gcss -= 6;
714     c |= (pp[gcii] & 0x3f) << gcss;
715     }
716     }
717     }
718     else c = *((unsigned char *)pp);
719    
720 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
721 nigel 93 {
722     case 0x0a: /* LF */
723 ph10 149 case 0x0d: /* CR */
724     return p;
725 ph10 150
726 ph10 149 default:
727     break;
728 ph10 150 }
729 ph10 149
730     else switch (c)
731     {
732     case 0x0a: /* LF */
733 nigel 93 case 0x0b: /* VT */
734     case 0x0c: /* FF */
735     case 0x0d: /* CR */
736     case 0x85: /* NEL */
737     case 0x2028: /* LS */
738     case 0x2029: /* PS */
739     return p;
740    
741     default:
742     break;
743     }
744    
745     p = pp; /* Back one character */
746     } /* End of loop for ANY case */
747    
748     return startptr; /* Hit start of data */
749     } /* End of overall switch */
750     }
751    
752    
753    
754    
755    
756     /*************************************************
757 nigel 77 * Print the previous "after" lines *
758 nigel 49 *************************************************/
759    
760 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
761 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
762     that a binary zero does not terminate it.
763 nigel 77
764     Arguments:
765     lastmatchnumber the number of the last matching line, plus one
766     lastmatchrestart where we restarted after the last match
767     endptr end of available data
768     printname filename for printing
769    
770     Returns: nothing
771     */
772    
773     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774     char *endptr, char *printname)
775     {
776     if (after_context > 0 && lastmatchnumber > 0)
777     {
778     int count = 0;
779     while (lastmatchrestart < endptr && count++ < after_context)
780     {
781 nigel 93 int ellength;
782 nigel 77 char *pp = lastmatchrestart;
783     if (printname != NULL) fprintf(stdout, "%s-", printname);
784     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 nigel 93 pp = end_of_line(pp, endptr, &ellength);
786     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787     lastmatchrestart = pp;
788 nigel 77 }
789     hyphenpending = TRUE;
790     }
791     }
792    
793    
794    
795     /*************************************************
796     * Grep an individual file *
797     *************************************************/
798    
799     /* This is called from grep_or_recurse() below. It uses a buffer that is three
800     times the value of MBUFTHIRD. The matching point is never allowed to stray into
801     the top third of the buffer, thus keeping more of the file available for
802     context printing or for multiline scanning. For large files, the pointer will
803     be in the middle third most of the time, so the bottom third is available for
804     "before" context printing.
805    
806     Arguments:
807     in the fopened FILE stream
808     printname the file name if it is to be printed for each match
809     or NULL if the file name is not to be printed
810     it cannot be NULL if filenames[_nomatch]_only is set
811    
812     Returns: 0 if there was at least one match
813     1 otherwise (no matches)
814     */
815    
816 nigel 49 static int
817 nigel 77 pcregrep(FILE *in, char *printname)
818 nigel 49 {
819     int rc = 1;
820 nigel 77 int linenumber = 1;
821     int lastmatchnumber = 0;
822 nigel 49 int count = 0;
823     int offsets[99];
824 nigel 77 char *lastmatchrestart = NULL;
825     char buffer[3*MBUFTHIRD];
826     char *ptr = buffer;
827     char *endptr;
828     size_t bufflength;
829     BOOL endhyphenpending = FALSE;
830 nigel 49
831 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
832     end of what we have. */
833    
834     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835     endptr = buffer + bufflength;
836    
837     /* Loop while the current pointer is not at the end of the file. For large
838     files, endptr will be at the end of the buffer when we are in the middle of the
839     file, but ptr will never get there, because as soon as it gets over 2/3 of the
840     way, the buffer is shifted left and re-filled. */
841    
842     while (ptr < endptr)
843 nigel 49 {
844 nigel 93 int i, endlinelength;
845 nigel 87 int mrc = 0;
846 nigel 53 BOOL match = FALSE;
847 nigel 77 char *t = ptr;
848     size_t length, linelength;
849 nigel 49
850 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
851     of the subject string to pass to pcre_exec(). In multiline mode, it is the
852     length remainder of the data in the buffer. Otherwise, it is the length of
853     the next line. After matching, we always advance by the length of the next
854     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855     that any match is constrained to be in the first line. */
856    
857 nigel 93 t = end_of_line(t, endptr, &endlinelength);
858     linelength = t - ptr - endlinelength;
859 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
860 nigel 77
861 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
862    
863     #ifdef JFRIEDL_DEBUG
864     if (jfriedl_XT || jfriedl_XR)
865     {
866     #include <sys/time.h>
867     #include <time.h>
868     struct timeval start_time, end_time;
869     struct timezone dummy;
870    
871     if (jfriedl_XT)
872     {
873     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874     const char *orig = ptr;
875     ptr = malloc(newlen + 1);
876     if (!ptr) {
877     printf("out of memory");
878     exit(2);
879     }
880     endptr = ptr;
881     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882     for (i = 0; i < jfriedl_XT; i++) {
883     strncpy(endptr, orig, length);
884     endptr += length;
885     }
886     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887     length = newlen;
888     }
889    
890     if (gettimeofday(&start_time, &dummy) != 0)
891     perror("bad gettimeofday");
892    
893    
894     for (i = 0; i < jfriedl_XR; i++)
895     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896    
897     if (gettimeofday(&end_time, &dummy) != 0)
898     perror("bad gettimeofday");
899    
900     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901     -
902     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905     return 0;
906     }
907     #endif
908    
909    
910 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
911     the final newline in the subject string. */
912    
913 nigel 87 for (i = 0; i < pattern_count; i++)
914 nigel 53 {
915 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916     offsets, 99);
917     if (mrc >= 0) { match = TRUE; break; }
918     if (mrc != PCRE_ERROR_NOMATCH)
919     {
920     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922     fprintf(stderr, "this line:\n");
923     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924     fprintf(stderr, "\n");
925     if (error_count == 0 &&
926     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927     {
928     fprintf(stderr, "pcregrep: error %d means that a resource limit "
929     "was exceeded\n", mrc);
930     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931     }
932     if (error_count++ > 20)
933     {
934     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935     exit(2);
936     }
937     match = invert; /* No more matching; don't show the line again */
938     break;
939     }
940 nigel 53 }
941 nigel 49
942 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
943 nigel 77
944 nigel 49 if (match != invert)
945     {
946 nigel 77 BOOL hyphenprinted = FALSE;
947    
948 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
949 nigel 77
950 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
951    
952     /* Just count if just counting is wanted. */
953    
954 nigel 49 if (count_only) count++;
955    
956 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
957     in the file. */
958    
959     else if (filenames == FN_ONLY)
960 nigel 49 {
961 nigel 77 fprintf(stdout, "%s\n", printname);
962 nigel 49 return 0;
963     }
964    
965 nigel 87 /* Likewise, if all we want is a yes/no answer. */
966    
967 nigel 77 else if (quiet) return 0;
968 nigel 49
969 nigel 87 /* The --only-matching option prints just the substring that matched, and
970     does not print any context. */
971    
972     else if (only_matching)
973     {
974     if (printname != NULL) fprintf(stdout, "%s:", printname);
975     if (number) fprintf(stdout, "%d:", linenumber);
976     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977     fprintf(stdout, "\n");
978     }
979    
980     /* This is the default case when none of the above options is set. We print
981     the matching line(s), possibly preceded and/or followed by other lines of
982     context. */
983    
984 nigel 49 else
985     {
986 nigel 77 /* See if there is a requirement to print some "after" lines from a
987     previous match. We never print any overlaps. */
988    
989     if (after_context > 0 && lastmatchnumber > 0)
990     {
991 nigel 93 int ellength;
992 nigel 77 int linecount = 0;
993     char *p = lastmatchrestart;
994    
995     while (p < ptr && linecount < after_context)
996     {
997 nigel 93 p = end_of_line(p, ptr, &ellength);
998 nigel 77 linecount++;
999     }
1000    
1001     /* It is important to advance lastmatchrestart during this printing so
1002 nigel 87 that it interacts correctly with any "before" printing below. Print
1003     each line's data using fwrite() in case there are binary zeroes. */
1004 nigel 77
1005     while (lastmatchrestart < p)
1006     {
1007     char *pp = lastmatchrestart;
1008     if (printname != NULL) fprintf(stdout, "%s-", printname);
1009     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1011     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012     lastmatchrestart = pp;
1013 nigel 77 }
1014     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015     }
1016    
1017     /* If there were non-contiguous lines printed above, insert hyphens. */
1018    
1019     if (hyphenpending)
1020     {
1021     fprintf(stdout, "--\n");
1022     hyphenpending = FALSE;
1023     hyphenprinted = TRUE;
1024     }
1025    
1026     /* See if there is a requirement to print some "before" lines for this
1027     match. Again, don't print overlaps. */
1028    
1029     if (before_context > 0)
1030     {
1031     int linecount = 0;
1032     char *p = ptr;
1033    
1034     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 nigel 87 linecount < before_context)
1036 nigel 77 {
1037 nigel 87 linecount++;
1038 nigel 93 p = previous_line(p, buffer);
1039 nigel 77 }
1040    
1041     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042     fprintf(stdout, "--\n");
1043    
1044     while (p < ptr)
1045     {
1046 nigel 93 int ellength;
1047 nigel 77 char *pp = p;
1048     if (printname != NULL) fprintf(stdout, "%s-", printname);
1049     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1051     fwrite(p, 1, pp - p, stdout);
1052     p = pp;
1053 nigel 77 }
1054     }
1055    
1056     /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 nigel 85 of the file if any context lines are being output. */
1058 nigel 77
1059 nigel 85 if (after_context > 0 || before_context > 0)
1060     endhyphenpending = TRUE;
1061    
1062 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1064 nigel 77
1065     /* In multiline mode, we want to print to the end of the line in which
1066     the end of the matched string is found, so we adjust linelength and the
1067 ph10 222 line number appropriately, but only when there actually was a match
1068     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069     the match will always be before the first newline sequence. */
1070 nigel 77
1071     if (multiline)
1072     {
1073 nigel 93 int ellength;
1074 ph10 222 char *endmatch = ptr;
1075     if (!invert)
1076 nigel 93 {
1077 ph10 222 endmatch += offsets[1];
1078     t = ptr;
1079     while (t < endmatch)
1080     {
1081     t = end_of_line(t, endptr, &ellength);
1082     if (t <= endmatch) linenumber++; else break;
1083     }
1084 nigel 93 }
1085     endmatch = end_of_line(endmatch, endptr, &ellength);
1086     linelength = endmatch - ptr - ellength;
1087 nigel 77 }
1088    
1089 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1090     zeroes are treated as just another data character. */
1091    
1092     /* This extra option, for Jeffrey Friedl's debugging requirements,
1093     replaces the matched string, or a specific captured string if it exists,
1094     with X. When this happens, colouring is ignored. */
1095    
1096     #ifdef JFRIEDL_DEBUG
1097     if (S_arg >= 0 && S_arg < mrc)
1098     {
1099     int first = S_arg * 2;
1100     int last = first + 1;
1101     fwrite(ptr, 1, offsets[first], stdout);
1102     fprintf(stdout, "X");
1103     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104     }
1105     else
1106     #endif
1107    
1108     /* We have to split the line(s) up if colouring. */
1109    
1110     if (do_colour)
1111     {
1112     fwrite(ptr, 1, offsets[0], stdout);
1113     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115     fprintf(stdout, "%c[00m", 0x1b);
1116     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1117     }
1118 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1119 nigel 49 }
1120    
1121 nigel 87 /* End of doing what has to be done for a match */
1122    
1123 nigel 77 rc = 0; /* Had some success */
1124    
1125     /* Remember where the last match happened for after_context. We remember
1126     where we are about to restart, and that line's number. */
1127    
1128 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1129 nigel 77 lastmatchnumber = linenumber + 1;
1130 nigel 49 }
1131 nigel 77
1132 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1133     anything to be printed), we have to move on to the end of the match before
1134     proceeding. */
1135    
1136     if (multiline && invert && match)
1137     {
1138     int ellength;
1139     char *endmatch = ptr + offsets[1];
1140     t = ptr;
1141     while (t < endmatch)
1142     {
1143     t = end_of_line(t, endptr, &ellength);
1144     if (t <= endmatch) linenumber++; else break;
1145     }
1146     endmatch = end_of_line(endmatch, endptr, &ellength);
1147     linelength = endmatch - ptr - ellength;
1148     }
1149    
1150 nigel 77 /* Advance to after the newline and increment the line number. */
1151    
1152 nigel 93 ptr += linelength + endlinelength;
1153 nigel 77 linenumber++;
1154    
1155     /* If we haven't yet reached the end of the file (the buffer is full), and
1156     the current point is in the top 1/3 of the buffer, slide the buffer down by
1157     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1158     about to be lost, print them. */
1159    
1160     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1161     {
1162     if (after_context > 0 &&
1163     lastmatchnumber > 0 &&
1164     lastmatchrestart < buffer + MBUFTHIRD)
1165     {
1166     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1167     lastmatchnumber = 0;
1168     }
1169    
1170     /* Now do the shuffle */
1171    
1172     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1173     ptr -= MBUFTHIRD;
1174     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1175     endptr = buffer + bufflength;
1176    
1177     /* Adjust any last match point */
1178    
1179     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1180     }
1181     } /* Loop through the whole file */
1182    
1183     /* End of file; print final "after" lines if wanted; do_after_lines sets
1184     hyphenpending if it prints something. */
1185    
1186 nigel 87 if (!only_matching && !count_only)
1187     {
1188     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1189     hyphenpending |= endhyphenpending;
1190     }
1191 nigel 77
1192     /* Print the file name if we are looking for those without matches and there
1193     were none. If we found a match, we won't have got this far. */
1194    
1195 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1196 nigel 77 {
1197     fprintf(stdout, "%s\n", printname);
1198     return 0;
1199 nigel 49 }
1200    
1201 nigel 77 /* Print the match count if wanted */
1202    
1203 nigel 49 if (count_only)
1204     {
1205 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1206 nigel 49 fprintf(stdout, "%d\n", count);
1207     }
1208    
1209     return rc;
1210     }
1211    
1212    
1213    
1214     /*************************************************
1215 nigel 53 * Grep a file or recurse into a directory *
1216     *************************************************/
1217    
1218 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1219     recursing; if it's a file, grep it.
1220    
1221     Arguments:
1222     pathname the path to investigate
1223 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1224 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1225    
1226     Returns: 0 if there was at least one match
1227     1 if there were no matches
1228     2 there was some kind of error
1229    
1230     However, file opening failures are suppressed if "silent" is set.
1231     */
1232    
1233 nigel 53 static int
1234 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1235 nigel 53 {
1236     int rc = 1;
1237     int sep;
1238     FILE *in;
1239    
1240 nigel 77 /* If the file name is "-" we scan stdin */
1241 nigel 53
1242 nigel 77 if (strcmp(pathname, "-") == 0)
1243 nigel 53 {
1244 nigel 77 return pcregrep(stdin,
1245 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1246 nigel 77 stdin_name : NULL);
1247     }
1248    
1249    
1250 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1251     each file within it, subject to any include or exclude patterns that were set.
1252     The scanning code is localized so it can be made system-specific. */
1253    
1254     if ((sep = isdirectory(pathname)) != 0)
1255 nigel 77 {
1256 nigel 87 if (dee_action == dee_SKIP) return 1;
1257     if (dee_action == dee_RECURSE)
1258 nigel 53 {
1259 nigel 87 char buffer[1024];
1260     char *nextfile;
1261     directory_type *dir = opendirectory(pathname);
1262 nigel 53
1263 nigel 87 if (dir == NULL)
1264     {
1265     if (!silent)
1266     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1267     strerror(errno));
1268     return 2;
1269     }
1270 nigel 77
1271 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1272     {
1273     int frc, blen;
1274     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1275     blen = strlen(buffer);
1276 nigel 77
1277 nigel 87 if (exclude_compiled != NULL &&
1278     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1279     continue;
1280 nigel 77
1281 nigel 87 if (include_compiled != NULL &&
1282     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1283     continue;
1284    
1285     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1286     if (frc > 1) rc = frc;
1287     else if (frc == 0 && rc == 1) rc = 0;
1288     }
1289    
1290     closedirectory(dir);
1291     return rc;
1292 nigel 53 }
1293     }
1294    
1295 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1296     been requested. */
1297 nigel 53
1298 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1299    
1300     /* Control reaches here if we have a regular file, or if we have a directory
1301     and recursion or skipping was not requested, or if we have anything else and
1302     skipping was not requested. The scan proceeds. If this is the first and only
1303     argument at top level, we don't show the file name, unless we are only showing
1304     the file name, or the filename was forced (-H). */
1305    
1306 nigel 77 in = fopen(pathname, "r");
1307 nigel 53 if (in == NULL)
1308     {
1309 nigel 77 if (!silent)
1310     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1311     strerror(errno));
1312 nigel 53 return 2;
1313     }
1314    
1315 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1316     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1317 nigel 77
1318 nigel 53 fclose(in);
1319     return rc;
1320     }
1321    
1322    
1323    
1324    
1325     /*************************************************
1326 nigel 49 * Usage function *
1327     *************************************************/
1328    
1329     static int
1330     usage(int rc)
1331     {
1332 nigel 87 option_item *op;
1333     fprintf(stderr, "Usage: pcregrep [-");
1334     for (op = optionlist; op->one_char != 0; op++)
1335     {
1336     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1337     }
1338     fprintf(stderr, "] [long options] [pattern] [files]\n");
1339 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1340 nigel 49 return rc;
1341     }
1342    
1343    
1344    
1345    
1346     /*************************************************
1347 nigel 53 * Help function *
1348     *************************************************/
1349    
1350     static void
1351     help(void)
1352     {
1353     option_item *op;
1354    
1355 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1356 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1357 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1358     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1359 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1360    
1361     printf("Options:\n");
1362    
1363     for (op = optionlist; op->one_char != 0; op++)
1364     {
1365     int n;
1366     char s[4];
1367     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1368     printf(" %s --%s%n", s, op->long_name, &n);
1369     n = 30 - n;
1370     if (n < 1) n = 1;
1371     printf("%.*s%s\n", n, " ", op->help_text);
1372     }
1373    
1374 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1375     printf("trailing white space is removed and blank lines are ignored.\n");
1376     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1377 nigel 53
1378 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1379 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1380     }
1381    
1382    
1383    
1384    
1385     /*************************************************
1386 nigel 77 * Handle a single-letter, no data option *
1387 nigel 53 *************************************************/
1388    
1389     static int
1390     handle_option(int letter, int options)
1391     {
1392     switch(letter)
1393     {
1394 nigel 87 case N_HELP: help(); exit(0);
1395 nigel 53 case 'c': count_only = TRUE; break;
1396 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1397     case 'H': filenames = FN_FORCE; break;
1398     case 'h': filenames = FN_NONE; break;
1399 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1400 nigel 87 case 'l': filenames = FN_ONLY; break;
1401     case 'L': filenames = FN_NOMATCH_ONLY; break;
1402 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1403 nigel 53 case 'n': number = TRUE; break;
1404 nigel 87 case 'o': only_matching = TRUE; break;
1405 nigel 77 case 'q': quiet = TRUE; break;
1406 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1407 nigel 53 case 's': silent = TRUE; break;
1408 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1409 nigel 53 case 'v': invert = TRUE; break;
1410 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1411     case 'x': process_options |= PO_LINE_MATCH; break;
1412 nigel 53
1413     case 'V':
1414 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1415 nigel 53 exit(0);
1416     break;
1417    
1418     default:
1419     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1420     exit(usage(2));
1421     }
1422    
1423     return options;
1424     }
1425    
1426    
1427    
1428    
1429     /*************************************************
1430 nigel 87 * Construct printed ordinal *
1431     *************************************************/
1432    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), and so does any negative number.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];   /* Ample for the digits of an int, sign, and suffix */
const char *ending = "th";
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);

/* The "teens" are irregular, so only look at the final digit outside them. */

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1451    
1452    
1453    
1454     /*************************************************
1455     * Compile a single pattern *
1456     *************************************************/
1457    
1458     /* When the -F option has been used, this is called for each substring.
1459     Otherwise it's called for each supplied pattern.
1460    
1461     Arguments:
1462     pattern the pattern string
1463     options the PCRE options
1464     filename the file name, or NULL for a command-line pattern
1465     count 0 if this is the only command line pattern, or
1466     number of the command line pattern, or
1467     linenumber for a pattern from a file
1468    
1469     Returns: TRUE on success, FALSE after an error
1470     */
1471    
1472     static BOOL
1473     compile_single_pattern(char *pattern, int options, char *filename, int count)
1474     {
1475     char buffer[MBUFTHIRD + 16];
1476     const char *error;
1477     int errptr;
1478    
1479     if (pattern_count >= MAX_PATTERN_COUNT)
1480     {
1481     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1482     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1483     return FALSE;
1484     }
1485    
1486     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1487     suffix[process_options]);
1488     pattern_list[pattern_count] =
1489     pcre_compile(buffer, options, &error, &errptr, pcretables);
1490 ph10 142 if (pattern_list[pattern_count] != NULL)
1491 ph10 141 {
1492 ph10 142 pattern_count++;
1493 ph10 141 return TRUE;
1494 ph10 142 }
1495 nigel 87
1496     /* Handle compile errors */
1497    
1498     errptr -= (int)strlen(prefix[process_options]);
1499     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1500    
1501     if (filename == NULL)
1502     {
1503     if (count == 0)
1504     fprintf(stderr, "pcregrep: Error in command-line regex "
1505     "at offset %d: %s\n", errptr, error);
1506     else
1507     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1508     "at offset %d: %s\n", ordin(count), errptr, error);
1509     }
1510     else
1511     {
1512     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1513     "at offset %d: %s\n", count, filename, errptr, error);
1514     }
1515    
1516     return FALSE;
1517     }
1518    
1519    
1520    
1521     /*************************************************
1522     * Compile one supplied pattern *
1523     *************************************************/
1524    
1525     /* When the -F option has been used, each string may be a list of strings,
1526 nigel 91 separated by line breaks. They will be matched literally.
1527 nigel 87
1528     Arguments:
1529     pattern the pattern string
1530     options the PCRE options
1531     filename the file name, or NULL for a command-line pattern
1532     count 0 if this is the only command line pattern, or
1533     number of the command line pattern, or
1534     linenumber for a pattern from a file
1535    
1536     Returns: TRUE on success, FALSE after an error
1537     */
1538    
1539     static BOOL
1540     compile_pattern(char *pattern, int options, char *filename, int count)
1541     {
1542     if ((process_options & PO_FIXED_STRINGS) != 0)
1543     {
1544 nigel 93 char *eop = pattern + strlen(pattern);
1545 nigel 87 char buffer[MBUFTHIRD];
1546     for(;;)
1547     {
1548 nigel 93 int ellength;
1549     char *p = end_of_line(pattern, eop, &ellength);
1550     if (ellength == 0)
1551 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1552 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1553 nigel 93 pattern = p;
1554 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1555     return FALSE;
1556     }
1557     }
1558     else return compile_single_pattern(pattern, options, filename, count);
1559     }
1560    
1561    
1562    
1563     /*************************************************
1564 nigel 49 * Main program *
1565     *************************************************/
1566    
1567 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1568    
1569 nigel 49 int
1570     main(int argc, char **argv)
1571     {
1572 nigel 53 int i, j;
1573 nigel 49 int rc = 1;
1574 nigel 87 int pcre_options = 0;
1575     int cmd_pattern_count = 0;
1576 ph10 141 int hint_count = 0;
1577 nigel 49 int errptr;
1578 nigel 87 BOOL only_one_at_top;
1579     char *patterns[MAX_PATTERN_COUNT];
1580     const char *locale_from = "--locale";
1581 nigel 49 const char *error;
1582    
1583 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1584     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1585     */
1586 nigel 91
1587     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1588     switch(i)
1589     {
1590     default: newline = (char *)"lf"; break;
1591     case '\r': newline = (char *)"cr"; break;
1592     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1593 nigel 93 case -1: newline = (char *)"any"; break;
1594 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1595 nigel 91 }
1596    
1597 nigel 49 /* Process the options */
1598    
1599     for (i = 1; i < argc; i++)
1600     {
1601 nigel 77 option_item *op = NULL;
1602     char *option_data = (char *)""; /* default to keep compiler happy */
1603     BOOL longop;
1604     BOOL longopwasequals = FALSE;
1605    
1606 nigel 49 if (argv[i][0] != '-') break;
1607 nigel 53
1608 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1609 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1610 nigel 63
1611 nigel 77 if (argv[i][1] == 0)
1612     {
1613 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1614 nigel 77 else exit(usage(2));
1615     }
1616 nigel 63
1617 nigel 77 /* Handle a long name option, or -- to terminate the options */
1618 nigel 53
1619     if (argv[i][1] == '-')
1620 nigel 49 {
1621 nigel 77 char *arg = argv[i] + 2;
1622     char *argequals = strchr(arg, '=');
1623 nigel 53
1624 nigel 77 if (*arg == 0) /* -- terminates options */
1625 nigel 49 {
1626 nigel 77 i++;
1627     break; /* out of the options-handling loop */
1628 nigel 53 }
1629 nigel 49
1630 nigel 77 longop = TRUE;
1631    
1632     /* Some long options have data that follows after =, for example file=name.
1633     Some options have variations in the long name spelling: specifically, we
1634     allow "regexp" because GNU grep allows it, though I personally go along
1635 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1636     These options are entered in the table as "regex(p)". No option is in both
1637     these categories, fortunately. */
1638 nigel 77
1639 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1640     {
1641 nigel 77 char *opbra = strchr(op->long_name, '(');
1642     char *equals = strchr(op->long_name, '=');
1643     if (opbra == NULL) /* Not a (p) case */
1644 nigel 53 {
1645 nigel 77 if (equals == NULL) /* Not thing=data case */
1646     {
1647     if (strcmp(arg, op->long_name) == 0) break;
1648     }
1649     else /* Special case xxx=data */
1650     {
1651     int oplen = equals - op->long_name;
1652 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1653 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1654     {
1655     option_data = arg + arglen;
1656     if (*option_data == '=')
1657     {
1658     option_data++;
1659     longopwasequals = TRUE;
1660     }
1661     break;
1662     }
1663     }
1664 nigel 53 }
1665 nigel 77 else /* Special case xxxx(p) */
1666     {
1667     char buff1[24];
1668     char buff2[24];
1669     int baselen = opbra - op->long_name;
1670     sprintf(buff1, "%.*s", baselen, op->long_name);
1671 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1672 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1673 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1674     break;
1675     }
1676 nigel 53 }
1677 nigel 77
1678 nigel 53 if (op->one_char == 0)
1679     {
1680     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1681     exit(usage(2));
1682     }
1683     }
1684 nigel 49
1685 nigel 89
1686     /* Jeffrey Friedl's debugging harness uses these additional options which
1687     are not in the right form for putting in the option table because they use
1688     only one hyphen, yet are more than one character long. By putting them
1689     separately here, they will not get displayed as part of the help() output,
1690     but I don't think Jeffrey will care about that. */
1691    
1692     #ifdef JFRIEDL_DEBUG
1693     else if (strcmp(argv[i], "-pre") == 0) {
1694     jfriedl_prefix = argv[++i];
1695     continue;
1696     } else if (strcmp(argv[i], "-post") == 0) {
1697     jfriedl_postfix = argv[++i];
1698     continue;
1699     } else if (strcmp(argv[i], "-XT") == 0) {
1700     sscanf(argv[++i], "%d", &jfriedl_XT);
1701     continue;
1702     } else if (strcmp(argv[i], "-XR") == 0) {
1703     sscanf(argv[++i], "%d", &jfriedl_XR);
1704     continue;
1705     }
1706     #endif
1707    
1708    
1709 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1710     continue till we hit the last one or one that needs data. */
1711 nigel 53
1712     else
1713     {
1714     char *s = argv[i] + 1;
1715 nigel 77 longop = FALSE;
1716 nigel 53 while (*s != 0)
1717     {
1718 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1719     { if (*s == op->one_char) break; }
1720     if (op->one_char == 0)
1721 nigel 53 {
1722 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1723     *s, argv[i]);
1724     exit(usage(2));
1725     }
1726     if (op->type != OP_NODATA || s[1] == 0)
1727     {
1728     option_data = s+1;
1729 nigel 53 break;
1730     }
1731 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1732 nigel 49 }
1733     }
1734 nigel 77
1735 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1736     is NO_DATA, it means that there is no data, and the option might set
1737     something in the PCRE options. */
1738 nigel 77
1739     if (op->type == OP_NODATA)
1740     {
1741 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1742     continue;
1743     }
1744    
1745     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1746     either has a value or defaults to something. It cannot have data in a
1747     separate item. At the moment, the only such options are "colo(u)r" and
1748 nigel 89 Jeffrey Friedl's special -S debugging option. */
1749 nigel 87
1750     if (*option_data == 0 &&
1751     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1752     {
1753     switch (op->one_char)
1754 nigel 77 {
1755 nigel 87 case N_COLOUR:
1756     colour_option = (char *)"auto";
1757     break;
1758     #ifdef JFRIEDL_DEBUG
1759     case 'S':
1760     S_arg = 0;
1761     break;
1762     #endif
1763 nigel 77 }
1764 nigel 87 continue;
1765     }
1766 nigel 77
1767 nigel 87 /* Otherwise, find the data string for the option. */
1768    
1769     if (*option_data == 0)
1770     {
1771     if (i >= argc - 1 || longopwasequals)
1772 nigel 77 {
1773 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1774     exit(usage(2));
1775     }
1776     option_data = argv[++i];
1777     }
1778    
1779     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1780     multiple times to create a list of patterns. */
1781    
1782     if (op->type == OP_PATLIST)
1783     {
1784     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1785     {
1786     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1787     MAX_PATTERN_COUNT);
1788     return 2;
1789     }
1790     patterns[cmd_pattern_count++] = option_data;
1791     }
1792    
1793     /* Otherwise, deal with single string or numeric data values. */
1794    
1795     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1796     {
1797     *((char **)op->dataptr) = option_data;
1798     }
1799     else
1800     {
1801     char *endptr;
1802     int n = strtoul(option_data, &endptr, 10);
1803     if (*endptr != 0)
1804     {
1805     if (longop)
1806 nigel 77 {
1807 nigel 87 char *equals = strchr(op->long_name, '=');
1808     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1809     equals - op->long_name;
1810     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1811     option_data, nlen, op->long_name);
1812 nigel 77 }
1813 nigel 87 else
1814     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1815     option_data, op->one_char);
1816     exit(usage(2));
1817 nigel 77 }
1818 nigel 87 *((int *)op->dataptr) = n;
1819 nigel 77 }
1820 nigel 49 }
1821    
1822 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1823     for -A and -B. */
1824    
1825     if (both_context > 0)
1826     {
1827     if (after_context == 0) after_context = both_context;
1828     if (before_context == 0) before_context = both_context;
1829     }
1830    
1831 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1832     LC_ALL environment variable is set, and if so, use it. */
1833 nigel 49
1834 nigel 87 if (locale == NULL)
1835 nigel 53 {
1836 nigel 87 locale = getenv("LC_ALL");
1837     locale_from = "LCC_ALL";
1838 nigel 53 }
1839 nigel 49
1840 nigel 87 if (locale == NULL)
1841     {
1842     locale = getenv("LC_CTYPE");
1843     locale_from = "LC_CTYPE";
1844     }
1845 nigel 49
1846 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1847     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1848    
1849     if (locale != NULL)
1850 nigel 49 {
1851 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1852 nigel 53 {
1853 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1854     locale, locale_from);
1855 nigel 53 return 2;
1856     }
1857 nigel 87 pcretables = pcre_maketables();
1858     }
1859 nigel 77
1860 nigel 87 /* Sort out colouring */
1861    
1862     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1863     {
1864     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1865     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1866     else
1867 nigel 53 {
1868 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1869     colour_option);
1870     return 2;
1871 nigel 77 }
1872 nigel 87 if (do_colour)
1873 nigel 77 {
1874 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1875     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1876     if (cs != NULL) colour_string = cs;
1877 nigel 77 }
1878 nigel 87 }
1879 nigel 77
1880 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1881    
1882     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1883     {
1884     pcre_options |= PCRE_NEWLINE_CR;
1885 nigel 93 endlinetype = EL_CR;
1886 nigel 91 }
1887     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1888     {
1889     pcre_options |= PCRE_NEWLINE_LF;
1890 nigel 93 endlinetype = EL_LF;
1891 nigel 91 }
1892     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1893     {
1894     pcre_options |= PCRE_NEWLINE_CRLF;
1895 nigel 93 endlinetype = EL_CRLF;
1896 nigel 91 }
1897 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1898     {
1899     pcre_options |= PCRE_NEWLINE_ANY;
1900     endlinetype = EL_ANY;
1901     }
1902 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1903     {
1904     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1905     endlinetype = EL_ANYCRLF;
1906     }
1907 nigel 91 else
1908     {
1909     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1910     return 2;
1911     }
1912    
1913 nigel 87 /* Interpret the text values for -d and -D */
1914    
1915     if (dee_option != NULL)
1916     {
1917     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1918     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1919     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1920     else
1921 nigel 77 {
1922 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1923     return 2;
1924 nigel 53 }
1925 nigel 49 }
1926    
1927 nigel 87 if (DEE_option != NULL)
1928     {
1929     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1930     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1931     else
1932     {
1933     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1934     return 2;
1935     }
1936     }
1937 nigel 49
1938 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1939 nigel 87
1940     #ifdef JFRIEDL_DEBUG
1941     if (S_arg > 9)
1942 nigel 49 {
1943 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1944     return 2;
1945     }
1946 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1947     {
1948     if (jfriedl_XT == 0) jfriedl_XT = 1;
1949     if (jfriedl_XR == 0) jfriedl_XR = 1;
1950     }
1951 nigel 87 #endif
1952 nigel 77
1953 nigel 87 /* Get memory to store the pattern and hints lists. */
1954    
1955     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1956     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1957    
1958     if (pattern_list == NULL || hints_list == NULL)
1959     {
1960     fprintf(stderr, "pcregrep: malloc failed\n");
1961 ph10 123 goto EXIT2;
1962 nigel 87 }
1963    
1964     /* If no patterns were provided by -e, and there is no file provided by -f,
1965     the first argument is the one and only pattern, and it must exist. */
1966    
1967     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1968     {
1969 nigel 63 if (i >= argc) return usage(2);
1970 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1971     }
1972 nigel 77
1973 nigel 87 /* Compile the patterns that were provided on the command line, either by
1974     multiple uses of -e or as a single unkeyed pattern. */
1975    
1976     for (j = 0; j < cmd_pattern_count; j++)
1977     {
1978     if (!compile_pattern(patterns[j], pcre_options, NULL,
1979     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1980 ph10 123 goto EXIT2;
1981 nigel 87 }
1982    
1983     /* Compile the regular expressions that are provided in a file. */
1984    
1985     if (pattern_filename != NULL)
1986     {
1987     int linenumber = 0;
1988     FILE *f;
1989     char *filename;
1990     char buffer[MBUFTHIRD];
1991    
1992     if (strcmp(pattern_filename, "-") == 0)
1993 nigel 77 {
1994 nigel 87 f = stdin;
1995     filename = stdin_name;
1996 nigel 77 }
1997 nigel 87 else
1998 nigel 77 {
1999 nigel 87 f = fopen(pattern_filename, "r");
2000     if (f == NULL)
2001     {
2002     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2003     strerror(errno));
2004 ph10 123 goto EXIT2;
2005 nigel 87 }
2006     filename = pattern_filename;
2007 nigel 77 }
2008    
2009 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2010 nigel 53 {
2011 nigel 87 char *s = buffer + (int)strlen(buffer);
2012     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2013     *s = 0;
2014     linenumber++;
2015     if (buffer[0] == 0) continue; /* Skip blank lines */
2016     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2017 ph10 121 goto EXIT2;
2018 nigel 53 }
2019 nigel 87
2020     if (f != stdin) fclose(f);
2021 nigel 49 }
2022    
2023 nigel 77 /* Study the regular expressions, as we will be running them many times */
2024 nigel 53
2025     for (j = 0; j < pattern_count; j++)
2026     {
2027     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2028     if (error != NULL)
2029     {
2030     char s[16];
2031     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2032     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2033 ph10 121 goto EXIT2;
2034 nigel 53 }
2035 ph10 142 hint_count++;
2036 nigel 53 }
2037    
2038 nigel 77 /* If there are include or exclude patterns, compile them. */
2039    
2040     if (exclude_pattern != NULL)
2041     {
2042 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2043     pcretables);
2044 nigel 77 if (exclude_compiled == NULL)
2045     {
2046     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2047     errptr, error);
2048 ph10 121 goto EXIT2;
2049 nigel 77 }
2050     }
2051    
2052     if (include_pattern != NULL)
2053     {
2054 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2055     pcretables);
2056 nigel 77 if (include_compiled == NULL)
2057     {
2058     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2059     errptr, error);
2060 ph10 121 goto EXIT2;
2061 nigel 77 }
2062     }
2063    
2064 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2065 nigel 49
2066 nigel 87 if (i >= argc)
2067 ph10 121 {
2068     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2069     goto EXIT;
2070 ph10 123 }
2071 nigel 49
2072 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2073     Pass in the fact that there is only one argument at top level - this suppresses
2074 nigel 87 the file name if the argument is not a directory and filenames are not
2075     otherwise forced. */
2076 nigel 49
2077 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2078 nigel 49
2079     for (; i < argc; i++)
2080     {
2081 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2082     only_one_at_top);
2083 nigel 77 if (frc > 1) rc = frc;
2084     else if (frc == 0 && rc == 1) rc = 0;
2085 nigel 49 }
2086    
2087 ph10 121 EXIT:
2088     if (pattern_list != NULL)
2089     {
2090 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2091 ph10 121 free(pattern_list);
2092 ph10 123 }
2093 ph10 121 if (hints_list != NULL)
2094     {
2095 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2096 ph10 121 free(hints_list);
2097 ph10 123 }
2098 nigel 49 return rc;
2099 ph10 121
2100     EXIT2:
2101     rc = 2;
2102     goto EXIT;
2103 nigel 49 }
2104    
2105 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12