/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 243 - (hide annotations) (download)
Thu Sep 13 09:28:14 2007 UTC (6 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 59494 byte(s)
Detrailed files for 7.4-RC1 test release.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 236 #include "pcre.h"
59 nigel 49
60     #define FALSE 0
61     #define TRUE 1
62    
63     typedef int BOOL;
64    
65 nigel 53 #define MAX_PATTERN_COUNT 100
66 nigel 49
67 nigel 77 #if BUFSIZ > 8192
68     #define MBUFTHIRD BUFSIZ
69     #else
70     #define MBUFTHIRD 8192
71     #endif
72 nigel 49
73 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
74     output. The order is important; it is assumed that a file name is wanted for
75     all values greater than FN_DEFAULT. */
76 nigel 77
77 nigel 87 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79     /* Actions for the -d and -D options */
80    
81     enum { dee_READ, dee_SKIP, dee_RECURSE };
82     enum { DEE_READ, DEE_SKIP };
83    
84     /* Actions for special processing options (flag bits) */
85    
86     #define PO_WORD_MATCH 0x0001
87     #define PO_LINE_MATCH 0x0002
88     #define PO_FIXED_STRINGS 0x0004
89    
90 nigel 93 /* Line ending types */
91 nigel 87
92 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
103     #ifdef JFRIEDL_DEBUG
104     static int S_arg = -1;
105 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107     static const char *jfriedl_prefix = "";
108     static const char *jfriedl_postfix = "";
109 nigel 87 #endif
110    
111 nigel 93 static int endlinetype;
112 nigel 91
113 nigel 87 static char *colour_string = (char *)"1;31";
114     static char *colour_option = NULL;
115     static char *dee_option = NULL;
116     static char *DEE_option = NULL;
117 nigel 91 static char *newline = NULL;
118 nigel 53 static char *pattern_filename = NULL;
119 nigel 77 static char *stdin_name = (char *)"(standard input)";
120 nigel 87 static char *locale = NULL;
121    
122     static const unsigned char *pcretables = NULL;
123    
124 nigel 53 static int pattern_count = 0;
125 ph10 121 static pcre **pattern_list = NULL;
126     static pcre_extra **hints_list = NULL;
127 nigel 49
128 nigel 77 static char *include_pattern = NULL;
129     static char *exclude_pattern = NULL;
130    
131     static pcre *include_compiled = NULL;
132     static pcre *exclude_compiled = NULL;
133    
134     static int after_context = 0;
135     static int before_context = 0;
136     static int both_context = 0;
137 nigel 87 static int dee_action = dee_READ;
138     static int DEE_action = DEE_READ;
139     static int error_count = 0;
140     static int filenames = FN_DEFAULT;
141     static int process_options = 0;
142 nigel 77
143 nigel 49 static BOOL count_only = FALSE;
144 nigel 87 static BOOL do_colour = FALSE;
145 nigel 77 static BOOL hyphenpending = FALSE;
146 nigel 49 static BOOL invert = FALSE;
147 nigel 77 static BOOL multiline = FALSE;
148 nigel 49 static BOOL number = FALSE;
149 nigel 87 static BOOL only_matching = FALSE;
150 nigel 77 static BOOL quiet = FALSE;
151 nigel 49 static BOOL silent = FALSE;
152 nigel 93 static BOOL utf8 = FALSE;
153 nigel 49
154 nigel 53 /* Structure for options and list of them */
155 nigel 49
156 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157     OP_PATLIST };
158 nigel 77
/* One entry in the table of command line options (see optionlist). */
159 nigel 53 typedef struct option_item {
160 nigel 77 int type; /* one of the OP_xxx values: the kind of data the option takes */
161 nigel 53 int one_char; /* single-letter form, or a negative N_xxx value if there is none */
162 nigel 77 void *dataptr; /* address of the variable associated with the option, or NULL */
163 nigel 67 const char *long_name; /* long option name; some entries append "=something" */
164     const char *help_text; /* one-line description of the option */
165 nigel 53 } option_item;
166 nigel 49
167 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
168     used to identify them. */
169    
170     #define N_COLOUR (-1)
171     #define N_EXCLUDE (-2)
172     #define N_HELP (-3)
173     #define N_INCLUDE (-4)
174     #define N_LABEL (-5)
175     #define N_LOCALE (-6)
176     #define N_NULL (-7)
177    
178 nigel 53 static option_item optionlist[] = {
179 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 ph10 149 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207     #ifdef JFRIEDL_DEBUG
208     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209     #endif
210     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216     { OP_NODATA, 0, NULL, NULL, NULL }
217 nigel 53 };
218    
219 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221     that the combination of -w and -x has the same effect as -x on its own, so we
222     can treat them as the same. */
223 nigel 53
224 nigel 87 static const char *prefix[] = {
225     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
226    
227     static const char *suffix[] = {
228     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
229    
230 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
231 nigel 87
232 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233 nigel 87
234 nigel 93 const char utf8_table4[] = {
235     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242 nigel 53 /*************************************************
243 nigel 87 * OS-specific functions *
244 nigel 53 *************************************************/
245    
246     /* These functions are defined so that they can be made system specific,
247 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
248 nigel 53
249    
250     /************* Directory scanning in Unix ***********/
251    
252 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 nigel 53 #include <sys/types.h>
254     #include <sys/stat.h>
255     #include <dirent.h>
256    
257     typedef DIR directory_type;
258    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to examine
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is reported as "not a directory", in the
  expectation that opening it as a file will fail. */

  if (stat(filename, &info) < 0) return 0;

  if (S_ISDIR(info.st_mode)) return '/';
  return 0;
}
267    
268 nigel 67 static directory_type *
269 nigel 53 opendirectory(char *filename)
270     {
271     return opendir(filename);
272     }
273    
274 nigel 67 static char *
275 nigel 53 readdirectory(directory_type *dir)
276     {
277     for (;;)
278     {
279     struct dirent *dent = readdir(dir);
280     if (dent == NULL) return NULL;
281     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282     return dent->d_name;
283     }
284 ph10 151 /* Control never reaches here */
285 nigel 53 }
286    
287 nigel 67 static void
288 nigel 53 closedirectory(directory_type *dir)
289     {
290     closedir(dir);
291     }
292    
293    
294 nigel 87 /************* Test for regular file in Unix **********/
295    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to examine
Returns:    1 if stat() fails or if it reports a regular file
            0 if stat() reports any other kind of file
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* A path that cannot be examined is treated as regular, in the expectation
  that opening it as a file will fail. */

  if (stat(filename, &info) < 0) return 1;

  return S_ISREG(info.st_mode) ? 1 : 0;
}
304    
305    
306     /************* Test stdout for being a terminal in Unix **********/
307    
308     static BOOL
309     is_stdout_tty(void)
310     {
311     return isatty(fileno(stdout));
312     }
313    
314    
315 nigel 63 /************* Directory scanning in Win32 ***********/
316 nigel 53
317 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319     when it did not exist. */
320 nigel 53
321 nigel 63
322 ph10 97 #elif HAVE_WINDOWS_H
323 nigel 63
324     #ifndef STRICT
325     # define STRICT
326     #endif
327     #ifndef WIN32_LEAN_AND_MEAN
328     # define WIN32_LEAN_AND_MEAN
329     #endif
330 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
331     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332     #endif
333    
334 nigel 63 #include <windows.h>
335    
/* The state of one directory scan in the Win32 implementation. It is allocated
by opendirectory() and freed by closedirectory(). */
336     typedef struct directory_type
337     {
338     HANDLE handle; /* search handle returned by FindFirstFile() */
339     BOOL first; /* TRUE until readdirectory() has handed out the first entry */
340     WIN32_FIND_DATA data; /* the entry found by the latest FindFirstFile/FindNextFile call */
341     } directory_type;
342    
343     int
344     isdirectory(char *filename)
345     {
346     DWORD attr = GetFileAttributes(filename);
347     if (attr == INVALID_FILE_ATTRIBUTES)
348     return 0;
349     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350     }
351    
352     directory_type *
353     opendirectory(char *filename)
354     {
355     size_t len;
356     char *pattern;
357     directory_type *dir;
358     DWORD err;
359     len = strlen(filename);
360     pattern = (char *) malloc(len + 3);
361     dir = (directory_type *) malloc(sizeof(*dir));
362     if ((pattern == NULL) || (dir == NULL))
363     {
364     fprintf(stderr, "pcregrep: malloc failed\n");
365     exit(2);
366     }
367     memcpy(pattern, filename, len);
368     memcpy(&(pattern[len]), "\\*", 3);
369     dir->handle = FindFirstFile(pattern, &(dir->data));
370     if (dir->handle != INVALID_HANDLE_VALUE)
371     {
372     free(pattern);
373     dir->first = TRUE;
374     return dir;
375     }
376     err = GetLastError();
377     free(pattern);
378     free(dir);
379     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380     return NULL;
381     }
382    
383     char *
384     readdirectory(directory_type *dir)
385     {
386     for (;;)
387     {
388     if (!dir->first)
389     {
390     if (!FindNextFile(dir->handle, &(dir->data)))
391     return NULL;
392     }
393     else
394     {
395     dir->first = FALSE;
396     }
397     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398     return dir->data.cFileName;
399     }
400     #ifndef _MSC_VER
401     return NULL; /* Keep compiler happy; never executed */
402     #endif
403     }
404    
405     void
406     closedirectory(directory_type *dir)
407     {
408     FindClose(dir->handle);
409     free(dir);
410     }
411    
412    
413 nigel 87 /************* Test for regular file in Win32 **********/
414    
415     /* I don't know how to do this, or if it can be done; assume all paths are
416     regular if they are not directories. */
417    
/* Test for a regular file (Win32 version): every path that isdirectory() does
not report as a directory is assumed to be regular.

Argument:   filename   the path to examine
Returns:    1 if the path is not a directory, 0 if it is
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
422    
423    
424     /************* Test stdout for being a terminal in Win32 **********/
425    
426     /* I don't know how to do this; assume never */
427    
428     static BOOL
429     is_stdout_tty(void)
430     {
431     FALSE;
432     }
433    
434    
435 nigel 53 /************* Directory scanning when we can't do it ***********/
436    
437     /* The type is void, and apart from isdirectory(), the functions do nothing. */
438    
439 nigel 63 #else
440    
/* Directory support for systems where scanning is not possible. The directory
type is void, nothing is ever reported as a directory, and the scanning
functions do nothing. */

typedef void directory_type;

/* Never reports a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Always fails to open. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* Never yields an entry. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
447    
448 nigel 87
449     /************* Test for regular when we can't do it **********/
450    
451     /* Assume all files are regular. */
452    
/* With no means of testing, every path is taken to be a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
454    
455    
456     /************* Test stdout for being a terminal when we can't do it **********/
457    
458     static BOOL
459     is_stdout_tty(void)
460     {
461     return FALSE;
462     }
463    
464    
465 nigel 53 #endif
466    
467    
468    
469 ph10 137 #ifndef HAVE_STRERROR
470 nigel 49 /*************************************************
471     * Provide strerror() for non-ANSI libraries *
472     *************************************************/
473    
474     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475     in their libraries, but can provide the same facility by this simple
476     alternative function. */
477    
478     extern int sys_nerr;
479     extern char *sys_errlist[];
480    
481     char *
482     strerror(int n)
483     {
484     if (n < 0 || n >= sys_nerr) return "unknown error number";
485     return sys_errlist[n];
486     }
487     #endif /* HAVE_STRERROR */
488    
489    
490    
491     /*************************************************
492 nigel 93 * Find end of line *
493     *************************************************/
494    
495     /* The length of the endline sequence that is found is set via lenptr. This may
496     be zero at the very end of the file if there is no line-ending sequence there.
497    
498     Arguments:
499     p current position in line
500     endptr end of available data
501     lenptr where to put the length of the eol sequence
502    
503     Returns: pointer to the last byte of the line
504     */
505    
506     static char *
507     end_of_line(char *p, char *endptr, int *lenptr)
508     {
509     switch(endlinetype)
510     {
511     default: /* Just in case */
512     case EL_LF:
513     while (p < endptr && *p != '\n') p++;
514     if (p < endptr)
515     {
516     *lenptr = 1;
517     return p + 1;
518     }
519     *lenptr = 0;
520     return endptr;
521    
522     case EL_CR:
523     while (p < endptr && *p != '\r') p++;
524     if (p < endptr)
525     {
526     *lenptr = 1;
527     return p + 1;
528     }
529     *lenptr = 0;
530     return endptr;
531    
532     case EL_CRLF:
533     for (;;)
534     {
535     while (p < endptr && *p != '\r') p++;
536     if (++p >= endptr)
537     {
538     *lenptr = 0;
539     return endptr;
540     }
541     if (*p == '\n')
542     {
543     *lenptr = 2;
544     return p + 1;
545     }
546     }
547     break;
548    
549 ph10 149 case EL_ANYCRLF:
550     while (p < endptr)
551     {
552     int extra = 0;
553     register int c = *((unsigned char *)p);
554    
555     if (utf8 && c >= 0xc0)
556     {
557     int gcii, gcss;
558     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559     gcss = 6*extra;
560     c = (c & utf8_table3[extra]) << gcss;
561     for (gcii = 1; gcii <= extra; gcii++)
562     {
563     gcss -= 6;
564     c |= (p[gcii] & 0x3f) << gcss;
565     }
566     }
567    
568     p += 1 + extra;
569    
570     switch (c)
571     {
572     case 0x0a: /* LF */
573     *lenptr = 1;
574     return p;
575    
576     case 0x0d: /* CR */
577     if (p < endptr && *p == 0x0a)
578     {
579     *lenptr = 2;
580     p++;
581     }
582     else *lenptr = 1;
583     return p;
584 ph10 150
585 ph10 149 default:
586     break;
587     }
588     } /* End of loop for ANYCRLF case */
589 ph10 150
590 ph10 149 *lenptr = 0; /* Must have hit the end */
591     return endptr;
592    
593 nigel 93 case EL_ANY:
594     while (p < endptr)
595     {
596     int extra = 0;
597     register int c = *((unsigned char *)p);
598    
599     if (utf8 && c >= 0xc0)
600     {
601     int gcii, gcss;
602     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603     gcss = 6*extra;
604     c = (c & utf8_table3[extra]) << gcss;
605     for (gcii = 1; gcii <= extra; gcii++)
606     {
607     gcss -= 6;
608     c |= (p[gcii] & 0x3f) << gcss;
609     }
610     }
611    
612     p += 1 + extra;
613    
614     switch (c)
615     {
616     case 0x0a: /* LF */
617     case 0x0b: /* VT */
618     case 0x0c: /* FF */
619     *lenptr = 1;
620     return p;
621    
622     case 0x0d: /* CR */
623     if (p < endptr && *p == 0x0a)
624     {
625     *lenptr = 2;
626     p++;
627     }
628     else *lenptr = 1;
629     return p;
630    
631     case 0x85: /* NEL */
632     *lenptr = utf8? 2 : 1;
633     return p;
634    
635     case 0x2028: /* LS */
636     case 0x2029: /* PS */
637     *lenptr = 3;
638     return p;
639    
640     default:
641     break;
642     }
643     } /* End of loop for ANY case */
644    
645     *lenptr = 0; /* Must have hit the end */
646     return endptr;
647     } /* End of overall switch */
648     }
649    
650    
651    
652     /*************************************************
653     * Find start of previous line *
654     *************************************************/
655    
656     /* This is called when looking back for before lines to print.
657    
658     Arguments:
659     p start of the subsequent line
660     startptr start of available data
661    
662     Returns: pointer to the start of the previous line
663     */
664    
665     static char *
666     previous_line(char *p, char *startptr)
667     {
668     switch(endlinetype)
669     {
670     default: /* Just in case */
671     case EL_LF:
672     p--;
673     while (p > startptr && p[-1] != '\n') p--;
674     return p;
675    
676     case EL_CR:
677     p--;
678     while (p > startptr && p[-1] != '\n') p--;
679     return p;
680    
681     case EL_CRLF:
682     for (;;)
683     {
684     p -= 2;
685     while (p > startptr && p[-1] != '\n') p--;
686     if (p <= startptr + 1 || p[-2] == '\r') return p;
687     }
688     return p; /* But control should never get here */
689    
690     case EL_ANY:
691 ph10 150 case EL_ANYCRLF:
692 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693     if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695     while (p > startptr)
696     {
697     register int c;
698     char *pp = p - 1;
699    
700     if (utf8)
701     {
702     int extra = 0;
703     while ((*pp & 0xc0) == 0x80) pp--;
704     c = *((unsigned char *)pp);
705     if (c >= 0xc0)
706     {
707     int gcii, gcss;
708     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709     gcss = 6*extra;
710     c = (c & utf8_table3[extra]) << gcss;
711     for (gcii = 1; gcii <= extra; gcii++)
712     {
713     gcss -= 6;
714     c |= (pp[gcii] & 0x3f) << gcss;
715     }
716     }
717     }
718     else c = *((unsigned char *)pp);
719    
720 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
721 nigel 93 {
722     case 0x0a: /* LF */
723 ph10 149 case 0x0d: /* CR */
724     return p;
725 ph10 150
726 ph10 149 default:
727     break;
728 ph10 150 }
729 ph10 149
730     else switch (c)
731     {
732     case 0x0a: /* LF */
733 nigel 93 case 0x0b: /* VT */
734     case 0x0c: /* FF */
735     case 0x0d: /* CR */
736     case 0x85: /* NEL */
737     case 0x2028: /* LS */
738     case 0x2029: /* PS */
739     return p;
740    
741     default:
742     break;
743     }
744    
745     p = pp; /* Back one character */
746     } /* End of loop for ANY case */
747    
748     return startptr; /* Hit start of data */
749     } /* End of overall switch */
750     }
751    
752    
753    
754    
755    
756     /*************************************************
757 nigel 77 * Print the previous "after" lines *
758 nigel 49 *************************************************/
759    
760 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
761 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
762     that a binary zero does not terminate it.
763 nigel 77
764     Arguments:
765     lastmatchnumber the number of the last matching line, plus one
766     lastmatchrestart where we restarted after the last match
767     endptr end of available data
768     printname filename for printing
769    
770     Returns: nothing
771     */
772    
773     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774     char *endptr, char *printname)
775     {
776     if (after_context > 0 && lastmatchnumber > 0)
777     {
778     int count = 0;
779     while (lastmatchrestart < endptr && count++ < after_context)
780     {
781 nigel 93 int ellength;
782 nigel 77 char *pp = lastmatchrestart;
783     if (printname != NULL) fprintf(stdout, "%s-", printname);
784     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 nigel 93 pp = end_of_line(pp, endptr, &ellength);
786     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787     lastmatchrestart = pp;
788 nigel 77 }
789     hyphenpending = TRUE;
790     }
791     }
792    
793    
794    
795     /*************************************************
796     * Grep an individual file *
797     *************************************************/
798    
799     /* This is called from grep_or_recurse() below. It uses a buffer that is three
800     times the value of MBUFTHIRD. The matching point is never allowed to stray into
801     the top third of the buffer, thus keeping more of the file available for
802     context printing or for multiline scanning. For large files, the pointer will
803     be in the middle third most of the time, so the bottom third is available for
804     "before" context printing.
805    
806     Arguments:
807     in the fopened FILE stream
808     printname the file name if it is to be printed for each match
809     or NULL if the file name is not to be printed
810     it cannot be NULL if filenames[_nomatch]_only is set
811    
812     Returns: 0 if there was at least one match
813     1 otherwise (no matches)
814     */
815    
816 nigel 49 static int
817 nigel 77 pcregrep(FILE *in, char *printname)
818 nigel 49 {
819     int rc = 1;
820 nigel 77 int linenumber = 1;
821     int lastmatchnumber = 0;
822 nigel 49 int count = 0;
823     int offsets[99];
824 nigel 77 char *lastmatchrestart = NULL;
825     char buffer[3*MBUFTHIRD];
826     char *ptr = buffer;
827     char *endptr;
828     size_t bufflength;
829     BOOL endhyphenpending = FALSE;
830 nigel 49
831 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
832     end of what we have. */
833    
834     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835     endptr = buffer + bufflength;
836    
837     /* Loop while the current pointer is not at the end of the file. For large
838     files, endptr will be at the end of the buffer when we are in the middle of the
839     file, but ptr will never get there, because as soon as it gets over 2/3 of the
840     way, the buffer is shifted left and re-filled. */
841    
842     while (ptr < endptr)
843 nigel 49 {
844 nigel 93 int i, endlinelength;
845 nigel 87 int mrc = 0;
846 nigel 53 BOOL match = FALSE;
847 nigel 77 char *t = ptr;
848     size_t length, linelength;
849 nigel 49
850 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
851     of the subject string to pass to pcre_exec(). In multiline mode, it is the
852     length remainder of the data in the buffer. Otherwise, it is the length of
853     the next line. After matching, we always advance by the length of the next
854     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855     that any match is constrained to be in the first line. */
856    
857 nigel 93 t = end_of_line(t, endptr, &endlinelength);
858     linelength = t - ptr - endlinelength;
859 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
860 nigel 77
861 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
862    
863     #ifdef JFRIEDL_DEBUG
864     if (jfriedl_XT || jfriedl_XR)
865     {
866     #include <sys/time.h>
867     #include <time.h>
868     struct timeval start_time, end_time;
869     struct timezone dummy;
870    
871     if (jfriedl_XT)
872     {
873     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874     const char *orig = ptr;
875     ptr = malloc(newlen + 1);
876     if (!ptr) {
877     printf("out of memory");
878     exit(2);
879     }
880     endptr = ptr;
881     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882     for (i = 0; i < jfriedl_XT; i++) {
883     strncpy(endptr, orig, length);
884     endptr += length;
885     }
886     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887     length = newlen;
888     }
889    
890     if (gettimeofday(&start_time, &dummy) != 0)
891     perror("bad gettimeofday");
892    
893    
894     for (i = 0; i < jfriedl_XR; i++)
895     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896    
897     if (gettimeofday(&end_time, &dummy) != 0)
898     perror("bad gettimeofday");
899    
900     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901     -
902     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905     return 0;
906     }
907     #endif
908    
909    
910 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
911     the final newline in the subject string. */
912    
913 nigel 87 for (i = 0; i < pattern_count; i++)
914 nigel 53 {
915 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916     offsets, 99);
917     if (mrc >= 0) { match = TRUE; break; }
918     if (mrc != PCRE_ERROR_NOMATCH)
919     {
920     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922     fprintf(stderr, "this line:\n");
923     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924     fprintf(stderr, "\n");
925     if (error_count == 0 &&
926     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927     {
928     fprintf(stderr, "pcregrep: error %d means that a resource limit "
929     "was exceeded\n", mrc);
930     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931     }
932     if (error_count++ > 20)
933     {
934     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935     exit(2);
936     }
937     match = invert; /* No more matching; don't show the line again */
938     break;
939     }
940 nigel 53 }
941 nigel 49
942 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
943 nigel 77
944 nigel 49 if (match != invert)
945     {
946 nigel 77 BOOL hyphenprinted = FALSE;
947    
948 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
949 nigel 77
950 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
951    
952     /* Just count if just counting is wanted. */
953    
954 nigel 49 if (count_only) count++;
955    
956 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
957     in the file. */
958    
959     else if (filenames == FN_ONLY)
960 nigel 49 {
961 nigel 77 fprintf(stdout, "%s\n", printname);
962 nigel 49 return 0;
963     }
964    
965 nigel 87 /* Likewise, if all we want is a yes/no answer. */
966    
967 nigel 77 else if (quiet) return 0;
968 nigel 49
969 nigel 87 /* The --only-matching option prints just the substring that matched, and
970     does not print any context. */
971    
972     else if (only_matching)
973     {
974     if (printname != NULL) fprintf(stdout, "%s:", printname);
975     if (number) fprintf(stdout, "%d:", linenumber);
976     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977     fprintf(stdout, "\n");
978     }
979    
980     /* This is the default case when none of the above options is set. We print
981     the matching line(s), possibly preceded and/or followed by other lines of
982     context. */
983    
984 nigel 49 else
985     {
986 nigel 77 /* See if there is a requirement to print some "after" lines from a
987     previous match. We never print any overlaps. */
988    
989     if (after_context > 0 && lastmatchnumber > 0)
990     {
991 nigel 93 int ellength;
992 nigel 77 int linecount = 0;
993     char *p = lastmatchrestart;
994    
995     while (p < ptr && linecount < after_context)
996     {
997 nigel 93 p = end_of_line(p, ptr, &ellength);
998 nigel 77 linecount++;
999     }
1000    
1001     /* It is important to advance lastmatchrestart during this printing so
1002 nigel 87 that it interacts correctly with any "before" printing below. Print
1003     each line's data using fwrite() in case there are binary zeroes. */
1004 nigel 77
1005     while (lastmatchrestart < p)
1006     {
1007     char *pp = lastmatchrestart;
1008     if (printname != NULL) fprintf(stdout, "%s-", printname);
1009     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1011     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012     lastmatchrestart = pp;
1013 nigel 77 }
1014     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015     }
1016    
1017     /* If there were non-contiguous lines printed above, insert hyphens. */
1018    
1019     if (hyphenpending)
1020     {
1021     fprintf(stdout, "--\n");
1022     hyphenpending = FALSE;
1023     hyphenprinted = TRUE;
1024     }
1025    
1026     /* See if there is a requirement to print some "before" lines for this
1027     match. Again, don't print overlaps. */
1028    
1029     if (before_context > 0)
1030     {
1031     int linecount = 0;
1032     char *p = ptr;
1033    
1034     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 nigel 87 linecount < before_context)
1036 nigel 77 {
1037 nigel 87 linecount++;
1038 nigel 93 p = previous_line(p, buffer);
1039 nigel 77 }
1040    
1041     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042     fprintf(stdout, "--\n");
1043    
1044     while (p < ptr)
1045     {
1046 nigel 93 int ellength;
1047 nigel 77 char *pp = p;
1048     if (printname != NULL) fprintf(stdout, "%s-", printname);
1049     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1051     fwrite(p, 1, pp - p, stdout);
1052     p = pp;
1053 nigel 77 }
1054     }
1055    
1056     /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 nigel 85 of the file if any context lines are being output. */
1058 nigel 77
1059 nigel 85 if (after_context > 0 || before_context > 0)
1060     endhyphenpending = TRUE;
1061    
1062 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1064 nigel 77
1065     /* In multiline mode, we want to print to the end of the line in which
1066     the end of the matched string is found, so we adjust linelength and the
1067 ph10 222 line number appropriately, but only when there actually was a match
1068     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069     the match will always be before the first newline sequence. */
1070 nigel 77
1071     if (multiline)
1072     {
1073 nigel 93 int ellength;
1074 ph10 222 char *endmatch = ptr;
1075     if (!invert)
1076 nigel 93 {
1077 ph10 222 endmatch += offsets[1];
1078     t = ptr;
1079     while (t < endmatch)
1080     {
1081     t = end_of_line(t, endptr, &ellength);
1082     if (t <= endmatch) linenumber++; else break;
1083     }
1084 nigel 93 }
1085     endmatch = end_of_line(endmatch, endptr, &ellength);
1086     linelength = endmatch - ptr - ellength;
1087 nigel 77 }
1088    
1089 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1090     zeroes are treated as just another data character. */
1091    
1092     /* This extra option, for Jeffrey Friedl's debugging requirements,
1093     replaces the matched string, or a specific captured string if it exists,
1094     with X. When this happens, colouring is ignored. */
1095    
1096     #ifdef JFRIEDL_DEBUG
1097     if (S_arg >= 0 && S_arg < mrc)
1098     {
1099     int first = S_arg * 2;
1100     int last = first + 1;
1101     fwrite(ptr, 1, offsets[first], stdout);
1102     fprintf(stdout, "X");
1103     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1104     }
1105     else
1106     #endif
1107    
1108     /* We have to split the line(s) up if colouring. */
1109    
1110     if (do_colour)
1111     {
1112     fwrite(ptr, 1, offsets[0], stdout);
1113     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115     fprintf(stdout, "%c[00m", 0x1b);
1116 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1117 ph10 239 stdout);
1118 nigel 87 }
1119 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1120 nigel 49 }
1121    
1122 nigel 87 /* End of doing what has to be done for a match */
1123    
1124 nigel 77 rc = 0; /* Had some success */
1125    
1126     /* Remember where the last match happened for after_context. We remember
1127     where we are about to restart, and that line's number. */
1128    
1129 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1130 nigel 77 lastmatchnumber = linenumber + 1;
1131 nigel 49 }
1132 nigel 77
1133 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1134     anything to be printed), we have to move on to the end of the match before
1135     proceeding. */
1136    
1137     if (multiline && invert && match)
1138     {
1139     int ellength;
1140     char *endmatch = ptr + offsets[1];
1141     t = ptr;
1142     while (t < endmatch)
1143     {
1144     t = end_of_line(t, endptr, &ellength);
1145     if (t <= endmatch) linenumber++; else break;
1146     }
1147     endmatch = end_of_line(endmatch, endptr, &ellength);
1148     linelength = endmatch - ptr - ellength;
1149     }
1150    
1151 nigel 77 /* Advance to after the newline and increment the line number. */
1152    
1153 nigel 93 ptr += linelength + endlinelength;
1154 nigel 77 linenumber++;
1155    
1156     /* If we haven't yet reached the end of the file (the buffer is full), and
1157     the current point is in the top 1/3 of the buffer, slide the buffer down by
1158     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1159     about to be lost, print them. */
1160    
1161     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1162     {
1163     if (after_context > 0 &&
1164     lastmatchnumber > 0 &&
1165     lastmatchrestart < buffer + MBUFTHIRD)
1166     {
1167     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1168     lastmatchnumber = 0;
1169     }
1170    
1171     /* Now do the shuffle */
1172    
1173     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1174     ptr -= MBUFTHIRD;
1175     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1176     endptr = buffer + bufflength;
1177    
1178     /* Adjust any last match point */
1179    
1180     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1181     }
1182     } /* Loop through the whole file */
1183    
1184     /* End of file; print final "after" lines if wanted; do_after_lines sets
1185     hyphenpending if it prints something. */
1186    
1187 nigel 87 if (!only_matching && !count_only)
1188     {
1189     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1190     hyphenpending |= endhyphenpending;
1191     }
1192 nigel 77
1193     /* Print the file name if we are looking for those without matches and there
1194     were none. If we found a match, we won't have got this far. */
1195    
1196 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1197 nigel 77 {
1198     fprintf(stdout, "%s\n", printname);
1199     return 0;
1200 nigel 49 }
1201    
1202 nigel 77 /* Print the match count if wanted */
1203    
1204 nigel 49 if (count_only)
1205     {
1206 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1207 nigel 49 fprintf(stdout, "%d\n", count);
1208     }
1209    
1210     return rc;
1211     }
1212    
1213    
1214    
1215     /*************************************************
1216 nigel 53 * Grep a file or recurse into a directory *
1217     *************************************************/
1218    
1219 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1220     recursing; if it's a file, grep it.
1221    
1222     Arguments:
1223     pathname the path to investigate
1224 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1225 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1226    
1227     Returns: 0 if there was at least one match
1228     1 if there were no matches
1229     2 there was some kind of error
1230    
1231     However, file opening failures are suppressed if "silent" is set.
1232     */
1233    
1234 nigel 53 static int
1235 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1236 nigel 53 {
1237     int rc = 1;
1238     int sep;
1239     FILE *in;
1240    
1241 nigel 77 /* If the file name is "-" we scan stdin */
1242 nigel 53
1243 nigel 77 if (strcmp(pathname, "-") == 0)
1244 nigel 53 {
1245 nigel 77 return pcregrep(stdin,
1246 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1247 nigel 77 stdin_name : NULL);
1248     }
1249    
1250    
1251 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1252     each file within it, subject to any include or exclude patterns that were set.
1253     The scanning code is localized so it can be made system-specific. */
1254    
1255     if ((sep = isdirectory(pathname)) != 0)
1256 nigel 77 {
1257 nigel 87 if (dee_action == dee_SKIP) return 1;
1258     if (dee_action == dee_RECURSE)
1259 nigel 53 {
1260 nigel 87 char buffer[1024];
1261     char *nextfile;
1262     directory_type *dir = opendirectory(pathname);
1263 nigel 53
1264 nigel 87 if (dir == NULL)
1265     {
1266     if (!silent)
1267     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1268     strerror(errno));
1269     return 2;
1270     }
1271 nigel 77
1272 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1273     {
1274     int frc, blen;
1275     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1276     blen = strlen(buffer);
1277 nigel 77
1278 nigel 87 if (exclude_compiled != NULL &&
1279     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1280     continue;
1281 nigel 77
1282 nigel 87 if (include_compiled != NULL &&
1283     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1284     continue;
1285    
1286     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1287     if (frc > 1) rc = frc;
1288     else if (frc == 0 && rc == 1) rc = 0;
1289     }
1290    
1291     closedirectory(dir);
1292     return rc;
1293 nigel 53 }
1294     }
1295    
1296 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1297     been requested. */
1298 nigel 53
1299 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1300    
1301     /* Control reaches here if we have a regular file, or if we have a directory
1302     and recursion or skipping was not requested, or if we have anything else and
1303     skipping was not requested. The scan proceeds. If this is the first and only
1304     argument at top level, we don't show the file name, unless we are only showing
1305     the file name, or the filename was forced (-H). */
1306    
1307 nigel 77 in = fopen(pathname, "r");
1308 nigel 53 if (in == NULL)
1309     {
1310 nigel 77 if (!silent)
1311     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1312     strerror(errno));
1313 nigel 53 return 2;
1314     }
1315    
1316 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1317     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1318 nigel 77
1319 nigel 53 fclose(in);
1320     return rc;
1321     }
1322    
1323    
1324    
1325    
1326     /*************************************************
1327 nigel 49 * Usage function *
1328     *************************************************/
1329    
1330     static int
1331     usage(int rc)
1332     {
1333 nigel 87 option_item *op;
1334     fprintf(stderr, "Usage: pcregrep [-");
1335     for (op = optionlist; op->one_char != 0; op++)
1336     {
1337     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1338     }
1339     fprintf(stderr, "] [long options] [pattern] [files]\n");
1340 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1341 nigel 49 return rc;
1342     }
1343    
1344    
1345    
1346    
1347     /*************************************************
1348 nigel 53 * Help function *
1349     *************************************************/
1350    
1351     static void
1352     help(void)
1353     {
1354     option_item *op;
1355    
1356 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1357 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1358 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1359     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1360 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1361    
1362     printf("Options:\n");
1363    
1364     for (op = optionlist; op->one_char != 0; op++)
1365     {
1366     int n;
1367     char s[4];
1368     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1369     printf(" %s --%s%n", s, op->long_name, &n);
1370     n = 30 - n;
1371     if (n < 1) n = 1;
1372     printf("%.*s%s\n", n, " ", op->help_text);
1373     }
1374    
1375 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1376     printf("trailing white space is removed and blank lines are ignored.\n");
1377     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1378 nigel 53
1379 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1380 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1381     }
1382    
1383    
1384    
1385    
1386     /*************************************************
1387 nigel 77 * Handle a single-letter, no data option *
1388 nigel 53 *************************************************/
1389    
1390     static int
1391     handle_option(int letter, int options)
1392     {
1393     switch(letter)
1394     {
1395 nigel 87 case N_HELP: help(); exit(0);
1396 nigel 53 case 'c': count_only = TRUE; break;
1397 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1398     case 'H': filenames = FN_FORCE; break;
1399     case 'h': filenames = FN_NONE; break;
1400 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1401 nigel 87 case 'l': filenames = FN_ONLY; break;
1402     case 'L': filenames = FN_NOMATCH_ONLY; break;
1403 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1404 nigel 53 case 'n': number = TRUE; break;
1405 nigel 87 case 'o': only_matching = TRUE; break;
1406 nigel 77 case 'q': quiet = TRUE; break;
1407 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1408 nigel 53 case 's': silent = TRUE; break;
1409 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1410 nigel 53 case 'v': invert = TRUE; break;
1411 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1412     case 'x': process_options |= PO_LINE_MATCH; break;
1413 nigel 53
1414     case 'V':
1415 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1416 nigel 53 exit(0);
1417     break;
1418    
1419     default:
1420     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1421     exit(usage(2));
1422     }
1423    
1424     return options;
1425     }
1426    
1427    
1428    
1429    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. The suffix is chosen
from the last two digits, because numbers ending in 11, 12 and 13 take "th"
even though their final digit is 1, 2 or 3.

Argument:   n    the number to format
Returns:    a pointer to a static buffer holding the text; the contents are
            overwritten by the next call, so copy the string if it has to
            survive one
*/

static char *
ordin(int n)
{
static char buffer[32];    /* sign, digits of any int up to 64 bits, suffix */
const char *suffix = "th";
int last_two = n % 100;    /* lies in -99..99, so negating it cannot overflow */

if (last_two < 0) last_two = -last_two;

if (last_two < 11 || last_two > 13)
  {
  switch (last_two % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1452    
1453    
1454    
1455     /*************************************************
1456     * Compile a single pattern *
1457     *************************************************/
1458    
1459     /* When the -F option has been used, this is called for each substring.
1460     Otherwise it's called for each supplied pattern.
1461    
1462     Arguments:
1463     pattern the pattern string
1464     options the PCRE options
1465     filename the file name, or NULL for a command-line pattern
1466     count 0 if this is the only command line pattern, or
1467     number of the command line pattern, or
1468     linenumber for a pattern from a file
1469    
1470     Returns: TRUE on success, FALSE after an error
1471     */
1472    
1473     static BOOL
1474     compile_single_pattern(char *pattern, int options, char *filename, int count)
1475     {
1476     char buffer[MBUFTHIRD + 16];
1477     const char *error;
1478     int errptr;
1479    
1480     if (pattern_count >= MAX_PATTERN_COUNT)
1481     {
1482     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1483     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1484     return FALSE;
1485     }
1486    
1487     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1488     suffix[process_options]);
1489     pattern_list[pattern_count] =
1490     pcre_compile(buffer, options, &error, &errptr, pcretables);
1491 ph10 142 if (pattern_list[pattern_count] != NULL)
1492 ph10 141 {
1493 ph10 142 pattern_count++;
1494 ph10 141 return TRUE;
1495 ph10 142 }
1496 nigel 87
1497     /* Handle compile errors */
1498    
1499     errptr -= (int)strlen(prefix[process_options]);
1500     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1501    
1502     if (filename == NULL)
1503     {
1504     if (count == 0)
1505     fprintf(stderr, "pcregrep: Error in command-line regex "
1506     "at offset %d: %s\n", errptr, error);
1507     else
1508     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1509     "at offset %d: %s\n", ordin(count), errptr, error);
1510     }
1511     else
1512     {
1513     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1514     "at offset %d: %s\n", count, filename, errptr, error);
1515     }
1516    
1517     return FALSE;
1518     }
1519    
1520    
1521    
1522     /*************************************************
1523     * Compile one supplied pattern *
1524     *************************************************/
1525    
1526     /* When the -F option has been used, each string may be a list of strings,
1527 nigel 91 separated by line breaks. They will be matched literally.
1528 nigel 87
1529     Arguments:
1530     pattern the pattern string
1531     options the PCRE options
1532     filename the file name, or NULL for a command-line pattern
1533     count 0 if this is the only command line pattern, or
1534     number of the command line pattern, or
1535     linenumber for a pattern from a file
1536    
1537     Returns: TRUE on success, FALSE after an error
1538     */
1539    
1540     static BOOL
1541     compile_pattern(char *pattern, int options, char *filename, int count)
1542     {
1543     if ((process_options & PO_FIXED_STRINGS) != 0)
1544     {
1545 nigel 93 char *eop = pattern + strlen(pattern);
1546 nigel 87 char buffer[MBUFTHIRD];
1547     for(;;)
1548     {
1549 nigel 93 int ellength;
1550     char *p = end_of_line(pattern, eop, &ellength);
1551     if (ellength == 0)
1552 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1553 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1554 nigel 93 pattern = p;
1555 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1556     return FALSE;
1557     }
1558     }
1559     else return compile_single_pattern(pattern, options, filename, count);
1560     }
1561    
1562    
1563    
1564     /*************************************************
1565 nigel 49 * Main program *
1566     *************************************************/
1567    
1568 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1569    
1570 nigel 49 int
1571     main(int argc, char **argv)
1572     {
1573 nigel 53 int i, j;
1574 nigel 49 int rc = 1;
1575 nigel 87 int pcre_options = 0;
1576     int cmd_pattern_count = 0;
1577 ph10 141 int hint_count = 0;
1578 nigel 49 int errptr;
1579 nigel 87 BOOL only_one_at_top;
1580     char *patterns[MAX_PATTERN_COUNT];
1581     const char *locale_from = "--locale";
1582 nigel 49 const char *error;
1583    
1584 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1585     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1586     */
1587 nigel 91
1588     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1589     switch(i)
1590     {
1591     default: newline = (char *)"lf"; break;
1592     case '\r': newline = (char *)"cr"; break;
1593     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594 nigel 93 case -1: newline = (char *)"any"; break;
1595 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1596 nigel 91 }
1597    
1598 nigel 49 /* Process the options */
1599    
1600     for (i = 1; i < argc; i++)
1601     {
1602 nigel 77 option_item *op = NULL;
1603     char *option_data = (char *)""; /* default to keep compiler happy */
1604     BOOL longop;
1605     BOOL longopwasequals = FALSE;
1606    
1607 nigel 49 if (argv[i][0] != '-') break;
1608 nigel 53
1609 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1610 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1611 nigel 63
1612 nigel 77 if (argv[i][1] == 0)
1613     {
1614 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1615 nigel 77 else exit(usage(2));
1616     }
1617 nigel 63
1618 nigel 77 /* Handle a long name option, or -- to terminate the options */
1619 nigel 53
1620     if (argv[i][1] == '-')
1621 nigel 49 {
1622 nigel 77 char *arg = argv[i] + 2;
1623     char *argequals = strchr(arg, '=');
1624 nigel 53
1625 nigel 77 if (*arg == 0) /* -- terminates options */
1626 nigel 49 {
1627 nigel 77 i++;
1628     break; /* out of the options-handling loop */
1629 nigel 53 }
1630 nigel 49
1631 nigel 77 longop = TRUE;
1632    
1633     /* Some long options have data that follows after =, for example file=name.
1634     Some options have variations in the long name spelling: specifically, we
1635     allow "regexp" because GNU grep allows it, though I personally go along
1636 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1637     These options are entered in the table as "regex(p)". No option is in both
1638     these categories, fortunately. */
1639 nigel 77
1640 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1641     {
1642 nigel 77 char *opbra = strchr(op->long_name, '(');
1643     char *equals = strchr(op->long_name, '=');
1644     if (opbra == NULL) /* Not a (p) case */
1645 nigel 53 {
1646 nigel 77 if (equals == NULL) /* Not thing=data case */
1647     {
1648     if (strcmp(arg, op->long_name) == 0) break;
1649     }
1650     else /* Special case xxx=data */
1651     {
1652     int oplen = equals - op->long_name;
1653 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1655     {
1656     option_data = arg + arglen;
1657     if (*option_data == '=')
1658     {
1659     option_data++;
1660     longopwasequals = TRUE;
1661     }
1662     break;
1663     }
1664     }
1665 nigel 53 }
1666 nigel 77 else /* Special case xxxx(p) */
1667     {
1668     char buff1[24];
1669     char buff2[24];
1670     int baselen = opbra - op->long_name;
1671     sprintf(buff1, "%.*s", baselen, op->long_name);
1672 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1673 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1675     break;
1676     }
1677 nigel 53 }
1678 nigel 77
1679 nigel 53 if (op->one_char == 0)
1680     {
1681     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1682     exit(usage(2));
1683     }
1684     }
1685 nigel 49
1686 nigel 89
1687     /* Jeffrey Friedl's debugging harness uses these additional options which
1688     are not in the right form for putting in the option table because they use
1689     only one hyphen, yet are more than one character long. By putting them
1690     separately here, they will not get displayed as part of the help() output,
1691     but I don't think Jeffrey will care about that. */
1692    
1693     #ifdef JFRIEDL_DEBUG
1694     else if (strcmp(argv[i], "-pre") == 0) {
1695     jfriedl_prefix = argv[++i];
1696     continue;
1697     } else if (strcmp(argv[i], "-post") == 0) {
1698     jfriedl_postfix = argv[++i];
1699     continue;
1700     } else if (strcmp(argv[i], "-XT") == 0) {
1701     sscanf(argv[++i], "%d", &jfriedl_XT);
1702     continue;
1703     } else if (strcmp(argv[i], "-XR") == 0) {
1704     sscanf(argv[++i], "%d", &jfriedl_XR);
1705     continue;
1706     }
1707     #endif
1708    
1709    
1710 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1711     continue till we hit the last one or one that needs data. */
1712 nigel 53
1713     else
1714     {
1715     char *s = argv[i] + 1;
1716 nigel 77 longop = FALSE;
1717 nigel 53 while (*s != 0)
1718     {
1719 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1720     { if (*s == op->one_char) break; }
1721     if (op->one_char == 0)
1722 nigel 53 {
1723 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1724     *s, argv[i]);
1725     exit(usage(2));
1726     }
1727     if (op->type != OP_NODATA || s[1] == 0)
1728     {
1729     option_data = s+1;
1730 nigel 53 break;
1731     }
1732 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1733 nigel 49 }
1734     }
1735 nigel 77
1736 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1737     is NO_DATA, it means that there is no data, and the option might set
1738     something in the PCRE options. */
1739 nigel 77
1740     if (op->type == OP_NODATA)
1741     {
1742 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1743     continue;
1744     }
1745    
1746     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1747     either has a value or defaults to something. It cannot have data in a
1748     separate item. At the moment, the only such options are "colo(u)r" and
1749 nigel 89 Jeffrey Friedl's special -S debugging option. */
1750 nigel 87
1751     if (*option_data == 0 &&
1752     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1753     {
1754     switch (op->one_char)
1755 nigel 77 {
1756 nigel 87 case N_COLOUR:
1757     colour_option = (char *)"auto";
1758     break;
1759     #ifdef JFRIEDL_DEBUG
1760     case 'S':
1761     S_arg = 0;
1762     break;
1763     #endif
1764 nigel 77 }
1765 nigel 87 continue;
1766     }
1767 nigel 77
1768 nigel 87 /* Otherwise, find the data string for the option. */
1769    
1770     if (*option_data == 0)
1771     {
1772     if (i >= argc - 1 || longopwasequals)
1773 nigel 77 {
1774 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1775     exit(usage(2));
1776     }
1777     option_data = argv[++i];
1778     }
1779    
1780     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1781     multiple times to create a list of patterns. */
1782    
1783     if (op->type == OP_PATLIST)
1784     {
1785     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1786     {
1787     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1788     MAX_PATTERN_COUNT);
1789     return 2;
1790     }
1791     patterns[cmd_pattern_count++] = option_data;
1792     }
1793    
1794     /* Otherwise, deal with single string or numeric data values. */
1795    
1796     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1797     {
1798     *((char **)op->dataptr) = option_data;
1799     }
1800     else
1801     {
1802     char *endptr;
1803     int n = strtoul(option_data, &endptr, 10);
1804     if (*endptr != 0)
1805     {
1806     if (longop)
1807 nigel 77 {
1808 nigel 87 char *equals = strchr(op->long_name, '=');
1809     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1810     equals - op->long_name;
1811     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1812     option_data, nlen, op->long_name);
1813 nigel 77 }
1814 nigel 87 else
1815     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1816     option_data, op->one_char);
1817     exit(usage(2));
1818 nigel 77 }
1819 nigel 87 *((int *)op->dataptr) = n;
1820 nigel 77 }
1821 nigel 49 }
1822    
1823 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1824     for -A and -B. */
1825    
1826     if (both_context > 0)
1827     {
1828     if (after_context == 0) after_context = both_context;
1829     if (before_context == 0) before_context = both_context;
1830     }
1831    
1832 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1833     LC_ALL environment variable is set, and if so, use it. */
1834 nigel 49
1835 nigel 87 if (locale == NULL)
1836 nigel 53 {
1837 nigel 87 locale = getenv("LC_ALL");
1838     locale_from = "LCC_ALL";
1839 nigel 53 }
1840 nigel 49
1841 nigel 87 if (locale == NULL)
1842     {
1843     locale = getenv("LC_CTYPE");
1844     locale_from = "LC_CTYPE";
1845     }
1846 nigel 49
1847 nigel 87 /* If a locale has been provided, set it, and generate the tables that PCRE
1848     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1849    
1850     if (locale != NULL)
1851 nigel 49 {
1852 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1853 nigel 53 {
1854 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1855     locale, locale_from);
1856 nigel 53 return 2;
1857     }
1858 nigel 87 pcretables = pcre_maketables();
1859     }
1860 nigel 77
1861 nigel 87 /* Sort out colouring */
1862    
1863     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1864     {
1865     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1866     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1867     else
1868 nigel 53 {
1869 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1870     colour_option);
1871     return 2;
1872 nigel 77 }
1873 nigel 87 if (do_colour)
1874 nigel 77 {
1875 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1876     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1877     if (cs != NULL) colour_string = cs;
1878 nigel 77 }
1879 nigel 87 }
1880 nigel 77
1881 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1882    
1883     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1884     {
1885     pcre_options |= PCRE_NEWLINE_CR;
1886 nigel 93 endlinetype = EL_CR;
1887 nigel 91 }
1888     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1889     {
1890     pcre_options |= PCRE_NEWLINE_LF;
1891 nigel 93 endlinetype = EL_LF;
1892 nigel 91 }
1893     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1894     {
1895     pcre_options |= PCRE_NEWLINE_CRLF;
1896 nigel 93 endlinetype = EL_CRLF;
1897 nigel 91 }
1898 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1899     {
1900     pcre_options |= PCRE_NEWLINE_ANY;
1901     endlinetype = EL_ANY;
1902     }
1903 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1904     {
1905     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906     endlinetype = EL_ANYCRLF;
1907     }
1908 nigel 91 else
1909     {
1910     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1911     return 2;
1912     }
1913    
1914 nigel 87 /* Interpret the text values for -d and -D */
1915    
1916     if (dee_option != NULL)
1917     {
1918     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1919     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1920     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1921     else
1922 nigel 77 {
1923 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1924     return 2;
1925 nigel 53 }
1926 nigel 49 }
1927    
1928 nigel 87 if (DEE_option != NULL)
1929     {
1930     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1931     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1932     else
1933     {
1934     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1935     return 2;
1936     }
1937     }
1938 nigel 49
1939 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1940 nigel 87
1941     #ifdef JFRIEDL_DEBUG
1942     if (S_arg > 9)
1943 nigel 49 {
1944 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1945     return 2;
1946     }
1947 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1948     {
1949     if (jfriedl_XT == 0) jfriedl_XT = 1;
1950     if (jfriedl_XR == 0) jfriedl_XR = 1;
1951     }
1952 nigel 87 #endif
1953 nigel 77
1954 nigel 87 /* Get memory to store the pattern and hints lists. */
1955    
1956     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1957     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1958    
1959     if (pattern_list == NULL || hints_list == NULL)
1960     {
1961     fprintf(stderr, "pcregrep: malloc failed\n");
1962 ph10 123 goto EXIT2;
1963 nigel 87 }
1964    
1965     /* If no patterns were provided by -e, and there is no file provided by -f,
1966     the first argument is the one and only pattern, and it must exist. */
1967    
1968     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1969     {
1970 nigel 63 if (i >= argc) return usage(2);
1971 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1972     }
1973 nigel 77
1974 nigel 87 /* Compile the patterns that were provided on the command line, either by
1975     multiple uses of -e or as a single unkeyed pattern. */
1976    
1977     for (j = 0; j < cmd_pattern_count; j++)
1978     {
1979     if (!compile_pattern(patterns[j], pcre_options, NULL,
1980     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1981 ph10 123 goto EXIT2;
1982 nigel 87 }
1983    
1984     /* Compile the regular expressions that are provided in a file. */
1985    
1986     if (pattern_filename != NULL)
1987     {
1988     int linenumber = 0;
1989     FILE *f;
1990     char *filename;
1991     char buffer[MBUFTHIRD];
1992    
1993     if (strcmp(pattern_filename, "-") == 0)
1994 nigel 77 {
1995 nigel 87 f = stdin;
1996     filename = stdin_name;
1997 nigel 77 }
1998 nigel 87 else
1999 nigel 77 {
2000 nigel 87 f = fopen(pattern_filename, "r");
2001     if (f == NULL)
2002     {
2003     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2004     strerror(errno));
2005 ph10 123 goto EXIT2;
2006 nigel 87 }
2007     filename = pattern_filename;
2008 nigel 77 }
2009    
2010 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2011 nigel 53 {
2012 nigel 87 char *s = buffer + (int)strlen(buffer);
2013     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2014     *s = 0;
2015     linenumber++;
2016     if (buffer[0] == 0) continue; /* Skip blank lines */
2017     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2018 ph10 121 goto EXIT2;
2019 nigel 53 }
2020 nigel 87
2021     if (f != stdin) fclose(f);
2022 nigel 49 }
2023    
2024 nigel 77 /* Study the regular expressions, as we will be running them many times */
2025 nigel 53
2026     for (j = 0; j < pattern_count; j++)
2027     {
2028     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2029     if (error != NULL)
2030     {
2031     char s[16];
2032     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2033     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2034 ph10 121 goto EXIT2;
2035 nigel 53 }
2036 ph10 142 hint_count++;
2037 nigel 53 }
2038    
2039 nigel 77 /* If there are include or exclude patterns, compile them. */
2040    
2041     if (exclude_pattern != NULL)
2042     {
2043 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2044     pcretables);
2045 nigel 77 if (exclude_compiled == NULL)
2046     {
2047     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2048     errptr, error);
2049 ph10 121 goto EXIT2;
2050 nigel 77 }
2051     }
2052    
2053     if (include_pattern != NULL)
2054     {
2055 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2056     pcretables);
2057 nigel 77 if (include_compiled == NULL)
2058     {
2059     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2060     errptr, error);
2061 ph10 121 goto EXIT2;
2062 nigel 77 }
2063     }
2064    
2065 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2066 nigel 49
2067 nigel 87 if (i >= argc)
2068 ph10 121 {
2069     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2070     goto EXIT;
2071 ph10 123 }
2072 nigel 49
2073 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2074     Pass in the fact that there is only one argument at top level - this suppresses
2075 nigel 87 the file name if the argument is not a directory and filenames are not
2076     otherwise forced. */
2077 nigel 49
2078 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2079 nigel 49
2080     for (; i < argc; i++)
2081     {
2082 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2083     only_one_at_top);
2084 nigel 77 if (frc > 1) rc = frc;
2085     else if (frc == 0 && rc == 1) rc = 0;
2086 nigel 49 }
2087    
2088 ph10 121 EXIT:
2089     if (pattern_list != NULL)
2090     {
2091 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2092 ph10 121 free(pattern_list);
2093 ph10 123 }
2094 ph10 121 if (hints_list != NULL)
2095     {
2096 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2097 ph10 121 free(hints_list);
2098 ph10 123 }
2099 nigel 49 return rc;
2100 ph10 121
2101     EXIT2:
2102     rc = 2;
2103     goto EXIT;
2104 nigel 49 }
2105    
2106 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12