/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 283 - (hide annotations) (download)
Fri Dec 7 19:59:19 2007 UTC (6 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 61706 byte(s)
David Byron's patch for typos and one re-arrangement in Windows code in 
pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 236 #include "pcre.h"
59 nigel 49
/* Boolean type and values used throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define MAX_PATTERN_COUNT 100

/* One third of the size of the main file buffer: BUFSIZ if that is larger
than 8192, otherwise 8192. */

#if BUFSIZ > 8192
#define MBUFTHIRD BUFSIZ
#else
#define MBUFTHIRD 8192
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };

/* Actions for the -d and -D options */

enum { dee_READ, dee_SKIP, dee_RECURSE };
enum { DEE_READ, DEE_SKIP };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
103     #ifdef JFRIEDL_DEBUG
104     static int S_arg = -1;
105 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107     static const char *jfriedl_prefix = "";
108     static const char *jfriedl_postfix = "";
109 nigel 87 #endif
110    
111 nigel 93 static int endlinetype;
112 nigel 91
113 nigel 87 static char *colour_string = (char *)"1;31";
114     static char *colour_option = NULL;
115     static char *dee_option = NULL;
116     static char *DEE_option = NULL;
117 nigel 91 static char *newline = NULL;
118 nigel 53 static char *pattern_filename = NULL;
119 nigel 77 static char *stdin_name = (char *)"(standard input)";
120 nigel 87 static char *locale = NULL;
121    
122     static const unsigned char *pcretables = NULL;
123    
124 nigel 53 static int pattern_count = 0;
125 ph10 121 static pcre **pattern_list = NULL;
126     static pcre_extra **hints_list = NULL;
127 nigel 49
128 nigel 77 static char *include_pattern = NULL;
129     static char *exclude_pattern = NULL;
130    
131     static pcre *include_compiled = NULL;
132     static pcre *exclude_compiled = NULL;
133    
134     static int after_context = 0;
135     static int before_context = 0;
136     static int both_context = 0;
137 nigel 87 static int dee_action = dee_READ;
138     static int DEE_action = DEE_READ;
139     static int error_count = 0;
140     static int filenames = FN_DEFAULT;
141     static int process_options = 0;
142 nigel 77
143 nigel 49 static BOOL count_only = FALSE;
144 nigel 87 static BOOL do_colour = FALSE;
145 ph10 280 static BOOL file_offsets = FALSE;
146 nigel 77 static BOOL hyphenpending = FALSE;
147 nigel 49 static BOOL invert = FALSE;
148 ph10 280 static BOOL line_offsets = FALSE;
149 nigel 77 static BOOL multiline = FALSE;
150 nigel 49 static BOOL number = FALSE;
151 nigel 87 static BOOL only_matching = FALSE;
152 nigel 77 static BOOL quiet = FALSE;
153 nigel 49 static BOOL silent = FALSE;
154 nigel 93 static BOOL utf8 = FALSE;
155 nigel 49
/* Structure for options and list of them. The "type" field of an option_item
holds one of the OP_xxx values below. */

enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
       OP_PATLIST };

typedef struct option_item {
  int type;               /* One of the OP_xxx values */
  int one_char;           /* Single-letter form, or a negative N_xxx code */
  void *dataptr;          /* Variable associated with the option, or NULL */
  const char *long_name;  /* Long option name, as shown in the option table */
  const char *help_text;  /* One-line description of the option */
} option_item;
168 nigel 49
/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_HELP (-3)
#define N_INCLUDE (-4)
#define N_LABEL (-5)
#define N_LOCALE (-6)
#define N_NULL (-7)
#define N_LOFFSETS (-8)
#define N_FOFFSETS (-9)
181 nigel 87
182 nigel 53 static option_item optionlist[] = {
183 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
184     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
185     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
186     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
187     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
188     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
189     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
190     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
191     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
192     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
193     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
194     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
195     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
196 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
197 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
198     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
199     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
200     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
201     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
202     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
203 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
204 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
205     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
206 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
207 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
208     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
209     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
210     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
211     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
212     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
213     #ifdef JFRIEDL_DEBUG
214     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
215     #endif
216     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
217     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
218     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
219     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
220     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
221     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
222     { OP_NODATA, 0, NULL, NULL, NULL }
223 nigel 53 };
224    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same. Each table has eight entries, one for every
combination of the three bits. */

static const char *prefix[] = {
  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };

static const char *suffix[] = {
  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
235    
/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf".

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask that extracts the data bits from the first byte.

utf8_table4 is indexed by the bottom 6 bits of a first byte that is >= 0xc0
and gives the number of additional bytes in the character. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
245    
246    
247    
248 nigel 53 /*************************************************
249 nigel 87 * OS-specific functions *
250 nigel 53 *************************************************/
251    
252     /* These functions are defined so that they can be made system specific,
253 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
254 nigel 53
255    
256     /************* Directory scanning in Unix ***********/
257    
258 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
259 nigel 53 #include <sys/types.h>
260     #include <sys/stat.h>
261     #include <dirent.h>
262    
/* In the Unix version a directory handle is simply a DIR stream. */

typedef DIR directory_type;
264    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 0;   /* In the expectation that opening as a file will fail */
  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
273    
274 nigel 67 static directory_type *
275 nigel 53 opendirectory(char *filename)
276     {
277     return opendir(filename);
278     }
279    
280 nigel 67 static char *
281 nigel 53 readdirectory(directory_type *dir)
282     {
283     for (;;)
284     {
285     struct dirent *dent = readdir(dir);
286     if (dent == NULL) return NULL;
287     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
288     return dent->d_name;
289     }
290 ph10 151 /* Control never reaches here */
291 nigel 53 }
292    
293 nigel 67 static void
294 nigel 53 closedirectory(directory_type *dir)
295     {
296     closedir(dir);
297     }
298    
299    
/************* Test for regular file in Unix **********/

/* Argument:   filename   the path to test
Returns:       non-zero if stat() reports a regular file, or if stat() fails
               zero if stat() succeeds and the path is not a regular file
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  if (stat(filename, &statbuf) < 0)
    return 1;   /* In the expectation that opening as a file will fail */
  return S_ISREG(statbuf.st_mode);
}
310    
311    
312     /************* Test stdout for being a terminal in Unix **********/
313    
314     static BOOL
315     is_stdout_tty(void)
316     {
317     return isatty(fileno(stdout));
318     }
319    
320    
321 nigel 63 /************* Directory scanning in Win32 ***********/
322 nigel 53
323 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
324 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
325 ph10 283 when it did not exist. David Byron added a patch that moved the #include of
326     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
327     */
328 nigel 53
329 ph10 97 #elif HAVE_WINDOWS_H
330 nigel 63
331     #ifndef STRICT
332     # define STRICT
333     #endif
334     #ifndef WIN32_LEAN_AND_MEAN
335     # define WIN32_LEAN_AND_MEAN
336     #endif
337 ph10 283
338     #include <windows.h>
339    
340 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
341     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
342     #endif
343    
344 nigel 63 typedef struct directory_type
345     {
346     HANDLE handle;
347     BOOL first;
348     WIN32_FIND_DATA data;
349     } directory_type;
350    
351     int
352     isdirectory(char *filename)
353     {
354     DWORD attr = GetFileAttributes(filename);
355     if (attr == INVALID_FILE_ATTRIBUTES)
356     return 0;
357     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
358     }
359    
360     directory_type *
361     opendirectory(char *filename)
362     {
363     size_t len;
364     char *pattern;
365     directory_type *dir;
366     DWORD err;
367     len = strlen(filename);
368     pattern = (char *) malloc(len + 3);
369     dir = (directory_type *) malloc(sizeof(*dir));
370     if ((pattern == NULL) || (dir == NULL))
371     {
372     fprintf(stderr, "pcregrep: malloc failed\n");
373     exit(2);
374     }
375     memcpy(pattern, filename, len);
376     memcpy(&(pattern[len]), "\\*", 3);
377     dir->handle = FindFirstFile(pattern, &(dir->data));
378     if (dir->handle != INVALID_HANDLE_VALUE)
379     {
380     free(pattern);
381     dir->first = TRUE;
382     return dir;
383     }
384     err = GetLastError();
385     free(pattern);
386     free(dir);
387     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
388     return NULL;
389     }
390    
391     char *
392     readdirectory(directory_type *dir)
393     {
394     for (;;)
395     {
396     if (!dir->first)
397     {
398     if (!FindNextFile(dir->handle, &(dir->data)))
399     return NULL;
400     }
401     else
402     {
403     dir->first = FALSE;
404     }
405     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
406     return dir->data.cFileName;
407     }
408     #ifndef _MSC_VER
409     return NULL; /* Keep compiler happy; never executed */
410     #endif
411     }
412    
413     void
414     closedirectory(directory_type *dir)
415     {
416     FindClose(dir->handle);
417     free(dir);
418     }
419    
420    
/************* Test for regular file in Win32 **********/

/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories. */

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
430    
431    
432     /************* Test stdout for being a terminal in Win32 **********/
433    
434     /* I don't know how to do this; assume never */
435    
436     static BOOL
437     is_stdout_tty(void)
438     {
439 ph10 283 return FALSE;
440 nigel 87 }
441    
442    
443 nigel 53 /************* Directory scanning when we can't do it ***********/
444    
445     /* The type is void, and apart from isdirectory(), the functions do nothing. */
446    
447 nigel 63 #else
448    
/* Stubs for systems with no directory support: nothing is ever reported as a
directory, and no directory can be opened or read. */

typedef void directory_type;

int isdirectory(char *filename) { return 0; }
directory_type * opendirectory(char *filename) { return (directory_type*)0; }
char *readdirectory(directory_type *dir) { return (char*)0; }
void closedirectory(directory_type *dir) {}
455    
456 nigel 87
/************* Test for regular when we can't do it **********/

/* Assume all files are regular. */

int isregfile(char *filename) { return 1; }
462    
463    
464     /************* Test stdout for being a terminal when we can't do it **********/
465    
466     static BOOL
467     is_stdout_tty(void)
468     {
469     return FALSE;
470     }
471    
472    
473 nigel 53 #endif
474    
475    
476    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise the fixed text
            "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n < 0 || n >= sys_nerr) return "unknown error number";
  return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
496    
497    
498    
499     /*************************************************
500 nigel 93 * Find end of line *
501     *************************************************/
502    
503     /* The length of the endline sequence that is found is set via lenptr. This may
504     be zero at the very end of the file if there is no line-ending sequence there.
505    
506     Arguments:
507     p current position in line
508     endptr end of available data
509     lenptr where to put the length of the eol sequence
510    
511     Returns: pointer to the last byte of the line
512     */
513    
514     static char *
515     end_of_line(char *p, char *endptr, int *lenptr)
516     {
517     switch(endlinetype)
518     {
519     default: /* Just in case */
520     case EL_LF:
521     while (p < endptr && *p != '\n') p++;
522     if (p < endptr)
523     {
524     *lenptr = 1;
525     return p + 1;
526     }
527     *lenptr = 0;
528     return endptr;
529    
530     case EL_CR:
531     while (p < endptr && *p != '\r') p++;
532     if (p < endptr)
533     {
534     *lenptr = 1;
535     return p + 1;
536     }
537     *lenptr = 0;
538     return endptr;
539    
540     case EL_CRLF:
541     for (;;)
542     {
543     while (p < endptr && *p != '\r') p++;
544     if (++p >= endptr)
545     {
546     *lenptr = 0;
547     return endptr;
548     }
549     if (*p == '\n')
550     {
551     *lenptr = 2;
552     return p + 1;
553     }
554     }
555     break;
556    
557 ph10 149 case EL_ANYCRLF:
558     while (p < endptr)
559     {
560     int extra = 0;
561     register int c = *((unsigned char *)p);
562    
563     if (utf8 && c >= 0xc0)
564     {
565     int gcii, gcss;
566     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
567     gcss = 6*extra;
568     c = (c & utf8_table3[extra]) << gcss;
569     for (gcii = 1; gcii <= extra; gcii++)
570     {
571     gcss -= 6;
572     c |= (p[gcii] & 0x3f) << gcss;
573     }
574     }
575    
576     p += 1 + extra;
577    
578     switch (c)
579     {
580     case 0x0a: /* LF */
581     *lenptr = 1;
582     return p;
583    
584     case 0x0d: /* CR */
585     if (p < endptr && *p == 0x0a)
586     {
587     *lenptr = 2;
588     p++;
589     }
590     else *lenptr = 1;
591     return p;
592 ph10 150
593 ph10 149 default:
594     break;
595     }
596     } /* End of loop for ANYCRLF case */
597 ph10 150
598 ph10 149 *lenptr = 0; /* Must have hit the end */
599     return endptr;
600    
601 nigel 93 case EL_ANY:
602     while (p < endptr)
603     {
604     int extra = 0;
605     register int c = *((unsigned char *)p);
606    
607     if (utf8 && c >= 0xc0)
608     {
609     int gcii, gcss;
610     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
611     gcss = 6*extra;
612     c = (c & utf8_table3[extra]) << gcss;
613     for (gcii = 1; gcii <= extra; gcii++)
614     {
615     gcss -= 6;
616     c |= (p[gcii] & 0x3f) << gcss;
617     }
618     }
619    
620     p += 1 + extra;
621    
622     switch (c)
623     {
624     case 0x0a: /* LF */
625     case 0x0b: /* VT */
626     case 0x0c: /* FF */
627     *lenptr = 1;
628     return p;
629    
630     case 0x0d: /* CR */
631     if (p < endptr && *p == 0x0a)
632     {
633     *lenptr = 2;
634     p++;
635     }
636     else *lenptr = 1;
637     return p;
638    
639     case 0x85: /* NEL */
640     *lenptr = utf8? 2 : 1;
641     return p;
642    
643     case 0x2028: /* LS */
644     case 0x2029: /* PS */
645     *lenptr = 3;
646     return p;
647    
648     default:
649     break;
650     }
651     } /* End of loop for ANY case */
652    
653     *lenptr = 0; /* Must have hit the end */
654     return endptr;
655     } /* End of overall switch */
656     }
657    
658    
659    
660     /*************************************************
661     * Find start of previous line *
662     *************************************************/
663    
664     /* This is called when looking back for before lines to print.
665    
666     Arguments:
667     p start of the subsequent line
668     startptr start of available data
669    
670     Returns: pointer to the start of the previous line
671     */
672    
673     static char *
674     previous_line(char *p, char *startptr)
675     {
676     switch(endlinetype)
677     {
678     default: /* Just in case */
679     case EL_LF:
680     p--;
681     while (p > startptr && p[-1] != '\n') p--;
682     return p;
683    
684     case EL_CR:
685     p--;
686     while (p > startptr && p[-1] != '\n') p--;
687     return p;
688    
689     case EL_CRLF:
690     for (;;)
691     {
692     p -= 2;
693     while (p > startptr && p[-1] != '\n') p--;
694     if (p <= startptr + 1 || p[-2] == '\r') return p;
695     }
696     return p; /* But control should never get here */
697    
698     case EL_ANY:
699 ph10 150 case EL_ANYCRLF:
700 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
701     if (utf8) while ((*p & 0xc0) == 0x80) p--;
702    
703     while (p > startptr)
704     {
705     register int c;
706     char *pp = p - 1;
707    
708     if (utf8)
709     {
710     int extra = 0;
711     while ((*pp & 0xc0) == 0x80) pp--;
712     c = *((unsigned char *)pp);
713     if (c >= 0xc0)
714     {
715     int gcii, gcss;
716     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
717     gcss = 6*extra;
718     c = (c & utf8_table3[extra]) << gcss;
719     for (gcii = 1; gcii <= extra; gcii++)
720     {
721     gcss -= 6;
722     c |= (pp[gcii] & 0x3f) << gcss;
723     }
724     }
725     }
726     else c = *((unsigned char *)pp);
727    
728 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
729 nigel 93 {
730     case 0x0a: /* LF */
731 ph10 149 case 0x0d: /* CR */
732     return p;
733 ph10 150
734 ph10 149 default:
735     break;
736 ph10 150 }
737 ph10 149
738     else switch (c)
739     {
740     case 0x0a: /* LF */
741 nigel 93 case 0x0b: /* VT */
742     case 0x0c: /* FF */
743     case 0x0d: /* CR */
744     case 0x85: /* NEL */
745     case 0x2028: /* LS */
746     case 0x2029: /* PS */
747     return p;
748    
749     default:
750     break;
751     }
752    
753     p = pp; /* Back one character */
754     } /* End of loop for ANY case */
755    
756     return startptr; /* Hit start of data */
757     } /* End of overall switch */
758     }
759    
760    
761    
762    
763    
764     /*************************************************
765 nigel 77 * Print the previous "after" lines *
766 nigel 49 *************************************************/
767    
768 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
769 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
770     that a binary zero does not terminate it.
771 nigel 77
772     Arguments:
773     lastmatchnumber the number of the last matching line, plus one
774     lastmatchrestart where we restarted after the last match
775     endptr end of available data
776     printname filename for printing
777    
778     Returns: nothing
779     */
780    
781     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
782     char *endptr, char *printname)
783     {
784     if (after_context > 0 && lastmatchnumber > 0)
785     {
786     int count = 0;
787     while (lastmatchrestart < endptr && count++ < after_context)
788     {
789 nigel 93 int ellength;
790 nigel 77 char *pp = lastmatchrestart;
791     if (printname != NULL) fprintf(stdout, "%s-", printname);
792     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
793 nigel 93 pp = end_of_line(pp, endptr, &ellength);
794     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
795     lastmatchrestart = pp;
796 nigel 77 }
797     hyphenpending = TRUE;
798     }
799     }
800    
801    
802    
803     /*************************************************
804     * Grep an individual file *
805     *************************************************/
806    
807     /* This is called from grep_or_recurse() below. It uses a buffer that is three
808     times the value of MBUFTHIRD. The matching point is never allowed to stray into
809     the top third of the buffer, thus keeping more of the file available for
810     context printing or for multiline scanning. For large files, the pointer will
811     be in the middle third most of the time, so the bottom third is available for
812     "before" context printing.
813    
814     Arguments:
815     in the fopened FILE stream
816     printname the file name if it is to be printed for each match
817     or NULL if the file name is not to be printed
818     it cannot be NULL if filenames[_nomatch]_only is set
819    
820     Returns: 0 if there was at least one match
821     1 otherwise (no matches)
822     */
823    
824 nigel 49 static int
825 nigel 77 pcregrep(FILE *in, char *printname)
826 nigel 49 {
827     int rc = 1;
828 nigel 77 int linenumber = 1;
829     int lastmatchnumber = 0;
830 nigel 49 int count = 0;
831 ph10 280 int filepos = 0;
832 nigel 49 int offsets[99];
833 nigel 77 char *lastmatchrestart = NULL;
834     char buffer[3*MBUFTHIRD];
835     char *ptr = buffer;
836     char *endptr;
837     size_t bufflength;
838     BOOL endhyphenpending = FALSE;
839 nigel 49
840 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
841     end of what we have. */
842    
843     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
844     endptr = buffer + bufflength;
845    
846     /* Loop while the current pointer is not at the end of the file. For large
847     files, endptr will be at the end of the buffer when we are in the middle of the
848     file, but ptr will never get there, because as soon as it gets over 2/3 of the
849     way, the buffer is shifted left and re-filled. */
850    
851     while (ptr < endptr)
852 nigel 49 {
853 nigel 93 int i, endlinelength;
854 nigel 87 int mrc = 0;
855 nigel 53 BOOL match = FALSE;
856 ph10 279 char *matchptr = ptr;
857 nigel 77 char *t = ptr;
858     size_t length, linelength;
859 nigel 49
860 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
861     of the subject string to pass to pcre_exec(). In multiline mode, it is the
862     length remainder of the data in the buffer. Otherwise, it is the length of
863     the next line. After matching, we always advance by the length of the next
864     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
865     that any match is constrained to be in the first line. */
866    
867 nigel 93 t = end_of_line(t, endptr, &endlinelength);
868     linelength = t - ptr - endlinelength;
869 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
870 nigel 77
871 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
872    
873     #ifdef JFRIEDL_DEBUG
874     if (jfriedl_XT || jfriedl_XR)
875     {
876     #include <sys/time.h>
877     #include <time.h>
878     struct timeval start_time, end_time;
879     struct timezone dummy;
880    
881     if (jfriedl_XT)
882     {
883     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
884     const char *orig = ptr;
885     ptr = malloc(newlen + 1);
886     if (!ptr) {
887     printf("out of memory");
888     exit(2);
889     }
890     endptr = ptr;
891     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
892     for (i = 0; i < jfriedl_XT; i++) {
893     strncpy(endptr, orig, length);
894     endptr += length;
895     }
896     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
897     length = newlen;
898     }
899    
900     if (gettimeofday(&start_time, &dummy) != 0)
901     perror("bad gettimeofday");
902    
903    
904     for (i = 0; i < jfriedl_XR; i++)
905     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
906    
907     if (gettimeofday(&end_time, &dummy) != 0)
908     perror("bad gettimeofday");
909    
910     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
911     -
912     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
913    
914     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
915     return 0;
916     }
917     #endif
918    
919 ph10 279 /* We come back here after a match when the -o option (only_matching) is set,
920     in order to find any further matches in the same line. */
921    
922     ONLY_MATCHING_RESTART:
923 nigel 89
924 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
925     the final newline in the subject string. */
926    
927 nigel 87 for (i = 0; i < pattern_count; i++)
928 nigel 53 {
929 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
930 nigel 87 offsets, 99);
931     if (mrc >= 0) { match = TRUE; break; }
932     if (mrc != PCRE_ERROR_NOMATCH)
933     {
934     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
935     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
936     fprintf(stderr, "this line:\n");
937 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
938 nigel 87 fprintf(stderr, "\n");
939     if (error_count == 0 &&
940     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
941     {
942     fprintf(stderr, "pcregrep: error %d means that a resource limit "
943     "was exceeded\n", mrc);
944     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
945     }
946     if (error_count++ > 20)
947     {
948     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
949     exit(2);
950     }
951     match = invert; /* No more matching; don't show the line again */
952     break;
953     }
954 nigel 53 }
955 nigel 49
956 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
957 nigel 77
958 nigel 49 if (match != invert)
959     {
960 nigel 77 BOOL hyphenprinted = FALSE;
961    
962 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
963 nigel 77
964 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
965    
966     /* Just count if just counting is wanted. */
967    
968 nigel 49 if (count_only) count++;
969    
970 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
971     in the file. */
972    
973     else if (filenames == FN_ONLY)
974 nigel 49 {
975 nigel 77 fprintf(stdout, "%s\n", printname);
976 nigel 49 return 0;
977     }
978    
979 nigel 87 /* Likewise, if all we want is a yes/no answer. */
980    
981 nigel 77 else if (quiet) return 0;
982 nigel 49
983 nigel 87 /* The --only-matching option prints just the substring that matched, and
984 ph10 280 the --file-offsets and --line-offsets options output offsets for the
985     matching substring (they both force --only-matching). None of these options
986     prints any context. Afterwards, adjust the start and length, and then jump
987     back to look for further matches in the same line. If we are in invert
988     mode, however, nothing is printed - this could be still useful because the
989     return code is set. */
990 nigel 87
991     else if (only_matching)
992     {
993 ph10 279 if (!invert)
994     {
995     if (printname != NULL) fprintf(stdout, "%s:", printname);
996     if (number) fprintf(stdout, "%d:", linenumber);
997 ph10 280 if (line_offsets)
998     fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
999     offsets[1] - offsets[0]);
1000     else if (file_offsets)
1001     fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1002     offsets[1] - offsets[0]);
1003     else
1004     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1005 ph10 279 fprintf(stdout, "\n");
1006     matchptr += offsets[1];
1007     length -= offsets[1];
1008     match = FALSE;
1009     goto ONLY_MATCHING_RESTART;
1010     }
1011 nigel 87 }
1012    
1013     /* This is the default case when none of the above options is set. We print
1014     the matching lines(s), possibly preceded and/or followed by other lines of
1015     context. */
1016    
1017 nigel 49 else
1018     {
1019 nigel 77 /* See if there is a requirement to print some "after" lines from a
1020     previous match. We never print any overlaps. */
1021    
1022     if (after_context > 0 && lastmatchnumber > 0)
1023     {
1024 nigel 93 int ellength;
1025 nigel 77 int linecount = 0;
1026     char *p = lastmatchrestart;
1027    
1028     while (p < ptr && linecount < after_context)
1029     {
1030 nigel 93 p = end_of_line(p, ptr, &ellength);
1031 nigel 77 linecount++;
1032     }
1033    
1034     /* It is important to advance lastmatchrestart during this printing so
1035 nigel 87 that it interacts correctly with any "before" printing below. Print
1036     each line's data using fwrite() in case there are binary zeroes. */
1037 nigel 77
1038     while (lastmatchrestart < p)
1039     {
1040     char *pp = lastmatchrestart;
1041     if (printname != NULL) fprintf(stdout, "%s-", printname);
1042     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1043 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1044     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1045     lastmatchrestart = pp;
1046 nigel 77 }
1047     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1048     }
1049    
1050     /* If there were non-contiguous lines printed above, insert hyphens. */
1051    
1052     if (hyphenpending)
1053     {
1054     fprintf(stdout, "--\n");
1055     hyphenpending = FALSE;
1056     hyphenprinted = TRUE;
1057     }
1058    
1059     /* See if there is a requirement to print some "before" lines for this
1060     match. Again, don't print overlaps. */
1061    
1062     if (before_context > 0)
1063     {
1064     int linecount = 0;
1065     char *p = ptr;
1066    
1067     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1068 nigel 87 linecount < before_context)
1069 nigel 77 {
1070 nigel 87 linecount++;
1071 nigel 93 p = previous_line(p, buffer);
1072 nigel 77 }
1073    
1074     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1075     fprintf(stdout, "--\n");
1076    
1077     while (p < ptr)
1078     {
1079 nigel 93 int ellength;
1080 nigel 77 char *pp = p;
1081     if (printname != NULL) fprintf(stdout, "%s-", printname);
1082     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1083 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1084     fwrite(p, 1, pp - p, stdout);
1085     p = pp;
1086 nigel 77 }
1087     }
1088    
1089     /* Now print the matching line(s); ensure we set hyphenpending at the end
1090 nigel 85 of the file if any context lines are being output. */
1091 nigel 77
1092 nigel 85 if (after_context > 0 || before_context > 0)
1093     endhyphenpending = TRUE;
1094    
1095 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1096 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1097 nigel 77
1098     /* In multiline mode, we want to print to the end of the line in which
1099     the end of the matched string is found, so we adjust linelength and the
1100 ph10 222 line number appropriately, but only when there actually was a match
1101     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1102     the match will always be before the first newline sequence. */
1103 nigel 77
1104     if (multiline)
1105     {
1106 nigel 93 int ellength;
1107 ph10 222 char *endmatch = ptr;
1108     if (!invert)
1109 nigel 93 {
1110 ph10 222 endmatch += offsets[1];
1111     t = ptr;
1112     while (t < endmatch)
1113     {
1114     t = end_of_line(t, endptr, &ellength);
1115     if (t <= endmatch) linenumber++; else break;
1116     }
1117 nigel 93 }
1118     endmatch = end_of_line(endmatch, endptr, &ellength);
1119     linelength = endmatch - ptr - ellength;
1120 nigel 77 }
1121    
1122 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1123     zeroes are treated as just another data character. */
1124    
1125     /* This extra option, for Jeffrey Friedl's debugging requirements,
1126     replaces the matched string, or a specific captured string if it exists,
1127     with X. When this happens, colouring is ignored. */
1128    
1129     #ifdef JFRIEDL_DEBUG
1130     if (S_arg >= 0 && S_arg < mrc)
1131     {
1132     int first = S_arg * 2;
1133     int last = first + 1;
1134     fwrite(ptr, 1, offsets[first], stdout);
1135     fprintf(stdout, "X");
1136     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1137     }
1138     else
1139     #endif
1140    
1141     /* We have to split the line(s) up if colouring. */
1142    
1143     if (do_colour)
1144     {
1145     fwrite(ptr, 1, offsets[0], stdout);
1146     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1147     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1148     fprintf(stdout, "%c[00m", 0x1b);
1149 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1150 ph10 239 stdout);
1151 nigel 87 }
1152 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1153 nigel 49 }
1154    
1155 nigel 87 /* End of doing what has to be done for a match */
1156    
1157 nigel 77 rc = 0; /* Had some success */
1158    
1159     /* Remember where the last match happened for after_context. We remember
1160     where we are about to restart, and that line's number. */
1161    
1162 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1163 nigel 77 lastmatchnumber = linenumber + 1;
1164 nigel 49 }
1165 nigel 77
1166 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1167     anything to be printed), we have to move on to the end of the match before
1168     proceeding. */
1169    
1170     if (multiline && invert && match)
1171     {
1172     int ellength;
1173     char *endmatch = ptr + offsets[1];
1174     t = ptr;
1175     while (t < endmatch)
1176     {
1177     t = end_of_line(t, endptr, &ellength);
1178     if (t <= endmatch) linenumber++; else break;
1179     }
1180     endmatch = end_of_line(endmatch, endptr, &ellength);
1181     linelength = endmatch - ptr - ellength;
1182     }
1183    
1184 ph10 280 /* Advance to after the newline and increment the line number. The file
1185     offset to the current line is maintained in filepos. */
1186 nigel 77
1187 nigel 93 ptr += linelength + endlinelength;
1188 ph10 280 filepos += linelength + endlinelength;
1189 nigel 77 linenumber++;
1190    
1191     /* If we haven't yet reached the end of the file (the buffer is full), and
1192     the current point is in the top 1/3 of the buffer, slide the buffer down by
1193     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1194     about to be lost, print them. */
1195    
1196     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1197     {
1198     if (after_context > 0 &&
1199     lastmatchnumber > 0 &&
1200     lastmatchrestart < buffer + MBUFTHIRD)
1201     {
1202     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1203     lastmatchnumber = 0;
1204     }
1205    
1206     /* Now do the shuffle */
1207    
1208     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1209     ptr -= MBUFTHIRD;
1210     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1211     endptr = buffer + bufflength;
1212    
1213     /* Adjust any last match point */
1214    
1215     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1216     }
1217     } /* Loop through the whole file */
1218    
1219     /* End of file; print final "after" lines if wanted; do_after_lines sets
1220     hyphenpending if it prints something. */
1221    
1222 nigel 87 if (!only_matching && !count_only)
1223     {
1224     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1225     hyphenpending |= endhyphenpending;
1226     }
1227 nigel 77
1228     /* Print the file name if we are looking for those without matches and there
1229     were none. If we found a match, we won't have got this far. */
1230    
1231 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1232 nigel 77 {
1233     fprintf(stdout, "%s\n", printname);
1234     return 0;
1235 nigel 49 }
1236    
1237 nigel 77 /* Print the match count if wanted */
1238    
1239 nigel 49 if (count_only)
1240     {
1241 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1242 nigel 49 fprintf(stdout, "%d\n", count);
1243     }
1244    
1245     return rc;
1246     }
1247    
1248    
1249    
1250     /*************************************************
1251 nigel 53 * Grep a file or recurse into a directory *
1252     *************************************************/
1253    
1254 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1255     recursing; if it's a file, grep it.
1256    
1257     Arguments:
1258     pathname the path to investigate
1259 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1260 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1261    
1262     Returns: 0 if there was at least one match
1263     1 if there were no matches
1264     2 there was some kind of error
1265    
1266     However, file opening failures are suppressed if "silent" is set.
1267     */
1268    
1269 nigel 53 static int
1270 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1271 nigel 53 {
1272     int rc = 1;
1273     int sep;
1274     FILE *in;
1275    
1276 nigel 77 /* If the file name is "-" we scan stdin */
1277 nigel 53
1278 nigel 77 if (strcmp(pathname, "-") == 0)
1279 nigel 53 {
1280 nigel 77 return pcregrep(stdin,
1281 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1282 nigel 77 stdin_name : NULL);
1283     }
1284    
1285    
1286 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1287     each file within it, subject to any include or exclude patterns that were set.
1288     The scanning code is localized so it can be made system-specific. */
1289    
1290     if ((sep = isdirectory(pathname)) != 0)
1291 nigel 77 {
1292 nigel 87 if (dee_action == dee_SKIP) return 1;
1293     if (dee_action == dee_RECURSE)
1294 nigel 53 {
1295 nigel 87 char buffer[1024];
1296     char *nextfile;
1297     directory_type *dir = opendirectory(pathname);
1298 nigel 53
1299 nigel 87 if (dir == NULL)
1300     {
1301     if (!silent)
1302     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1303     strerror(errno));
1304     return 2;
1305     }
1306 nigel 77
1307 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1308     {
1309     int frc, blen;
1310     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1311     blen = strlen(buffer);
1312 nigel 77
1313 nigel 87 if (exclude_compiled != NULL &&
1314     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1315     continue;
1316 nigel 77
1317 nigel 87 if (include_compiled != NULL &&
1318     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1319     continue;
1320    
1321     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1322     if (frc > 1) rc = frc;
1323     else if (frc == 0 && rc == 1) rc = 0;
1324     }
1325    
1326     closedirectory(dir);
1327     return rc;
1328 nigel 53 }
1329     }
1330    
1331 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1332     been requested. */
1333 nigel 53
1334 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1335    
1336     /* Control reaches here if we have a regular file, or if we have a directory
1337     and recursion or skipping was not requested, or if we have anything else and
1338     skipping was not requested. The scan proceeds. If this is the first and only
1339     argument at top level, we don't show the file name, unless we are only showing
1340     the file name, or the filename was forced (-H). */
1341    
1342 nigel 77 in = fopen(pathname, "r");
1343 nigel 53 if (in == NULL)
1344     {
1345 nigel 77 if (!silent)
1346     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1347     strerror(errno));
1348 nigel 53 return 2;
1349     }
1350    
1351 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1352     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1353 nigel 77
1354 nigel 53 fclose(in);
1355     return rc;
1356     }
1357    
1358    
1359    
1360    
1361     /*************************************************
1362 nigel 49 * Usage function *
1363     *************************************************/
1364    
1365     static int
1366     usage(int rc)
1367     {
1368 nigel 87 option_item *op;
1369     fprintf(stderr, "Usage: pcregrep [-");
1370     for (op = optionlist; op->one_char != 0; op++)
1371     {
1372     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1373     }
1374     fprintf(stderr, "] [long options] [pattern] [files]\n");
1375 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1376     "options.\n");
1377 nigel 49 return rc;
1378     }
1379    
1380    
1381    
1382    
1383     /*************************************************
1384 nigel 53 * Help function *
1385     *************************************************/
1386    
1387     static void
1388     help(void)
1389     {
1390     option_item *op;
1391    
1392 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1393 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1394 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1395     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1396 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1397    
1398     printf("Options:\n");
1399    
1400     for (op = optionlist; op->one_char != 0; op++)
1401     {
1402     int n;
1403     char s[4];
1404     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1405     printf(" %s --%s%n", s, op->long_name, &n);
1406     n = 30 - n;
1407     if (n < 1) n = 1;
1408     printf("%.*s%s\n", n, " ", op->help_text);
1409     }
1410    
1411 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1412     printf("trailing white space is removed and blank lines are ignored.\n");
1413     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1414 nigel 53
1415 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1416 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1417     }
1418    
1419    
1420    
1421    
1422     /*************************************************
1423 nigel 77 * Handle a single-letter, no data option *
1424 nigel 53 *************************************************/
1425    
1426     static int
1427     handle_option(int letter, int options)
1428     {
1429     switch(letter)
1430     {
1431 ph10 280 case N_FOFFSETS: file_offsets = TRUE; break;
1432 nigel 87 case N_HELP: help(); exit(0);
1433 ph10 280 case N_LOFFSETS: line_offsets = number = TRUE; break;
1434 nigel 53 case 'c': count_only = TRUE; break;
1435 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1436     case 'H': filenames = FN_FORCE; break;
1437     case 'h': filenames = FN_NONE; break;
1438 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1439 nigel 87 case 'l': filenames = FN_ONLY; break;
1440     case 'L': filenames = FN_NOMATCH_ONLY; break;
1441 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1442 nigel 53 case 'n': number = TRUE; break;
1443 nigel 87 case 'o': only_matching = TRUE; break;
1444 nigel 77 case 'q': quiet = TRUE; break;
1445 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1446 nigel 53 case 's': silent = TRUE; break;
1447 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1448 nigel 53 case 'v': invert = TRUE; break;
1449 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1450     case 'x': process_options |= PO_LINE_MATCH; break;
1451 nigel 53
1452     case 'V':
1453 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1454 nigel 53 exit(0);
1455     break;
1456    
1457     default:
1458     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1459     exit(usage(2));
1460     }
1461    
1462     return options;
1463     }
1464    
1465    
1466    
1467    
1468     /*************************************************
1469 nigel 87 * Construct printed ordinal *
1470     *************************************************/
1471    
1472     /* This turns a number into "1st", "3rd", etc. */
1473    
static char *
ordin(int n)
{
/* Turn a number into its English ordinal text ("1st", "2nd", "3rd", "4th",
"11th", "21st", ...). The result lives in a static buffer, so it is
overwritten by the next call and must not be freed.

The buffer is sized from the width of int: three decimal digits per byte is
more than enough for the digits, plus one for a sign, two for the suffix and
one for the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *suffix = "th";
int last_two = n % 100;
char *p = buffer;

/* 11, 12 and 13 (and 111, 212, ...) take "th" even though they end in 1, 2 or
3, so the last two digits are examined before the last one. */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

p += sprintf(p, "%d", n);
strcpy(p, suffix);
return buffer;
}
1490    
1491    
1492    
1493     /*************************************************
1494     * Compile a single pattern *
1495     *************************************************/
1496    
1497     /* When the -F option has been used, this is called for each substring.
1498     Otherwise it's called for each supplied pattern.
1499    
1500     Arguments:
1501     pattern the pattern string
1502     options the PCRE options
1503     filename the file name, or NULL for a command-line pattern
1504     count 0 if this is the only command line pattern, or
1505     number of the command line pattern, or
1506     linenumber for a pattern from a file
1507    
1508     Returns: TRUE on success, FALSE after an error
1509     */
1510    
1511     static BOOL
1512     compile_single_pattern(char *pattern, int options, char *filename, int count)
1513     {
1514     char buffer[MBUFTHIRD + 16];
1515     const char *error;
1516     int errptr;
1517    
1518     if (pattern_count >= MAX_PATTERN_COUNT)
1519     {
1520     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1521     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1522     return FALSE;
1523     }
1524    
1525     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1526     suffix[process_options]);
1527     pattern_list[pattern_count] =
1528     pcre_compile(buffer, options, &error, &errptr, pcretables);
1529 ph10 142 if (pattern_list[pattern_count] != NULL)
1530 ph10 141 {
1531 ph10 142 pattern_count++;
1532 ph10 141 return TRUE;
1533 ph10 142 }
1534 nigel 87
1535     /* Handle compile errors */
1536    
1537     errptr -= (int)strlen(prefix[process_options]);
1538     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1539    
1540     if (filename == NULL)
1541     {
1542     if (count == 0)
1543     fprintf(stderr, "pcregrep: Error in command-line regex "
1544     "at offset %d: %s\n", errptr, error);
1545     else
1546     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1547     "at offset %d: %s\n", ordin(count), errptr, error);
1548     }
1549     else
1550     {
1551     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1552     "at offset %d: %s\n", count, filename, errptr, error);
1553     }
1554    
1555     return FALSE;
1556     }
1557    
1558    
1559    
1560     /*************************************************
1561     * Compile one supplied pattern *
1562     *************************************************/
1563    
1564     /* When the -F option has been used, each string may be a list of strings,
1565 nigel 91 separated by line breaks. They will be matched literally.
1566 nigel 87
1567     Arguments:
1568     pattern the pattern string
1569     options the PCRE options
1570     filename the file name, or NULL for a command-line pattern
1571     count 0 if this is the only command line pattern, or
1572     number of the command line pattern, or
1573     linenumber for a pattern from a file
1574    
1575     Returns: TRUE on success, FALSE after an error
1576     */
1577    
1578     static BOOL
1579     compile_pattern(char *pattern, int options, char *filename, int count)
1580     {
1581     if ((process_options & PO_FIXED_STRINGS) != 0)
1582     {
1583 nigel 93 char *eop = pattern + strlen(pattern);
1584 nigel 87 char buffer[MBUFTHIRD];
1585     for(;;)
1586     {
1587 nigel 93 int ellength;
1588     char *p = end_of_line(pattern, eop, &ellength);
1589     if (ellength == 0)
1590 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1591 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1592 nigel 93 pattern = p;
1593 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1594     return FALSE;
1595     }
1596     }
1597     else return compile_single_pattern(pattern, options, filename, count);
1598     }
1599    
1600    
1601    
1602     /*************************************************
1603 nigel 49 * Main program *
1604     *************************************************/
1605    
1606 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1607    
1608 nigel 49 int
1609     main(int argc, char **argv)
1610     {
1611 nigel 53 int i, j;
1612 nigel 49 int rc = 1;
1613 nigel 87 int pcre_options = 0;
1614     int cmd_pattern_count = 0;
1615 ph10 141 int hint_count = 0;
1616 nigel 49 int errptr;
1617 nigel 87 BOOL only_one_at_top;
1618     char *patterns[MAX_PATTERN_COUNT];
1619     const char *locale_from = "--locale";
1620 nigel 49 const char *error;
1621    
1622 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1623     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1624     */
1625 nigel 91
1626     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1627     switch(i)
1628     {
1629     default: newline = (char *)"lf"; break;
1630     case '\r': newline = (char *)"cr"; break;
1631     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1632 nigel 93 case -1: newline = (char *)"any"; break;
1633 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1634 nigel 91 }
1635    
1636 nigel 49 /* Process the options */
1637    
1638     for (i = 1; i < argc; i++)
1639     {
1640 nigel 77 option_item *op = NULL;
1641     char *option_data = (char *)""; /* default to keep compiler happy */
1642     BOOL longop;
1643     BOOL longopwasequals = FALSE;
1644    
1645 nigel 49 if (argv[i][0] != '-') break;
1646 nigel 53
1647 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1648 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1649 nigel 63
1650 nigel 77 if (argv[i][1] == 0)
1651     {
1652 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1653 nigel 77 else exit(usage(2));
1654     }
1655 nigel 63
1656 nigel 77 /* Handle a long name option, or -- to terminate the options */
1657 nigel 53
1658     if (argv[i][1] == '-')
1659 nigel 49 {
1660 nigel 77 char *arg = argv[i] + 2;
1661     char *argequals = strchr(arg, '=');
1662 nigel 53
1663 nigel 77 if (*arg == 0) /* -- terminates options */
1664 nigel 49 {
1665 nigel 77 i++;
1666     break; /* out of the options-handling loop */
1667 nigel 53 }
1668 nigel 49
1669 nigel 77 longop = TRUE;
1670    
1671     /* Some long options have data that follows after =, for example file=name.
1672     Some options have variations in the long name spelling: specifically, we
1673     allow "regexp" because GNU grep allows it, though I personally go along
1674 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1675     These options are entered in the table as "regex(p)". No option is in both
1676     these categories, fortunately. */
1677 nigel 77
1678 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1679     {
1680 nigel 77 char *opbra = strchr(op->long_name, '(');
1681     char *equals = strchr(op->long_name, '=');
1682     if (opbra == NULL) /* Not a (p) case */
1683 nigel 53 {
1684 nigel 77 if (equals == NULL) /* Not thing=data case */
1685     {
1686     if (strcmp(arg, op->long_name) == 0) break;
1687     }
1688     else /* Special case xxx=data */
1689     {
1690     int oplen = equals - op->long_name;
1691 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1692 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1693     {
1694     option_data = arg + arglen;
1695     if (*option_data == '=')
1696     {
1697     option_data++;
1698     longopwasequals = TRUE;
1699     }
1700     break;
1701     }
1702     }
1703 nigel 53 }
1704 nigel 77 else /* Special case xxxx(p) */
1705     {
1706     char buff1[24];
1707     char buff2[24];
1708     int baselen = opbra - op->long_name;
1709     sprintf(buff1, "%.*s", baselen, op->long_name);
1710 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1711 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1712 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1713     break;
1714     }
1715 nigel 53 }
1716 nigel 77
1717 nigel 53 if (op->one_char == 0)
1718     {
1719     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1720     exit(usage(2));
1721     }
1722     }
1723 nigel 49
1724 nigel 89
1725     /* Jeffrey Friedl's debugging harness uses these additional options which
1726     are not in the right form for putting in the option table because they use
1727     only one hyphen, yet are more than one character long. By putting them
1728     separately here, they will not get displayed as part of the help() output,
1729     but I don't think Jeffrey will care about that. */
1730    
1731     #ifdef JFRIEDL_DEBUG
1732     else if (strcmp(argv[i], "-pre") == 0) {
1733     jfriedl_prefix = argv[++i];
1734     continue;
1735     } else if (strcmp(argv[i], "-post") == 0) {
1736     jfriedl_postfix = argv[++i];
1737     continue;
1738     } else if (strcmp(argv[i], "-XT") == 0) {
1739     sscanf(argv[++i], "%d", &jfriedl_XT);
1740     continue;
1741     } else if (strcmp(argv[i], "-XR") == 0) {
1742     sscanf(argv[++i], "%d", &jfriedl_XR);
1743     continue;
1744     }
1745     #endif
1746    
1747    
1748 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1749     continue till we hit the last one or one that needs data. */
1750 nigel 53
1751     else
1752     {
1753     char *s = argv[i] + 1;
1754 nigel 77 longop = FALSE;
1755 nigel 53 while (*s != 0)
1756     {
1757 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1758     { if (*s == op->one_char) break; }
1759     if (op->one_char == 0)
1760 nigel 53 {
1761 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1762     *s, argv[i]);
1763     exit(usage(2));
1764     }
1765     if (op->type != OP_NODATA || s[1] == 0)
1766     {
1767     option_data = s+1;
1768 nigel 53 break;
1769     }
1770 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1771 nigel 49 }
1772     }
1773 nigel 77
1774 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1775     is NO_DATA, it means that there is no data, and the option might set
1776     something in the PCRE options. */
1777 nigel 77
1778     if (op->type == OP_NODATA)
1779     {
1780 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1781     continue;
1782     }
1783    
1784     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1785     either has a value or defaults to something. It cannot have data in a
1786     separate item. At the moment, the only such options are "colo(u)r" and
1787 nigel 89 Jeffrey Friedl's special -S debugging option. */
1788 nigel 87
1789     if (*option_data == 0 &&
1790     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1791     {
1792     switch (op->one_char)
1793 nigel 77 {
1794 nigel 87 case N_COLOUR:
1795     colour_option = (char *)"auto";
1796     break;
1797     #ifdef JFRIEDL_DEBUG
1798     case 'S':
1799     S_arg = 0;
1800     break;
1801     #endif
1802 nigel 77 }
1803 nigel 87 continue;
1804     }
1805 nigel 77
1806 nigel 87 /* Otherwise, find the data string for the option. */
1807    
1808     if (*option_data == 0)
1809     {
1810     if (i >= argc - 1 || longopwasequals)
1811 nigel 77 {
1812 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1813     exit(usage(2));
1814     }
1815     option_data = argv[++i];
1816     }
1817    
1818     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1819     multiple times to create a list of patterns. */
1820    
1821     if (op->type == OP_PATLIST)
1822     {
1823     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1824     {
1825     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1826     MAX_PATTERN_COUNT);
1827     return 2;
1828     }
1829     patterns[cmd_pattern_count++] = option_data;
1830     }
1831    
1832     /* Otherwise, deal with single string or numeric data values. */
1833    
1834     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1835     {
1836     *((char **)op->dataptr) = option_data;
1837     }
1838     else
1839     {
1840     char *endptr;
1841     int n = strtoul(option_data, &endptr, 10);
1842     if (*endptr != 0)
1843     {
1844     if (longop)
1845 nigel 77 {
1846 nigel 87 char *equals = strchr(op->long_name, '=');
1847     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1848     equals - op->long_name;
1849     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1850     option_data, nlen, op->long_name);
1851 nigel 77 }
1852 nigel 87 else
1853     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1854     option_data, op->one_char);
1855     exit(usage(2));
1856 nigel 77 }
1857 nigel 87 *((int *)op->dataptr) = n;
1858 nigel 77 }
1859 nigel 49 }
1860    
1861 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1862     for -A and -B. */
1863    
1864     if (both_context > 0)
1865     {
1866     if (after_context == 0) after_context = both_context;
1867     if (before_context == 0) before_context = both_context;
1868     }
1869 ph10 280
1870     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
1871     However, the latter two set the only_matching flag. */
1872 nigel 77
1873 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
1874     (file_offsets && line_offsets))
1875     {
1876     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
1877     "and/or --line-offsets\n");
1878     exit(usage(2));
1879     }
1880    
1881     if (file_offsets || line_offsets) only_matching = TRUE;
1882    
1883 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1884     LC_ALL environment variable is set, and if so, use it. */
1885 nigel 49
1886 nigel 87 if (locale == NULL)
1887 nigel 53 {
1888 nigel 87 locale = getenv("LC_ALL");
1889     locale_from = "LCC_ALL";
1890 nigel 53 }
1891 nigel 49
1892 nigel 87 if (locale == NULL)
1893     {
1894     locale = getenv("LC_CTYPE");
1895     locale_from = "LC_CTYPE";
1896     }
1897 nigel 49
1898 nigel 87 /* If a locale has been provided, set it, and generate the tables that PCRE
1899     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1900    
1901     if (locale != NULL)
1902 nigel 49 {
1903 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1904 nigel 53 {
1905 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1906     locale, locale_from);
1907 nigel 53 return 2;
1908     }
1909 nigel 87 pcretables = pcre_maketables();
1910     }
1911 nigel 77
1912 nigel 87 /* Sort out colouring */
1913    
1914     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1915     {
1916     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1917     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1918     else
1919 nigel 53 {
1920 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1921     colour_option);
1922     return 2;
1923 nigel 77 }
1924 nigel 87 if (do_colour)
1925 nigel 77 {
1926 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1927     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1928     if (cs != NULL) colour_string = cs;
1929 nigel 77 }
1930 nigel 87 }
1931 nigel 77
1932 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1933    
1934     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1935     {
1936     pcre_options |= PCRE_NEWLINE_CR;
1937 nigel 93 endlinetype = EL_CR;
1938 nigel 91 }
1939     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1940     {
1941     pcre_options |= PCRE_NEWLINE_LF;
1942 nigel 93 endlinetype = EL_LF;
1943 nigel 91 }
1944     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1945     {
1946     pcre_options |= PCRE_NEWLINE_CRLF;
1947 nigel 93 endlinetype = EL_CRLF;
1948 nigel 91 }
1949 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1950     {
1951     pcre_options |= PCRE_NEWLINE_ANY;
1952     endlinetype = EL_ANY;
1953     }
1954 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1955     {
1956     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1957     endlinetype = EL_ANYCRLF;
1958     }
1959 nigel 91 else
1960     {
1961     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1962     return 2;
1963     }
1964    
1965 nigel 87 /* Interpret the text values for -d and -D */
1966    
1967     if (dee_option != NULL)
1968     {
1969     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1970     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1971     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1972     else
1973 nigel 77 {
1974 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1975     return 2;
1976 nigel 53 }
1977 nigel 49 }
1978    
1979 nigel 87 if (DEE_option != NULL)
1980     {
1981     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1982     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1983     else
1984     {
1985     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1986     return 2;
1987     }
1988     }
1989 nigel 49
1990 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1991 nigel 87
1992     #ifdef JFRIEDL_DEBUG
1993     if (S_arg > 9)
1994 nigel 49 {
1995 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1996     return 2;
1997     }
1998 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1999     {
2000     if (jfriedl_XT == 0) jfriedl_XT = 1;
2001     if (jfriedl_XR == 0) jfriedl_XR = 1;
2002     }
2003 nigel 87 #endif
2004 nigel 77
2005 nigel 87 /* Get memory to store the pattern and hints lists. */
2006    
2007     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2008     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2009    
2010     if (pattern_list == NULL || hints_list == NULL)
2011     {
2012     fprintf(stderr, "pcregrep: malloc failed\n");
2013 ph10 123 goto EXIT2;
2014 nigel 87 }
2015    
2016     /* If no patterns were provided by -e, and there is no file provided by -f,
2017     the first argument is the one and only pattern, and it must exist. */
2018    
2019     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2020     {
2021 nigel 63 if (i >= argc) return usage(2);
2022 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2023     }
2024 nigel 77
2025 nigel 87 /* Compile the patterns that were provided on the command line, either by
2026     multiple uses of -e or as a single unkeyed pattern. */
2027    
2028     for (j = 0; j < cmd_pattern_count; j++)
2029     {
2030     if (!compile_pattern(patterns[j], pcre_options, NULL,
2031     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2032 ph10 123 goto EXIT2;
2033 nigel 87 }
2034    
2035     /* Compile the regular expressions that are provided in a file. */
2036    
2037     if (pattern_filename != NULL)
2038     {
2039     int linenumber = 0;
2040     FILE *f;
2041     char *filename;
2042     char buffer[MBUFTHIRD];
2043    
2044     if (strcmp(pattern_filename, "-") == 0)
2045 nigel 77 {
2046 nigel 87 f = stdin;
2047     filename = stdin_name;
2048 nigel 77 }
2049 nigel 87 else
2050 nigel 77 {
2051 nigel 87 f = fopen(pattern_filename, "r");
2052     if (f == NULL)
2053     {
2054     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2055     strerror(errno));
2056 ph10 123 goto EXIT2;
2057 nigel 87 }
2058     filename = pattern_filename;
2059 nigel 77 }
2060    
2061 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2062 nigel 53 {
2063 nigel 87 char *s = buffer + (int)strlen(buffer);
2064     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2065     *s = 0;
2066     linenumber++;
2067     if (buffer[0] == 0) continue; /* Skip blank lines */
2068     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2069 ph10 121 goto EXIT2;
2070 nigel 53 }
2071 nigel 87
2072     if (f != stdin) fclose(f);
2073 nigel 49 }
2074    
2075 nigel 77 /* Study the regular expressions, as we will be running them many times */
2076 nigel 53
2077     for (j = 0; j < pattern_count; j++)
2078     {
2079     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2080     if (error != NULL)
2081     {
2082     char s[16];
2083     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2084     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2085 ph10 121 goto EXIT2;
2086 nigel 53 }
2087 ph10 142 hint_count++;
2088 nigel 53 }
2089    
2090 nigel 77 /* If there are include or exclude patterns, compile them. */
2091    
2092     if (exclude_pattern != NULL)
2093     {
2094 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2095     pcretables);
2096 nigel 77 if (exclude_compiled == NULL)
2097     {
2098     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2099     errptr, error);
2100 ph10 121 goto EXIT2;
2101 nigel 77 }
2102     }
2103    
2104     if (include_pattern != NULL)
2105     {
2106 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2107     pcretables);
2108 nigel 77 if (include_compiled == NULL)
2109     {
2110     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2111     errptr, error);
2112 ph10 121 goto EXIT2;
2113 nigel 77 }
2114     }
2115    
2116 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2117 nigel 49
2118 nigel 87 if (i >= argc)
2119 ph10 121 {
2120     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2121     goto EXIT;
2122 ph10 123 }
2123 nigel 49
2124 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2125     Pass in the fact that there is only one argument at top level - this suppresses
2126 nigel 87 the file name if the argument is not a directory and filenames are not
2127     otherwise forced. */
2128 nigel 49
2129 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2130 nigel 49
2131     for (; i < argc; i++)
2132     {
2133 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2134     only_one_at_top);
2135 nigel 77 if (frc > 1) rc = frc;
2136     else if (frc == 0 && rc == 1) rc = 0;
2137 nigel 49 }
2138    
2139 ph10 121 EXIT:
2140     if (pattern_list != NULL)
2141     {
2142 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2143 ph10 121 free(pattern_list);
2144 ph10 123 }
2145 ph10 121 if (hints_list != NULL)
2146     {
2147 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2148 ph10 121 free(hints_list);
2149 ph10 123 }
2150 nigel 49 return rc;
2151 ph10 121
2152     EXIT2:
2153     rc = 2;
2154     goto EXIT;
2155 nigel 49 }
2156    
2157 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12