/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (hide annotations) (download)
Tue Jul 31 14:39:09 2007 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 58796 byte(s)
Daniel's patch for config.h and Windows DLL declarations (not fully working).

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 199 #include <config.h>
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 137 #include <pcre.h>
59 nigel 49
60     #define FALSE 0
61     #define TRUE 1
62    
63     typedef int BOOL;
64    
65 nigel 53 #define MAX_PATTERN_COUNT 100
66 nigel 49
67 nigel 77 #if BUFSIZ > 8192
68     #define MBUFTHIRD BUFSIZ
69     #else
70     #define MBUFTHIRD 8192
71     #endif
72 nigel 49
73 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
74     output. The order is important; it is assumed that a file name is wanted for
75     all values greater than FN_DEFAULT. */
76 nigel 77
77 nigel 87 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79     /* Actions for the -d and -D options */
80    
81     enum { dee_READ, dee_SKIP, dee_RECURSE };
82     enum { DEE_READ, DEE_SKIP };
83    
84     /* Actions for special processing options (flag bits) */
85    
86     #define PO_WORD_MATCH 0x0001
87     #define PO_LINE_MATCH 0x0002
88     #define PO_FIXED_STRINGS 0x0004
89    
90 nigel 93 /* Line ending types */
91 nigel 87
92 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93 nigel 87
94 nigel 93
95    
96 nigel 49 /*************************************************
97     * Global variables *
98     *************************************************/
99    
100 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
101     regular code. */
102    
103     #ifdef JFRIEDL_DEBUG
104     static int S_arg = -1;
105 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107     static const char *jfriedl_prefix = "";
108     static const char *jfriedl_postfix = "";
109 nigel 87 #endif
110    
111 nigel 93 static int endlinetype;
112 nigel 91
113 nigel 87 static char *colour_string = (char *)"1;31";
114     static char *colour_option = NULL;
115     static char *dee_option = NULL;
116     static char *DEE_option = NULL;
117 nigel 91 static char *newline = NULL;
118 nigel 53 static char *pattern_filename = NULL;
119 nigel 77 static char *stdin_name = (char *)"(standard input)";
120 nigel 87 static char *locale = NULL;
121    
122     static const unsigned char *pcretables = NULL;
123    
124 nigel 53 static int pattern_count = 0;
125 ph10 121 static pcre **pattern_list = NULL;
126     static pcre_extra **hints_list = NULL;
127 nigel 49
128 nigel 77 static char *include_pattern = NULL;
129     static char *exclude_pattern = NULL;
130    
131     static pcre *include_compiled = NULL;
132     static pcre *exclude_compiled = NULL;
133    
134     static int after_context = 0;
135     static int before_context = 0;
136     static int both_context = 0;
137 nigel 87 static int dee_action = dee_READ;
138     static int DEE_action = DEE_READ;
139     static int error_count = 0;
140     static int filenames = FN_DEFAULT;
141     static int process_options = 0;
142 nigel 77
143 nigel 49 static BOOL count_only = FALSE;
144 nigel 87 static BOOL do_colour = FALSE;
145 nigel 77 static BOOL hyphenpending = FALSE;
146 nigel 49 static BOOL invert = FALSE;
147 nigel 77 static BOOL multiline = FALSE;
148 nigel 49 static BOOL number = FALSE;
149 nigel 87 static BOOL only_matching = FALSE;
150 nigel 77 static BOOL quiet = FALSE;
151 nigel 49 static BOOL silent = FALSE;
152 nigel 93 static BOOL utf8 = FALSE;
153 nigel 49
154 nigel 53 /* Structure for options and list of them */
155 nigel 49
156 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157     OP_PATLIST };
158 nigel 77
159 nigel 53 typedef struct option_item {
160 nigel 77 int type;
161 nigel 53 int one_char;
162 nigel 77 void *dataptr;
163 nigel 67 const char *long_name;
164     const char *help_text;
165 nigel 53 } option_item;
166 nigel 49
167 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
168     used to identify them. */
169    
170     #define N_COLOUR (-1)
171     #define N_EXCLUDE (-2)
172     #define N_HELP (-3)
173     #define N_INCLUDE (-4)
174     #define N_LABEL (-5)
175     #define N_LOCALE (-6)
176     #define N_NULL (-7)
177    
178 nigel 53 static option_item optionlist[] = {
179 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
180     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200 ph10 149 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
207     #ifdef JFRIEDL_DEBUG
208     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
209     #endif
210     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216     { OP_NODATA, 0, NULL, NULL, NULL }
217 nigel 53 };
218    
219 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221     that the combination of -w and -x has the same effect as -x on its own, so we
222     can treat them as the same. */
223 nigel 53
/* Text wrapped around each pattern before it is compiled. Both tables are
indexed by the process_options bit mask (PO_WORD_MATCH = 1, PO_LINE_MATCH = 2,
PO_FIXED_STRINGS = 4). Entries with the line-match bit set ignore the
word-match bit, so indices 2/3 and 6/7 are pairwise identical. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: word match */
  "^(?:",        /* 2: line match */
  "^(?:",        /* 3: word + line match */
  "\\Q",         /* 4: fixed strings */
  "\\b\\Q",      /* 5: fixed strings + word match */
  "^(?:\\Q",     /* 6: fixed strings + line match */
  "^(?:\\Q"      /* 7: fixed strings + word + line match */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: word match */
  ")$",          /* 2: line match */
  ")$",          /* 3: word + line match */
  "\\E",         /* 4: fixed strings */
  "\\E\\b",      /* 5: fixed strings + word match */
  "\\E)$",       /* 6: fixed strings + line match */
  "\\E)$"        /* 7: fixed strings + word + line match */
};
229    
230 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
231 nigel 87
/* Mask applied to the first byte of a multi-byte UTF-8 character to extract
its data bits, indexed by the number of additional bytes (0 to 5). Private to
this file, hence static. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

/* Number of additional bytes that follow a UTF-8 lead byte, indexed by the
bottom six bits of that byte (i.e. for lead bytes 0xc0 to 0xff). Private to
this file, hence static. */

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242 nigel 53 /*************************************************
243 nigel 87 * OS-specific functions *
244 nigel 53 *************************************************/
245    
246     /* These functions are defined so that they can be made system specific,
247 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
248 nigel 53
249    
250     /************* Directory scanning in Unix ***********/
251    
252 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253 nigel 53 #include <sys/types.h>
254     #include <sys/stat.h>
255     #include <dirent.h>
256    
257     typedef DIR directory_type;
258    
/* Test whether a path names a directory.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat sb;

/* A path that cannot be stat'ed is reported as "not a directory", in the
expectation that the subsequent attempt to open it as a file will fail. */

if (stat(filename, &sb) < 0) return 0;

if (S_ISDIR(sb.st_mode)) return '/';
return 0;
}
267    
268 nigel 67 static directory_type *
269 nigel 53 opendirectory(char *filename)
270     {
271     return opendir(filename);
272     }
273    
274 nigel 67 static char *
275 nigel 53 readdirectory(directory_type *dir)
276     {
277     for (;;)
278     {
279     struct dirent *dent = readdir(dir);
280     if (dent == NULL) return NULL;
281     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282     return dent->d_name;
283     }
284 ph10 151 /* Control never reaches here */
285 nigel 53 }
286    
287 nigel 67 static void
288 nigel 53 closedirectory(directory_type *dir)
289     {
290     closedir(dir);
291     }
292    
293    
294 nigel 87 /************* Test for regular file in Unix **********/
295    
/* Test whether a path names a regular file.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, or if stat() fails;
            0 for anything else
*/

static int
isregfile(char *filename)
{
struct stat sb;

/* A path that cannot be stat'ed is treated as regular, in the expectation
that the subsequent attempt to open it as a file will fail. */

if (stat(filename, &sb) < 0) return 1;

return S_ISREG(sb.st_mode) != 0;
}
304    
305    
306     /************* Test stdout for being a terminal in Unix **********/
307    
308     static BOOL
309     is_stdout_tty(void)
310     {
311     return isatty(fileno(stdout));
312     }
313    
314    
315 nigel 63 /************* Directory scanning in Win32 ***********/
316 nigel 53
317 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
318 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319     when it did not exist. */
320 nigel 53
321 nigel 63
322 ph10 97 #elif HAVE_WINDOWS_H
323 nigel 63
324     #ifndef STRICT
325     # define STRICT
326     #endif
327     #ifndef WIN32_LEAN_AND_MEAN
328     # define WIN32_LEAN_AND_MEAN
329     #endif
330 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
331     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332     #endif
333    
334 nigel 63 #include <windows.h>
335    
336     typedef struct directory_type
337     {
338     HANDLE handle;
339     BOOL first;
340     WIN32_FIND_DATA data;
341     } directory_type;
342    
343     int
344     isdirectory(char *filename)
345     {
346     DWORD attr = GetFileAttributes(filename);
347     if (attr == INVALID_FILE_ATTRIBUTES)
348     return 0;
349     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
350     }
351    
352     directory_type *
353     opendirectory(char *filename)
354     {
355     size_t len;
356     char *pattern;
357     directory_type *dir;
358     DWORD err;
359     len = strlen(filename);
360     pattern = (char *) malloc(len + 3);
361     dir = (directory_type *) malloc(sizeof(*dir));
362     if ((pattern == NULL) || (dir == NULL))
363     {
364     fprintf(stderr, "pcregrep: malloc failed\n");
365     exit(2);
366     }
367     memcpy(pattern, filename, len);
368     memcpy(&(pattern[len]), "\\*", 3);
369     dir->handle = FindFirstFile(pattern, &(dir->data));
370     if (dir->handle != INVALID_HANDLE_VALUE)
371     {
372     free(pattern);
373     dir->first = TRUE;
374     return dir;
375     }
376     err = GetLastError();
377     free(pattern);
378     free(dir);
379     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
380     return NULL;
381     }
382    
383     char *
384     readdirectory(directory_type *dir)
385     {
386     for (;;)
387     {
388     if (!dir->first)
389     {
390     if (!FindNextFile(dir->handle, &(dir->data)))
391     return NULL;
392     }
393     else
394     {
395     dir->first = FALSE;
396     }
397     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398     return dir->data.cFileName;
399     }
400     #ifndef _MSC_VER
401     return NULL; /* Keep compiler happy; never executed */
402     #endif
403     }
404    
405     void
406     closedirectory(directory_type *dir)
407     {
408     FindClose(dir->handle);
409     free(dir);
410     }
411    
412    
413 nigel 87 /************* Test for regular file in Win32 **********/
414    
415     /* I don't know how to do this, or if it can be done; assume all paths are
416     regular if they are not directories. */
417    
/* Test for a regular file (Win32 version). Anything that isdirectory() does
not report as a directory is assumed to be regular.

Argument:   filename   the path to test
Returns:    1 if the path is not a directory, 0 if it is
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
422    
423    
424     /************* Test stdout for being a terminal in Win32 **********/
425    
426     /* I don't know how to do this; assume never */
427    
428     static BOOL
429     is_stdout_tty(void)
430     {
431     FALSE;
432     }
433    
434    
435 nigel 53 /************* Directory scanning when we can't do it ***********/
436    
437     /* The type is void, and apart from isdirectory(), the functions do nothing. */
438    
439 nigel 63 #else
440    
/* Stand-ins used when no directory support is available. Nothing is ever
reported as a directory, so the open/read/close functions are placeholders
that do nothing. */

typedef void directory_type;

/* Never reports a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Always fails to open. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* Never yields an entry. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
447    
448 nigel 87
449     /************* Test for regular when we can't do it **********/
450    
451     /* Assume all files are regular. */
452    
/* With no means of testing, every path is reported as a regular file. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
454    
455    
456     /************* Test stdout for being a terminal when we can't do it **********/
457    
458     static BOOL
459     is_stdout_tty(void)
460     {
461     return FALSE;
462     }
463    
464    
465 nigel 53 #endif
466    
467    
468    
469 ph10 137 #ifndef HAVE_STRERROR
470 nigel 49 /*************************************************
471     * Provide strerror() for non-ANSI libraries *
472     *************************************************/
473    
474     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475     in their libraries, but can provide the same facility by this simple
476     alternative function. */
477    
/* The traditional error message table and its size, supplied by the system
library on the platforms that need this replacement. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:   n   an error number
Returns:    the entry for n in sys_errlist, or a fixed "unknown" message if
            n is outside the range 0 to sys_nerr - 1
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
487     #endif /* HAVE_STRERROR */
488    
489    
490    
491     /*************************************************
492 nigel 93 * Find end of line *
493     *************************************************/
494    
495     /* The length of the endline sequence that is found is set via lenptr. This may
496     be zero at the very end of the file if there is no line-ending sequence there.
497    
498     Arguments:
499     p current position in line
500     endptr end of available data
501     lenptr where to put the length of the eol sequence
502    
503     Returns: pointer after the last byte of the line, including the eol byte(s)
504     */
505    
506     static char *
507     end_of_line(char *p, char *endptr, int *lenptr)
508     {
509     switch(endlinetype)
510     {
511     default: /* Just in case */
512     case EL_LF:
513     while (p < endptr && *p != '\n') p++;
514     if (p < endptr)
515     {
516     *lenptr = 1;
517     return p + 1;
518     }
519     *lenptr = 0;
520     return endptr;
521    
522     case EL_CR:
523     while (p < endptr && *p != '\r') p++;
524     if (p < endptr)
525     {
526     *lenptr = 1;
527     return p + 1;
528     }
529     *lenptr = 0;
530     return endptr;
531    
532     case EL_CRLF:
533     for (;;)
534     {
535     while (p < endptr && *p != '\r') p++;
536     if (++p >= endptr)
537     {
538     *lenptr = 0;
539     return endptr;
540     }
541     if (*p == '\n')
542     {
543     *lenptr = 2;
544     return p + 1;
545     }
546     }
547     break;
548    
549 ph10 149 case EL_ANYCRLF:
550     while (p < endptr)
551     {
552     int extra = 0;
553     register int c = *((unsigned char *)p);
554    
555     if (utf8 && c >= 0xc0)
556     {
557     int gcii, gcss;
558     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
559     gcss = 6*extra;
560     c = (c & utf8_table3[extra]) << gcss;
561     for (gcii = 1; gcii <= extra; gcii++)
562     {
563     gcss -= 6;
564     c |= (p[gcii] & 0x3f) << gcss;
565     }
566     }
567    
568     p += 1 + extra;
569    
570     switch (c)
571     {
572     case 0x0a: /* LF */
573     *lenptr = 1;
574     return p;
575    
576     case 0x0d: /* CR */
577     if (p < endptr && *p == 0x0a)
578     {
579     *lenptr = 2;
580     p++;
581     }
582     else *lenptr = 1;
583     return p;
584 ph10 150
585 ph10 149 default:
586     break;
587     }
588     } /* End of loop for ANYCRLF case */
589 ph10 150
590 ph10 149 *lenptr = 0; /* Must have hit the end */
591     return endptr;
592    
593 nigel 93 case EL_ANY:
594     while (p < endptr)
595     {
596     int extra = 0;
597     register int c = *((unsigned char *)p);
598    
599     if (utf8 && c >= 0xc0)
600     {
601     int gcii, gcss;
602     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
603     gcss = 6*extra;
604     c = (c & utf8_table3[extra]) << gcss;
605     for (gcii = 1; gcii <= extra; gcii++)
606     {
607     gcss -= 6;
608     c |= (p[gcii] & 0x3f) << gcss;
609     }
610     }
611    
612     p += 1 + extra;
613    
614     switch (c)
615     {
616     case 0x0a: /* LF */
617     case 0x0b: /* VT */
618     case 0x0c: /* FF */
619     *lenptr = 1;
620     return p;
621    
622     case 0x0d: /* CR */
623     if (p < endptr && *p == 0x0a)
624     {
625     *lenptr = 2;
626     p++;
627     }
628     else *lenptr = 1;
629     return p;
630    
631     case 0x85: /* NEL */
632     *lenptr = utf8? 2 : 1;
633     return p;
634    
635     case 0x2028: /* LS */
636     case 0x2029: /* PS */
637     *lenptr = 3;
638     return p;
639    
640     default:
641     break;
642     }
643     } /* End of loop for ANY case */
644    
645     *lenptr = 0; /* Must have hit the end */
646     return endptr;
647     } /* End of overall switch */
648     }
649    
650    
651    
652     /*************************************************
653     * Find start of previous line *
654     *************************************************/
655    
656     /* This is called when looking back for before lines to print.
657    
658     Arguments:
659     p start of the subsequent line
660     startptr start of available data
661    
662     Returns: pointer to the start of the previous line
663     */
664    
665     static char *
666     previous_line(char *p, char *startptr)
667     {
668     switch(endlinetype)
669     {
670     default: /* Just in case */
671     case EL_LF:
672     p--;
673     while (p > startptr && p[-1] != '\n') p--;
674     return p;
675    
676     case EL_CR:
677     p--;
678     while (p > startptr && p[-1] != '\n') p--;
679     return p;
680    
681     case EL_CRLF:
682     for (;;)
683     {
684     p -= 2;
685     while (p > startptr && p[-1] != '\n') p--;
686     if (p <= startptr + 1 || p[-2] == '\r') return p;
687     }
688     return p; /* But control should never get here */
689    
690     case EL_ANY:
691 ph10 150 case EL_ANYCRLF:
692 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693     if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695     while (p > startptr)
696     {
697     register int c;
698     char *pp = p - 1;
699    
700     if (utf8)
701     {
702     int extra = 0;
703     while ((*pp & 0xc0) == 0x80) pp--;
704     c = *((unsigned char *)pp);
705     if (c >= 0xc0)
706     {
707     int gcii, gcss;
708     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
709     gcss = 6*extra;
710     c = (c & utf8_table3[extra]) << gcss;
711     for (gcii = 1; gcii <= extra; gcii++)
712     {
713     gcss -= 6;
714     c |= (pp[gcii] & 0x3f) << gcss;
715     }
716     }
717     }
718     else c = *((unsigned char *)pp);
719    
720 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
721 nigel 93 {
722     case 0x0a: /* LF */
723 ph10 149 case 0x0d: /* CR */
724     return p;
725 ph10 150
726 ph10 149 default:
727     break;
728 ph10 150 }
729 ph10 149
730     else switch (c)
731     {
732     case 0x0a: /* LF */
733 nigel 93 case 0x0b: /* VT */
734     case 0x0c: /* FF */
735     case 0x0d: /* CR */
736     case 0x85: /* NEL */
737     case 0x2028: /* LS */
738     case 0x2029: /* PS */
739     return p;
740    
741     default:
742     break;
743     }
744    
745     p = pp; /* Back one character */
746     } /* End of loop for ANY case */
747    
748     return startptr; /* Hit start of data */
749     } /* End of overall switch */
750     }
751    
752    
753    
754    
755    
756     /*************************************************
757 nigel 77 * Print the previous "after" lines *
758 nigel 49 *************************************************/
759    
760 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
761 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
762     that a binary zero does not terminate it.
763 nigel 77
764     Arguments:
765     lastmatchnumber the number of the last matching line, plus one
766     lastmatchrestart where we restarted after the last match
767     endptr end of available data
768     printname filename for printing
769    
770     Returns: nothing
771     */
772    
773     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774     char *endptr, char *printname)
775     {
776     if (after_context > 0 && lastmatchnumber > 0)
777     {
778     int count = 0;
779     while (lastmatchrestart < endptr && count++ < after_context)
780     {
781 nigel 93 int ellength;
782 nigel 77 char *pp = lastmatchrestart;
783     if (printname != NULL) fprintf(stdout, "%s-", printname);
784     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785 nigel 93 pp = end_of_line(pp, endptr, &ellength);
786     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787     lastmatchrestart = pp;
788 nigel 77 }
789     hyphenpending = TRUE;
790     }
791     }
792    
793    
794    
795     /*************************************************
796     * Grep an individual file *
797     *************************************************/
798    
799     /* This is called from grep_or_recurse() below. It uses a buffer that is three
800     times the value of MBUFTHIRD. The matching point is never allowed to stray into
801     the top third of the buffer, thus keeping more of the file available for
802     context printing or for multiline scanning. For large files, the pointer will
803     be in the middle third most of the time, so the bottom third is available for
804     "before" context printing.
805    
806     Arguments:
807     in the fopened FILE stream
808     printname the file name if it is to be printed for each match
809     or NULL if the file name is not to be printed
810     it cannot be NULL if filenames[_nomatch]_only is set
811    
812     Returns: 0 if there was at least one match
813     1 otherwise (no matches)
814     */
815    
816 nigel 49 static int
817 nigel 77 pcregrep(FILE *in, char *printname)
818 nigel 49 {
819     int rc = 1;
820 nigel 77 int linenumber = 1;
821     int lastmatchnumber = 0;
822 nigel 49 int count = 0;
823     int offsets[99];
824 nigel 77 char *lastmatchrestart = NULL;
825     char buffer[3*MBUFTHIRD];
826     char *ptr = buffer;
827     char *endptr;
828     size_t bufflength;
829     BOOL endhyphenpending = FALSE;
830 nigel 49
831 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
832     end of what we have. */
833    
834     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835     endptr = buffer + bufflength;
836    
837     /* Loop while the current pointer is not at the end of the file. For large
838     files, endptr will be at the end of the buffer when we are in the middle of the
839     file, but ptr will never get there, because as soon as it gets over 2/3 of the
840     way, the buffer is shifted left and re-filled. */
841    
842     while (ptr < endptr)
843 nigel 49 {
844 nigel 93 int i, endlinelength;
845 nigel 87 int mrc = 0;
846 nigel 53 BOOL match = FALSE;
847 nigel 77 char *t = ptr;
848     size_t length, linelength;
849 nigel 49
850 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
851     of the subject string to pass to pcre_exec(). In multiline mode, it is the
852     length remainder of the data in the buffer. Otherwise, it is the length of
853     the next line. After matching, we always advance by the length of the next
854     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855     that any match is constrained to be in the first line. */
856    
857 nigel 93 t = end_of_line(t, endptr, &endlinelength);
858     linelength = t - ptr - endlinelength;
859 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
860 nigel 77
861 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
862    
863     #ifdef JFRIEDL_DEBUG
864     if (jfriedl_XT || jfriedl_XR)
865     {
866     #include <sys/time.h>
867     #include <time.h>
868     struct timeval start_time, end_time;
869     struct timezone dummy;
870    
871     if (jfriedl_XT)
872     {
873     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874     const char *orig = ptr;
875     ptr = malloc(newlen + 1);
876     if (!ptr) {
877     printf("out of memory");
878     exit(2);
879     }
880     endptr = ptr;
881     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882     for (i = 0; i < jfriedl_XT; i++) {
883     strncpy(endptr, orig, length);
884     endptr += length;
885     }
886     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
887     length = newlen;
888     }
889    
890     if (gettimeofday(&start_time, &dummy) != 0)
891     perror("bad gettimeofday");
892    
893    
894     for (i = 0; i < jfriedl_XR; i++)
895     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
896    
897     if (gettimeofday(&end_time, &dummy) != 0)
898     perror("bad gettimeofday");
899    
900     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
901     -
902     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
903    
904     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
905     return 0;
906     }
907     #endif
908    
909    
910 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
911     the final newline in the subject string. */
912    
913 nigel 87 for (i = 0; i < pattern_count; i++)
914 nigel 53 {
915 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
916     offsets, 99);
917     if (mrc >= 0) { match = TRUE; break; }
918     if (mrc != PCRE_ERROR_NOMATCH)
919     {
920     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922     fprintf(stderr, "this line:\n");
923     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924     fprintf(stderr, "\n");
925     if (error_count == 0 &&
926     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
927     {
928     fprintf(stderr, "pcregrep: error %d means that a resource limit "
929     "was exceeded\n", mrc);
930     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
931     }
932     if (error_count++ > 20)
933     {
934     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
935     exit(2);
936     }
937     match = invert; /* No more matching; don't show the line again */
938     break;
939     }
940 nigel 53 }
941 nigel 49
942 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
943 nigel 77
944 nigel 49 if (match != invert)
945     {
946 nigel 77 BOOL hyphenprinted = FALSE;
947    
948 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
949 nigel 77
950 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
951    
952     /* Just count if just counting is wanted. */
953    
954 nigel 49 if (count_only) count++;
955    
956 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
957     in the file. */
958    
959     else if (filenames == FN_ONLY)
960 nigel 49 {
961 nigel 77 fprintf(stdout, "%s\n", printname);
962 nigel 49 return 0;
963     }
964    
965 nigel 87 /* Likewise, if all we want is a yes/no answer. */
966    
967 nigel 77 else if (quiet) return 0;
968 nigel 49
969 nigel 87 /* The --only-matching option prints just the substring that matched, and
970     does not print any context. */
971    
972     else if (only_matching)
973     {
974     if (printname != NULL) fprintf(stdout, "%s:", printname);
975     if (number) fprintf(stdout, "%d:", linenumber);
976     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977     fprintf(stdout, "\n");
978     }
979    
980     /* This is the default case when none of the above options is set. We print
981     the matching line(s), possibly preceded and/or followed by other lines of
982     context. */
983    
984 nigel 49 else
985     {
986 nigel 77 /* See if there is a requirement to print some "after" lines from a
987     previous match. We never print any overlaps. */
988    
989     if (after_context > 0 && lastmatchnumber > 0)
990     {
991 nigel 93 int ellength;
992 nigel 77 int linecount = 0;
993     char *p = lastmatchrestart;
994    
995     while (p < ptr && linecount < after_context)
996     {
997 nigel 93 p = end_of_line(p, ptr, &ellength);
998 nigel 77 linecount++;
999     }
1000    
1001     /* It is important to advance lastmatchrestart during this printing so
1002 nigel 87 that it interacts correctly with any "before" printing below. Print
1003     each line's data using fwrite() in case there are binary zeroes. */
1004 nigel 77
1005     while (lastmatchrestart < p)
1006     {
1007     char *pp = lastmatchrestart;
1008     if (printname != NULL) fprintf(stdout, "%s-", printname);
1009     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1011     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012     lastmatchrestart = pp;
1013 nigel 77 }
1014     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1015     }
1016    
1017     /* If there were non-contiguous lines printed above, insert hyphens. */
1018    
1019     if (hyphenpending)
1020     {
1021     fprintf(stdout, "--\n");
1022     hyphenpending = FALSE;
1023     hyphenprinted = TRUE;
1024     }
1025    
1026     /* See if there is a requirement to print some "before" lines for this
1027     match. Again, don't print overlaps. */
1028    
1029     if (before_context > 0)
1030     {
1031     int linecount = 0;
1032     char *p = ptr;
1033    
1034     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035 nigel 87 linecount < before_context)
1036 nigel 77 {
1037 nigel 87 linecount++;
1038 nigel 93 p = previous_line(p, buffer);
1039 nigel 77 }
1040    
1041     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042     fprintf(stdout, "--\n");
1043    
1044     while (p < ptr)
1045     {
1046 nigel 93 int ellength;
1047 nigel 77 char *pp = p;
1048     if (printname != NULL) fprintf(stdout, "%s-", printname);
1049     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1051     fwrite(p, 1, pp - p, stdout);
1052     p = pp;
1053 nigel 77 }
1054     }
1055    
1056     /* Now print the matching line(s); ensure we set hyphenpending at the end
1057 nigel 85 of the file if any context lines are being output. */
1058 nigel 77
1059 nigel 85 if (after_context > 0 || before_context > 0)
1060     endhyphenpending = TRUE;
1061    
1062 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1063 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1064 nigel 77
1065     /* In multiline mode, we want to print to the end of the line in which
1066     the end of the matched string is found, so we adjust linelength and the
1067     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1068 nigel 91 start of the match will always be before the first newline sequence. */
1069 nigel 77
1070     if (multiline)
1071     {
1072 nigel 93 int ellength;
1073 nigel 77 char *endmatch = ptr + offsets[1];
1074     t = ptr;
1075 nigel 93 while (t < endmatch)
1076     {
1077     t = end_of_line(t, endptr, &ellength);
1078     if (t <= endmatch) linenumber++; else break;
1079     }
1080     endmatch = end_of_line(endmatch, endptr, &ellength);
1081     linelength = endmatch - ptr - ellength;
1082 nigel 77 }
1083    
1084 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1085     zeroes are treated as just another data character. */
1086    
1087     /* This extra option, for Jeffrey Friedl's debugging requirements,
1088     replaces the matched string, or a specific captured string if it exists,
1089     with X. When this happens, colouring is ignored. */
1090    
1091     #ifdef JFRIEDL_DEBUG
1092     if (S_arg >= 0 && S_arg < mrc)
1093     {
1094     int first = S_arg * 2;
1095     int last = first + 1;
1096     fwrite(ptr, 1, offsets[first], stdout);
1097     fprintf(stdout, "X");
1098     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1099     }
1100     else
1101     #endif
1102    
1103     /* We have to split the line(s) up if colouring. */
1104    
1105     if (do_colour)
1106     {
1107     fwrite(ptr, 1, offsets[0], stdout);
1108     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1109     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1110     fprintf(stdout, "%c[00m", 0x1b);
1111     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1112     }
1113 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1114 nigel 49 }
1115    
1116 nigel 87 /* End of doing what has to be done for a match */
1117    
1118 nigel 77 rc = 0; /* Had some success */
1119    
1120     /* Remember where the last match happened for after_context. We remember
1121     where we are about to restart, and that line's number. */
1122    
1123 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1124 nigel 77 lastmatchnumber = linenumber + 1;
1125 nigel 49 }
1126 nigel 77
1127     /* Advance to after the newline and increment the line number. */
1128    
1129 nigel 93 ptr += linelength + endlinelength;
1130 nigel 77 linenumber++;
1131    
1132     /* If we haven't yet reached the end of the file (the buffer is full), and
1133     the current point is in the top 1/3 of the buffer, slide the buffer down by
1134     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1135     about to be lost, print them. */
1136    
1137     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1138     {
1139     if (after_context > 0 &&
1140     lastmatchnumber > 0 &&
1141     lastmatchrestart < buffer + MBUFTHIRD)
1142     {
1143     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1144     lastmatchnumber = 0;
1145     }
1146    
1147     /* Now do the shuffle */
1148    
1149     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1150     ptr -= MBUFTHIRD;
1151     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1152     endptr = buffer + bufflength;
1153    
1154     /* Adjust any last match point */
1155    
1156     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1157     }
1158     } /* Loop through the whole file */
1159    
1160     /* End of file; print final "after" lines if wanted; do_after_lines sets
1161     hyphenpending if it prints something. */
1162    
1163 nigel 87 if (!only_matching && !count_only)
1164     {
1165     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1166     hyphenpending |= endhyphenpending;
1167     }
1168 nigel 77
1169     /* Print the file name if we are looking for those without matches and there
1170     were none. If we found a match, we won't have got this far. */
1171    
1172 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1173 nigel 77 {
1174     fprintf(stdout, "%s\n", printname);
1175     return 0;
1176 nigel 49 }
1177    
1178 nigel 77 /* Print the match count if wanted */
1179    
1180 nigel 49 if (count_only)
1181     {
1182 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1183 nigel 49 fprintf(stdout, "%d\n", count);
1184     }
1185    
1186     return rc;
1187     }
1188    
1189    
1190    
1191     /*************************************************
1192 nigel 53 * Grep a file or recurse into a directory *
1193     *************************************************/
1194    
1195 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1196     recursing; if it's a file, grep it.
1197    
1198     Arguments:
1199     pathname the path to investigate
1200 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1201 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1202    
1203     Returns: 0 if there was at least one match
1204     1 if there were no matches
1205     2 there was some kind of error
1206    
1207     However, file opening failures are suppressed if "silent" is set.
1208     */
1209    
1210 nigel 53 static int
1211 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1212 nigel 53 {
1213     int rc = 1;
1214     int sep;
1215     FILE *in;
1216    
1217 nigel 77 /* If the file name is "-" we scan stdin */
1218 nigel 53
1219 nigel 77 if (strcmp(pathname, "-") == 0)
1220 nigel 53 {
1221 nigel 77 return pcregrep(stdin,
1222 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1223 nigel 77 stdin_name : NULL);
1224     }
1225    
1226    
1227 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1228     each file within it, subject to any include or exclude patterns that were set.
1229     The scanning code is localized so it can be made system-specific. */
1230    
1231     if ((sep = isdirectory(pathname)) != 0)
1232 nigel 77 {
1233 nigel 87 if (dee_action == dee_SKIP) return 1;
1234     if (dee_action == dee_RECURSE)
1235 nigel 53 {
1236 nigel 87 char buffer[1024];
1237     char *nextfile;
1238     directory_type *dir = opendirectory(pathname);
1239 nigel 53
1240 nigel 87 if (dir == NULL)
1241     {
1242     if (!silent)
1243     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1244     strerror(errno));
1245     return 2;
1246     }
1247 nigel 77
1248 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1249     {
1250     int frc, blen;
1251     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1252     blen = strlen(buffer);
1253 nigel 77
1254 nigel 87 if (exclude_compiled != NULL &&
1255     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1256     continue;
1257 nigel 77
1258 nigel 87 if (include_compiled != NULL &&
1259     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1260     continue;
1261    
1262     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1263     if (frc > 1) rc = frc;
1264     else if (frc == 0 && rc == 1) rc = 0;
1265     }
1266    
1267     closedirectory(dir);
1268     return rc;
1269 nigel 53 }
1270     }
1271    
1272 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1273     been requested. */
1274 nigel 53
1275 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1276    
1277     /* Control reaches here if we have a regular file, or if we have a directory
1278     and recursion or skipping was not requested, or if we have anything else and
1279     skipping was not requested. The scan proceeds. If this is the first and only
1280     argument at top level, we don't show the file name, unless we are only showing
1281     the file name, or the filename was forced (-H). */
1282    
1283 nigel 77 in = fopen(pathname, "r");
1284 nigel 53 if (in == NULL)
1285     {
1286 nigel 77 if (!silent)
1287     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1288     strerror(errno));
1289 nigel 53 return 2;
1290     }
1291    
1292 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1293     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1294 nigel 77
1295 nigel 53 fclose(in);
1296     return rc;
1297     }
1298    
1299    
1300    
1301    
1302     /*************************************************
1303 nigel 49 * Usage function *
1304     *************************************************/
1305    
1306     static int
1307     usage(int rc)
1308     {
1309 nigel 87 option_item *op;
1310     fprintf(stderr, "Usage: pcregrep [-");
1311     for (op = optionlist; op->one_char != 0; op++)
1312     {
1313     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1314     }
1315     fprintf(stderr, "] [long options] [pattern] [files]\n");
1316 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1317 nigel 49 return rc;
1318     }
1319    
1320    
1321    
1322    
1323     /*************************************************
1324 nigel 53 * Help function *
1325     *************************************************/
1326    
1327     static void
1328     help(void)
1329     {
1330     option_item *op;
1331    
1332 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1333 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1334 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1335     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1336 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1337    
1338     printf("Options:\n");
1339    
1340     for (op = optionlist; op->one_char != 0; op++)
1341     {
1342     int n;
1343     char s[4];
1344     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1345     printf(" %s --%s%n", s, op->long_name, &n);
1346     n = 30 - n;
1347     if (n < 1) n = 1;
1348     printf("%.*s%s\n", n, " ", op->help_text);
1349     }
1350    
1351 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1352     printf("trailing white space is removed and blank lines are ignored.\n");
1353     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1354 nigel 53
1355 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1356 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1357     }
1358    
1359    
1360    
1361    
1362     /*************************************************
1363 nigel 77 * Handle a single-letter, no data option *
1364 nigel 53 *************************************************/
1365    
1366     static int
1367     handle_option(int letter, int options)
1368     {
1369     switch(letter)
1370     {
1371 nigel 87 case N_HELP: help(); exit(0);
1372 nigel 53 case 'c': count_only = TRUE; break;
1373 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1374     case 'H': filenames = FN_FORCE; break;
1375     case 'h': filenames = FN_NONE; break;
1376 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1377 nigel 87 case 'l': filenames = FN_ONLY; break;
1378     case 'L': filenames = FN_NOMATCH_ONLY; break;
1379 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1380 nigel 53 case 'n': number = TRUE; break;
1381 nigel 87 case 'o': only_matching = TRUE; break;
1382 nigel 77 case 'q': quiet = TRUE; break;
1383 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1384 nigel 53 case 's': silent = TRUE; break;
1385 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1386 nigel 53 case 'v': invert = TRUE; break;
1387 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1388     case 'x': process_options |= PO_LINE_MATCH; break;
1389 nigel 53
1390     case 'V':
1391 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1392 nigel 53 exit(0);
1393     break;
1394    
1395     default:
1396     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1397     exit(usage(2));
1398     }
1399    
1400     return options;
1401     }
1402    
1403    
1404    
1405    
1406     /*************************************************
1407 nigel 87 * Construct printed ordinal *
1408     *************************************************/
1409    
/* This turns a number into an English ordinal: "1st", "2nd", "3rd", "4th",
and so on. Numbers whose last two digits are 11, 12, or 13 take "th" ("11th",
"112th"), as do zero and all negative numbers.

Argument:   n   the number to convert
Returns:    pointer to a static buffer holding the text; the next call
            overwrites it, so the caller must use or copy it first
*/

static char *
ordin(int n)
{
/* Big enough for the longest decimal rendering of an int (sign included), the
two-letter suffix, and the terminating zero. */

static char buffer[sizeof(int) * 3 + 5];
const char *suffix = "th";
char *p = buffer;

sprintf(p, "%d", n);
p += strlen(p);

/* The teens are irregular, so the last two digits are examined before the
last one. For negative n both remainders are <= 0, which leaves "th". */

if (n%100 < 11 || n%100 > 13)
  {
  switch (n%10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

strcpy(p, suffix);
return buffer;
}
1428    
1429    
1430    
1431     /*************************************************
1432     * Compile a single pattern *
1433     *************************************************/
1434    
1435     /* When the -F option has been used, this is called for each substring.
1436     Otherwise it's called for each supplied pattern.
1437    
1438     Arguments:
1439     pattern the pattern string
1440     options the PCRE options
1441     filename the file name, or NULL for a command-line pattern
1442     count 0 if this is the only command line pattern, or
1443     number of the command line pattern, or
1444     linenumber for a pattern from a file
1445    
1446     Returns: TRUE on success, FALSE after an error
1447     */
1448    
1449     static BOOL
1450     compile_single_pattern(char *pattern, int options, char *filename, int count)
1451     {
1452     char buffer[MBUFTHIRD + 16];
1453     const char *error;
1454     int errptr;
1455    
1456     if (pattern_count >= MAX_PATTERN_COUNT)
1457     {
1458     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1459     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1460     return FALSE;
1461     }
1462    
1463     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1464     suffix[process_options]);
1465     pattern_list[pattern_count] =
1466     pcre_compile(buffer, options, &error, &errptr, pcretables);
1467 ph10 142 if (pattern_list[pattern_count] != NULL)
1468 ph10 141 {
1469 ph10 142 pattern_count++;
1470 ph10 141 return TRUE;
1471 ph10 142 }
1472 nigel 87
1473     /* Handle compile errors */
1474    
1475     errptr -= (int)strlen(prefix[process_options]);
1476     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1477    
1478     if (filename == NULL)
1479     {
1480     if (count == 0)
1481     fprintf(stderr, "pcregrep: Error in command-line regex "
1482     "at offset %d: %s\n", errptr, error);
1483     else
1484     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1485     "at offset %d: %s\n", ordin(count), errptr, error);
1486     }
1487     else
1488     {
1489     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1490     "at offset %d: %s\n", count, filename, errptr, error);
1491     }
1492    
1493     return FALSE;
1494     }
1495    
1496    
1497    
1498     /*************************************************
1499     * Compile one supplied pattern *
1500     *************************************************/
1501    
1502     /* When the -F option has been used, each string may be a list of strings,
1503 nigel 91 separated by line breaks. They will be matched literally.
1504 nigel 87
1505     Arguments:
1506     pattern the pattern string
1507     options the PCRE options
1508     filename the file name, or NULL for a command-line pattern
1509     count 0 if this is the only command line pattern, or
1510     number of the command line pattern, or
1511     linenumber for a pattern from a file
1512    
1513     Returns: TRUE on success, FALSE after an error
1514     */
1515    
1516     static BOOL
1517     compile_pattern(char *pattern, int options, char *filename, int count)
1518     {
1519     if ((process_options & PO_FIXED_STRINGS) != 0)
1520     {
1521 nigel 93 char *eop = pattern + strlen(pattern);
1522 nigel 87 char buffer[MBUFTHIRD];
1523     for(;;)
1524     {
1525 nigel 93 int ellength;
1526     char *p = end_of_line(pattern, eop, &ellength);
1527     if (ellength == 0)
1528 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1529 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1530 nigel 93 pattern = p;
1531 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1532     return FALSE;
1533     }
1534     }
1535     else return compile_single_pattern(pattern, options, filename, count);
1536     }
1537    
1538    
1539    
1540     /*************************************************
1541 nigel 49 * Main program *
1542     *************************************************/
1543    
1544 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1545    
1546 nigel 49 int
1547     main(int argc, char **argv)
1548     {
1549 nigel 53 int i, j;
1550 nigel 49 int rc = 1;
1551 nigel 87 int pcre_options = 0;
1552     int cmd_pattern_count = 0;
1553 ph10 141 int hint_count = 0;
1554 nigel 49 int errptr;
1555 nigel 87 BOOL only_one_at_top;
1556     char *patterns[MAX_PATTERN_COUNT];
1557     const char *locale_from = "--locale";
1558 nigel 49 const char *error;
1559    
1560 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1561     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1562     */
1563 nigel 91
1564     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1565     switch(i)
1566     {
1567     default: newline = (char *)"lf"; break;
1568     case '\r': newline = (char *)"cr"; break;
1569     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1570 nigel 93 case -1: newline = (char *)"any"; break;
1571 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1572 nigel 91 }
1573    
1574 nigel 49 /* Process the options */
1575    
1576     for (i = 1; i < argc; i++)
1577     {
1578 nigel 77 option_item *op = NULL;
1579     char *option_data = (char *)""; /* default to keep compiler happy */
1580     BOOL longop;
1581     BOOL longopwasequals = FALSE;
1582    
1583 nigel 49 if (argv[i][0] != '-') break;
1584 nigel 53
1585 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1586 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1587 nigel 63
1588 nigel 77 if (argv[i][1] == 0)
1589     {
1590 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1591 nigel 77 else exit(usage(2));
1592     }
1593 nigel 63
1594 nigel 77 /* Handle a long name option, or -- to terminate the options */
1595 nigel 53
1596     if (argv[i][1] == '-')
1597 nigel 49 {
1598 nigel 77 char *arg = argv[i] + 2;
1599     char *argequals = strchr(arg, '=');
1600 nigel 53
1601 nigel 77 if (*arg == 0) /* -- terminates options */
1602 nigel 49 {
1603 nigel 77 i++;
1604     break; /* out of the options-handling loop */
1605 nigel 53 }
1606 nigel 49
1607 nigel 77 longop = TRUE;
1608    
1609     /* Some long options have data that follows after =, for example file=name.
1610     Some options have variations in the long name spelling: specifically, we
1611     allow "regexp" because GNU grep allows it, though I personally go along
1612 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1613     These options are entered in the table as "regex(p)". No option is in both
1614     these categories, fortunately. */
1615 nigel 77
1616 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1617     {
1618 nigel 77 char *opbra = strchr(op->long_name, '(');
1619     char *equals = strchr(op->long_name, '=');
1620     if (opbra == NULL) /* Not a (p) case */
1621 nigel 53 {
1622 nigel 77 if (equals == NULL) /* Not thing=data case */
1623     {
1624     if (strcmp(arg, op->long_name) == 0) break;
1625     }
1626     else /* Special case xxx=data */
1627     {
1628     int oplen = equals - op->long_name;
1629 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1630 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1631     {
1632     option_data = arg + arglen;
1633     if (*option_data == '=')
1634     {
1635     option_data++;
1636     longopwasequals = TRUE;
1637     }
1638     break;
1639     }
1640     }
1641 nigel 53 }
1642 nigel 77 else /* Special case xxxx(p) */
1643     {
1644     char buff1[24];
1645     char buff2[24];
1646     int baselen = opbra - op->long_name;
1647     sprintf(buff1, "%.*s", baselen, op->long_name);
1648 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1649 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1650 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1651     break;
1652     }
1653 nigel 53 }
1654 nigel 77
1655 nigel 53 if (op->one_char == 0)
1656     {
1657     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1658     exit(usage(2));
1659     }
1660     }
1661 nigel 49
1662 nigel 89
1663     /* Jeffrey Friedl's debugging harness uses these additional options which
1664     are not in the right form for putting in the option table because they use
1665     only one hyphen, yet are more than one character long. By putting them
1666     separately here, they will not get displayed as part of the help() output,
1667     but I don't think Jeffrey will care about that. */
1668    
1669     #ifdef JFRIEDL_DEBUG
1670     else if (strcmp(argv[i], "-pre") == 0) {
1671     jfriedl_prefix = argv[++i];
1672     continue;
1673     } else if (strcmp(argv[i], "-post") == 0) {
1674     jfriedl_postfix = argv[++i];
1675     continue;
1676     } else if (strcmp(argv[i], "-XT") == 0) {
1677     sscanf(argv[++i], "%d", &jfriedl_XT);
1678     continue;
1679     } else if (strcmp(argv[i], "-XR") == 0) {
1680     sscanf(argv[++i], "%d", &jfriedl_XR);
1681     continue;
1682     }
1683     #endif
1684    
1685    
1686 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1687     continue till we hit the last one or one that needs data. */
1688 nigel 53
1689     else
1690     {
1691     char *s = argv[i] + 1;
1692 nigel 77 longop = FALSE;
1693 nigel 53 while (*s != 0)
1694     {
1695 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1696     { if (*s == op->one_char) break; }
1697     if (op->one_char == 0)
1698 nigel 53 {
1699 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1700     *s, argv[i]);
1701     exit(usage(2));
1702     }
1703     if (op->type != OP_NODATA || s[1] == 0)
1704     {
1705     option_data = s+1;
1706 nigel 53 break;
1707     }
1708 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1709 nigel 49 }
1710     }
1711 nigel 77
1712 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1713     is OP_NODATA, it means that there is no data, and the option might set
1714     something in the PCRE options. */
1715 nigel 77
1716     if (op->type == OP_NODATA)
1717     {
1718 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1719     continue;
1720     }
1721    
1722     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1723     either has a value or defaults to something. It cannot have data in a
1724     separate item. At the moment, the only such options are "colo(u)r" and
1725 nigel 89 Jeffrey Friedl's special -S debugging option. */
1726 nigel 87
1727     if (*option_data == 0 &&
1728     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1729     {
1730     switch (op->one_char)
1731 nigel 77 {
1732 nigel 87 case N_COLOUR:
1733     colour_option = (char *)"auto";
1734     break;
1735     #ifdef JFRIEDL_DEBUG
1736     case 'S':
1737     S_arg = 0;
1738     break;
1739     #endif
1740 nigel 77 }
1741 nigel 87 continue;
1742     }
1743 nigel 77
1744 nigel 87 /* Otherwise, find the data string for the option. */
1745    
1746     if (*option_data == 0)
1747     {
1748     if (i >= argc - 1 || longopwasequals)
1749 nigel 77 {
1750 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1751     exit(usage(2));
1752     }
1753     option_data = argv[++i];
1754     }
1755    
1756     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1757     multiple times to create a list of patterns. */
1758    
1759     if (op->type == OP_PATLIST)
1760     {
1761     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1762     {
1763     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1764     MAX_PATTERN_COUNT);
1765     return 2;
1766     }
1767     patterns[cmd_pattern_count++] = option_data;
1768     }
1769    
1770     /* Otherwise, deal with single string or numeric data values. */
1771    
1772     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1773     {
1774     *((char **)op->dataptr) = option_data;
1775     }
1776     else
1777     {
1778     char *endptr;
1779     int n = strtoul(option_data, &endptr, 10);
1780     if (*endptr != 0)
1781     {
1782     if (longop)
1783 nigel 77 {
1784 nigel 87 char *equals = strchr(op->long_name, '=');
1785     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1786     equals - op->long_name;
1787     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1788     option_data, nlen, op->long_name);
1789 nigel 77 }
1790 nigel 87 else
1791     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1792     option_data, op->one_char);
1793     exit(usage(2));
1794 nigel 77 }
1795 nigel 87 *((int *)op->dataptr) = n;
1796 nigel 77 }
1797 nigel 49 }
1798    
1799 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1800     for -A and -B. */
1801    
1802     if (both_context > 0)
1803     {
1804     if (after_context == 0) after_context = both_context;
1805     if (before_context == 0) before_context = both_context;
1806     }
1807    
1808 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1809     LC_ALL environment variable is set, and if so, use it. */
1810 nigel 49
1811 nigel 87 if (locale == NULL)
1812 nigel 53 {
1813 nigel 87 locale = getenv("LC_ALL");
1814     locale_from = "LCC_ALL";
1815 nigel 53 }
1816 nigel 49
1817 nigel 87 if (locale == NULL)
1818     {
1819     locale = getenv("LC_CTYPE");
1820     locale_from = "LC_CTYPE";
1821     }
1822 nigel 49
1823 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1824     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1825    
1826     if (locale != NULL)
1827 nigel 49 {
1828 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1829 nigel 53 {
1830 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1831     locale, locale_from);
1832 nigel 53 return 2;
1833     }
1834 nigel 87 pcretables = pcre_maketables();
1835     }
1836 nigel 77
1837 nigel 87 /* Sort out colouring */
1838    
1839     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1840     {
1841     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1842     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1843     else
1844 nigel 53 {
1845 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1846     colour_option);
1847     return 2;
1848 nigel 77 }
1849 nigel 87 if (do_colour)
1850 nigel 77 {
1851 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1852     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1853     if (cs != NULL) colour_string = cs;
1854 nigel 77 }
1855 nigel 87 }
1856 nigel 77
1857 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1858    
1859     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1860     {
1861     pcre_options |= PCRE_NEWLINE_CR;
1862 nigel 93 endlinetype = EL_CR;
1863 nigel 91 }
1864     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1865     {
1866     pcre_options |= PCRE_NEWLINE_LF;
1867 nigel 93 endlinetype = EL_LF;
1868 nigel 91 }
1869     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1870     {
1871     pcre_options |= PCRE_NEWLINE_CRLF;
1872 nigel 93 endlinetype = EL_CRLF;
1873 nigel 91 }
1874 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1875     {
1876     pcre_options |= PCRE_NEWLINE_ANY;
1877     endlinetype = EL_ANY;
1878     }
1879 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1880     {
1881     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1882     endlinetype = EL_ANYCRLF;
1883     }
1884 nigel 91 else
1885     {
1886     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1887     return 2;
1888     }
1889    
1890 nigel 87 /* Interpret the text values for -d and -D */
1891    
1892     if (dee_option != NULL)
1893     {
1894     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1895     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1896     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1897     else
1898 nigel 77 {
1899 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1900     return 2;
1901 nigel 53 }
1902 nigel 49 }
1903    
1904 nigel 87 if (DEE_option != NULL)
1905     {
1906     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1907     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1908     else
1909     {
1910     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1911     return 2;
1912     }
1913     }
1914 nigel 49
1915 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1916 nigel 87
1917     #ifdef JFRIEDL_DEBUG
1918     if (S_arg > 9)
1919 nigel 49 {
1920 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1921     return 2;
1922     }
1923 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1924     {
1925     if (jfriedl_XT == 0) jfriedl_XT = 1;
1926     if (jfriedl_XR == 0) jfriedl_XR = 1;
1927     }
1928 nigel 87 #endif
1929 nigel 77
1930 nigel 87 /* Get memory to store the pattern and hints lists. */
1931    
1932     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1933     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1934    
1935     if (pattern_list == NULL || hints_list == NULL)
1936     {
1937     fprintf(stderr, "pcregrep: malloc failed\n");
1938 ph10 123 goto EXIT2;
1939 nigel 87 }
1940    
1941     /* If no patterns were provided by -e, and there is no file provided by -f,
1942     the first argument is the one and only pattern, and it must exist. */
1943    
1944     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1945     {
1946 nigel 63 if (i >= argc) return usage(2);
1947 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1948     }
1949 nigel 77
1950 nigel 87 /* Compile the patterns that were provided on the command line, either by
1951     multiple uses of -e or as a single unkeyed pattern. */
1952    
1953     for (j = 0; j < cmd_pattern_count; j++)
1954     {
1955     if (!compile_pattern(patterns[j], pcre_options, NULL,
1956     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1957 ph10 123 goto EXIT2;
1958 nigel 87 }
1959    
1960     /* Compile the regular expressions that are provided in a file. */
1961    
1962     if (pattern_filename != NULL)
1963     {
1964     int linenumber = 0;
1965     FILE *f;
1966     char *filename;
1967     char buffer[MBUFTHIRD];
1968    
1969     if (strcmp(pattern_filename, "-") == 0)
1970 nigel 77 {
1971 nigel 87 f = stdin;
1972     filename = stdin_name;
1973 nigel 77 }
1974 nigel 87 else
1975 nigel 77 {
1976 nigel 87 f = fopen(pattern_filename, "r");
1977     if (f == NULL)
1978     {
1979     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1980     strerror(errno));
1981 ph10 123 goto EXIT2;
1982 nigel 87 }
1983     filename = pattern_filename;
1984 nigel 77 }
1985    
1986 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1987 nigel 53 {
1988 nigel 87 char *s = buffer + (int)strlen(buffer);
1989     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1990     *s = 0;
1991     linenumber++;
1992     if (buffer[0] == 0) continue; /* Skip blank lines */
1993     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1994 ph10 121 goto EXIT2;
1995 nigel 53 }
1996 nigel 87
1997     if (f != stdin) fclose(f);
1998 nigel 49 }
1999    
2000 nigel 77 /* Study the regular expressions, as we will be running them many times */
2001 nigel 53
2002     for (j = 0; j < pattern_count; j++)
2003     {
2004     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2005     if (error != NULL)
2006     {
2007     char s[16];
2008     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2009     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2010 ph10 121 goto EXIT2;
2011 nigel 53 }
2012 ph10 142 hint_count++;
2013 nigel 53 }
2014    
2015 nigel 77 /* If there are include or exclude patterns, compile them. */
2016    
2017     if (exclude_pattern != NULL)
2018     {
2019 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2020     pcretables);
2021 nigel 77 if (exclude_compiled == NULL)
2022     {
2023     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2024     errptr, error);
2025 ph10 121 goto EXIT2;
2026 nigel 77 }
2027     }
2028    
2029     if (include_pattern != NULL)
2030     {
2031 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2032     pcretables);
2033 nigel 77 if (include_compiled == NULL)
2034     {
2035     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2036     errptr, error);
2037 ph10 121 goto EXIT2;
2038 nigel 77 }
2039     }
2040    
2041 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2042 nigel 49
2043 nigel 87 if (i >= argc)
2044 ph10 121 {
2045     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2046     goto EXIT;
2047 ph10 123 }
2048 nigel 49
2049 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2050     Pass in the fact that there is only one argument at top level - this suppresses
2051 nigel 87 the file name if the argument is not a directory and filenames are not
2052     otherwise forced. */
2053 nigel 49
2054 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2055 nigel 49
2056     for (; i < argc; i++)
2057     {
2058 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2059     only_one_at_top);
2060 nigel 77 if (frc > 1) rc = frc;
2061     else if (frc == 0 && rc == 1) rc = 0;
2062 nigel 49 }
2063    
2064 ph10 121 EXIT:
2065     if (pattern_list != NULL)
2066     {
2067 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2068 ph10 121 free(pattern_list);
2069 ph10 123 }
2070 ph10 121 if (hints_list != NULL)
2071     {
2072 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2073 ph10 121 free(hints_list);
2074 ph10 123 }
2075 nigel 49 return rc;
2076 ph10 121
2077     EXIT2:
2078     rc = 2;
2079     goto EXIT;
2080 nigel 49 }
2081    
2082 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12