/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 149 - (hide annotations) (download)
Mon Apr 16 15:28:08 2007 UTC (7 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 58814 byte(s)
Add PCRE_NEWLINE_ANYCRLF.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41     # include <config.h>
42     #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 137 #ifdef HAVE_UNISTD_H
54     # include <unistd.h>
55     #endif
56 nigel 77
57 ph10 137 #include <pcre.h>
58 nigel 49
59     #define FALSE 0
60     #define TRUE 1
61    
62     typedef int BOOL;
63    
64 nigel 53 #define MAX_PATTERN_COUNT 100
65 nigel 49
66 nigel 77 #if BUFSIZ > 8192
67     #define MBUFTHIRD BUFSIZ
68     #else
69     #define MBUFTHIRD 8192
70     #endif
71 nigel 49
72 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
73     output. The order is important; it is assumed that a file name is wanted for
74     all values greater than FN_DEFAULT. */
75 nigel 77
76 nigel 87 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
77    
78     /* Actions for the -d and -D options */
79    
80     enum { dee_READ, dee_SKIP, dee_RECURSE };
81     enum { DEE_READ, DEE_SKIP };
82    
83     /* Actions for special processing options (flag bits) */
84    
85     #define PO_WORD_MATCH 0x0001
86     #define PO_LINE_MATCH 0x0002
87     #define PO_FIXED_STRINGS 0x0004
88    
89 nigel 93 /* Line ending types */
90 nigel 87
91 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
92 nigel 87
93 nigel 93
94    
95 nigel 49 /*************************************************
96     * Global variables *
97     *************************************************/
98    
99 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
100     regular code. */
101    
102     #ifdef JFRIEDL_DEBUG
103     static int S_arg = -1;
104 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106     static const char *jfriedl_prefix = "";
107     static const char *jfriedl_postfix = "";
108 nigel 87 #endif
109    
110 nigel 93 static int endlinetype;
111 nigel 91
112 nigel 87 static char *colour_string = (char *)"1;31";
113     static char *colour_option = NULL;
114     static char *dee_option = NULL;
115     static char *DEE_option = NULL;
116 nigel 91 static char *newline = NULL;
117 nigel 53 static char *pattern_filename = NULL;
118 nigel 77 static char *stdin_name = (char *)"(standard input)";
119 nigel 87 static char *locale = NULL;
120    
121     static const unsigned char *pcretables = NULL;
122    
123 nigel 53 static int pattern_count = 0;
124 ph10 121 static pcre **pattern_list = NULL;
125     static pcre_extra **hints_list = NULL;
126 nigel 49
127 nigel 77 static char *include_pattern = NULL;
128     static char *exclude_pattern = NULL;
129    
130     static pcre *include_compiled = NULL;
131     static pcre *exclude_compiled = NULL;
132    
133     static int after_context = 0;
134     static int before_context = 0;
135     static int both_context = 0;
136 nigel 87 static int dee_action = dee_READ;
137     static int DEE_action = DEE_READ;
138     static int error_count = 0;
139     static int filenames = FN_DEFAULT;
140     static int process_options = 0;
141 nigel 77
142 nigel 49 static BOOL count_only = FALSE;
143 nigel 87 static BOOL do_colour = FALSE;
144 nigel 77 static BOOL hyphenpending = FALSE;
145 nigel 49 static BOOL invert = FALSE;
146 nigel 77 static BOOL multiline = FALSE;
147 nigel 49 static BOOL number = FALSE;
148 nigel 87 static BOOL only_matching = FALSE;
149 nigel 77 static BOOL quiet = FALSE;
150 nigel 49 static BOOL silent = FALSE;
151 nigel 93 static BOOL utf8 = FALSE;
152 nigel 49
153 nigel 53 /* Structure for options and list of them */
154 nigel 49
155 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
156     OP_PATLIST };
157 nigel 77
158 nigel 53 typedef struct option_item {
159 nigel 77 int type;
160 nigel 53 int one_char;
161 nigel 77 void *dataptr;
162 nigel 67 const char *long_name;
163     const char *help_text;
164 nigel 53 } option_item;
165 nigel 49
166 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
167     used to identify them. */
168    
169     #define N_COLOUR (-1)
170     #define N_EXCLUDE (-2)
171     #define N_HELP (-3)
172     #define N_INCLUDE (-4)
173     #define N_LABEL (-5)
174     #define N_LOCALE (-6)
175     #define N_NULL (-7)
176    
177 nigel 53 static option_item optionlist[] = {
178 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
179     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
180     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
181     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
182     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
183     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
184     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
185     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
186     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
187     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
188     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
189     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
190     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
191     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
192     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
193     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
194     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
195     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
196     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
197     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
198     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
199 ph10 149 { OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
200 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
201     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
202     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
203     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
204     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
205     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
206     #ifdef JFRIEDL_DEBUG
207     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
208     #endif
209     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
210     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
211     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
212     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
213     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
214     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
215     { OP_NODATA, 0, NULL, NULL, NULL }
216 nigel 53 };
217    
218 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
219     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
220     that the combination of -w and -x has the same effect as -x on its own, so we
221     can treat them as the same. */
222 nigel 53
223 nigel 87 static const char *prefix[] = {
224     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
225    
226     static const char *suffix[] = {
227     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
228    
229 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
230 nigel 87
231 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232 nigel 87
233 nigel 93 const char utf8_table4[] = {
234     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238    
239    
240    
241 nigel 53 /*************************************************
242 nigel 87 * OS-specific functions *
243 nigel 53 *************************************************/
244    
245     /* These functions are defined so that they can be made system specific,
246 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
247 nigel 53
248    
249     /************* Directory scanning in Unix ***********/
250    
251 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252 nigel 53 #include <sys/types.h>
253     #include <sys/stat.h>
254     #include <dirent.h>
255    
256     typedef DIR directory_type;
257    
258 nigel 67 static int
259 nigel 53 isdirectory(char *filename)
260     {
261     struct stat statbuf;
262     if (stat(filename, &statbuf) < 0)
263     return 0; /* In the expectation that opening as a file will fail */
264     return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
265     }
266    
267 nigel 67 static directory_type *
268 nigel 53 opendirectory(char *filename)
269     {
270     return opendir(filename);
271     }
272    
273 nigel 67 static char *
274 nigel 53 readdirectory(directory_type *dir)
275     {
276     for (;;)
277     {
278     struct dirent *dent = readdir(dir);
279     if (dent == NULL) return NULL;
280     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
281     return dent->d_name;
282     }
283     return NULL; /* Keep compiler happy; never executed */
284     }
285    
286 nigel 67 static void
287 nigel 53 closedirectory(directory_type *dir)
288     {
289     closedir(dir);
290     }
291    
292    
293 nigel 87 /************* Test for regular file in Unix **********/
294    
295     static int
296     isregfile(char *filename)
297     {
298     struct stat statbuf;
299     if (stat(filename, &statbuf) < 0)
300     return 1; /* In the expectation that opening as a file will fail */
301     return (statbuf.st_mode & S_IFMT) == S_IFREG;
302     }
303    
304    
305     /************* Test stdout for being a terminal in Unix **********/
306    
307     static BOOL
308     is_stdout_tty(void)
309     {
310     return isatty(fileno(stdout));
311     }
312    
313    
314 nigel 63 /************* Directory scanning in Win32 ***********/
315 nigel 53
316 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
317 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318     when it did not exist. */
319 nigel 53
320 nigel 63
321 ph10 97 #elif HAVE_WINDOWS_H
322 nigel 63
323     #ifndef STRICT
324     # define STRICT
325     #endif
326     #ifndef WIN32_LEAN_AND_MEAN
327     # define WIN32_LEAN_AND_MEAN
328     #endif
329 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
330     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331     #endif
332    
333 nigel 63 #include <windows.h>
334    
335     typedef struct directory_type
336     {
337     HANDLE handle;
338     BOOL first;
339     WIN32_FIND_DATA data;
340     } directory_type;
341    
342     int
343     isdirectory(char *filename)
344     {
345     DWORD attr = GetFileAttributes(filename);
346     if (attr == INVALID_FILE_ATTRIBUTES)
347     return 0;
348     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
349     }
350    
351     directory_type *
352     opendirectory(char *filename)
353     {
354     size_t len;
355     char *pattern;
356     directory_type *dir;
357     DWORD err;
358     len = strlen(filename);
359     pattern = (char *) malloc(len + 3);
360     dir = (directory_type *) malloc(sizeof(*dir));
361     if ((pattern == NULL) || (dir == NULL))
362     {
363     fprintf(stderr, "pcregrep: malloc failed\n");
364     exit(2);
365     }
366     memcpy(pattern, filename, len);
367     memcpy(&(pattern[len]), "\\*", 3);
368     dir->handle = FindFirstFile(pattern, &(dir->data));
369     if (dir->handle != INVALID_HANDLE_VALUE)
370     {
371     free(pattern);
372     dir->first = TRUE;
373     return dir;
374     }
375     err = GetLastError();
376     free(pattern);
377     free(dir);
378     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
379     return NULL;
380     }
381    
382     char *
383     readdirectory(directory_type *dir)
384     {
385     for (;;)
386     {
387     if (!dir->first)
388     {
389     if (!FindNextFile(dir->handle, &(dir->data)))
390     return NULL;
391     }
392     else
393     {
394     dir->first = FALSE;
395     }
396     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
397     return dir->data.cFileName;
398     }
399     #ifndef _MSC_VER
400     return NULL; /* Keep compiler happy; never executed */
401     #endif
402     }
403    
404     void
405     closedirectory(directory_type *dir)
406     {
407     FindClose(dir->handle);
408     free(dir);
409     }
410    
411    
412 nigel 87 /************* Test for regular file in Win32 **********/
413    
414     /* I don't know how to do this, or if it can be done; assume all paths are
415     regular if they are not directories. */
416    
/* Report whether a path should be treated as a regular file. No real test is
made here; anything that isdirectory() does not report as a directory is
assumed to be regular.

Arguments:
  filename   the path to examine

Returns:     1 if isdirectory() returns 0 for the path, 0 otherwise
*/

int isregfile(char *filename)
{
  return !isdirectory(filename);
}
421    
422    
423     /************* Test stdout for being a terminal in Win32 **********/
424    
425     /* I don't know how to do this; assume never */
426    
427     static BOOL
428     is_stdout_tty(void)
429     {
430     FALSE;
431     }
432    
433    
434 nigel 53 /************* Directory scanning when we can't do it ***********/
435    
436     /* The type is void, and apart from isdirectory(), the functions do nothing. */
437    
438 nigel 63 #else
439    
/* Stand-ins for builds with no directory support. The scan type is void and
none of the functions touches its argument. */

typedef void directory_type;

/* Always answers "not a directory". */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* Never manages to open anything; always yields a null handle. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* Never has an entry to offer; always yields a null name. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
446    
447 nigel 87
448     /************* Test for regular when we can't do it **********/
449    
450     /* Assume all files are regular. */
451    
/* With no means of testing, every path is taken to be a regular file.

Arguments:
  filename   the path (not examined)

Returns:     1
*/

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
453    
454    
455     /************* Test stdout for being a terminal when we can't do it **********/
456    
457     static BOOL
458     is_stdout_tty(void)
459     {
460     return FALSE;
461     }
462    
463    
464 nigel 53 #endif
465    
466    
467    
468 ph10 137 #ifndef HAVE_STRERROR
469 nigel 49 /*************************************************
470     * Provide strerror() for non-ANSI libraries *
471     *************************************************/
472    
473     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
474     in their libraries, but can provide the same facility by this simple
475     alternative function. */
476    
/* The error-message table and its size, which the system library is expected
to supply when strerror() itself is missing. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Look an error number up in sys_errlist.

Arguments:
  n          the error number

Returns:     sys_errlist[n] when 0 <= n < sys_nerr;
             otherwise the fixed text "unknown error number"
*/

char *
strerror(int n)
{
  int in_table = (n >= 0 && n < sys_nerr);

  return in_table ? sys_errlist[n] : "unknown error number";
}
486     #endif /* HAVE_STRERROR */
487    
488    
489    
490     /*************************************************
491 nigel 93 * Find end of line *
492     *************************************************/
493    
494     /* The length of the endline sequence that is found is set via lenptr. This may
495     be zero at the very end of the file if there is no line-ending sequence there.
496    
497     Arguments:
498     p current position in line
499     endptr end of available data
500     lenptr where to put the length of the eol sequence
501    
502     Returns: pointer to the last byte of the line
503     */
504    
505     static char *
506     end_of_line(char *p, char *endptr, int *lenptr)
507     {
508     switch(endlinetype)
509     {
510     default: /* Just in case */
511     case EL_LF:
512     while (p < endptr && *p != '\n') p++;
513     if (p < endptr)
514     {
515     *lenptr = 1;
516     return p + 1;
517     }
518     *lenptr = 0;
519     return endptr;
520    
521     case EL_CR:
522     while (p < endptr && *p != '\r') p++;
523     if (p < endptr)
524     {
525     *lenptr = 1;
526     return p + 1;
527     }
528     *lenptr = 0;
529     return endptr;
530    
531     case EL_CRLF:
532     for (;;)
533     {
534     while (p < endptr && *p != '\r') p++;
535     if (++p >= endptr)
536     {
537     *lenptr = 0;
538     return endptr;
539     }
540     if (*p == '\n')
541     {
542     *lenptr = 2;
543     return p + 1;
544     }
545     }
546     break;
547    
548 ph10 149 case EL_ANYCRLF:
549     while (p < endptr)
550     {
551     int extra = 0;
552     register int c = *((unsigned char *)p);
553    
554     if (utf8 && c >= 0xc0)
555     {
556     int gcii, gcss;
557     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
558     gcss = 6*extra;
559     c = (c & utf8_table3[extra]) << gcss;
560     for (gcii = 1; gcii <= extra; gcii++)
561     {
562     gcss -= 6;
563     c |= (p[gcii] & 0x3f) << gcss;
564     }
565     }
566    
567     p += 1 + extra;
568    
569     switch (c)
570     {
571     case 0x0a: /* LF */
572     *lenptr = 1;
573     return p;
574    
575     case 0x0d: /* CR */
576     if (p < endptr && *p == 0x0a)
577     {
578     *lenptr = 2;
579     p++;
580     }
581     else *lenptr = 1;
582     return p;
583    
584     default:
585     break;
586     }
587     } /* End of loop for ANYCRLF case */
588    
589     *lenptr = 0; /* Must have hit the end */
590     return endptr;
591    
592 nigel 93 case EL_ANY:
593     while (p < endptr)
594     {
595     int extra = 0;
596     register int c = *((unsigned char *)p);
597    
598     if (utf8 && c >= 0xc0)
599     {
600     int gcii, gcss;
601     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
602     gcss = 6*extra;
603     c = (c & utf8_table3[extra]) << gcss;
604     for (gcii = 1; gcii <= extra; gcii++)
605     {
606     gcss -= 6;
607     c |= (p[gcii] & 0x3f) << gcss;
608     }
609     }
610    
611     p += 1 + extra;
612    
613     switch (c)
614     {
615     case 0x0a: /* LF */
616     case 0x0b: /* VT */
617     case 0x0c: /* FF */
618     *lenptr = 1;
619     return p;
620    
621     case 0x0d: /* CR */
622     if (p < endptr && *p == 0x0a)
623     {
624     *lenptr = 2;
625     p++;
626     }
627     else *lenptr = 1;
628     return p;
629    
630     case 0x85: /* NEL */
631     *lenptr = utf8? 2 : 1;
632     return p;
633    
634     case 0x2028: /* LS */
635     case 0x2029: /* PS */
636     *lenptr = 3;
637     return p;
638    
639     default:
640     break;
641     }
642     } /* End of loop for ANY case */
643    
644     *lenptr = 0; /* Must have hit the end */
645     return endptr;
646     } /* End of overall switch */
647     }
648    
649    
650    
651     /*************************************************
652     * Find start of previous line *
653     *************************************************/
654    
655     /* This is called when looking back for before lines to print.
656    
657     Arguments:
658     p start of the subsequent line
659     startptr start of available data
660    
661     Returns: pointer to the start of the previous line
662     */
663    
664     static char *
665     previous_line(char *p, char *startptr)
666     {
667     switch(endlinetype)
668     {
669     default: /* Just in case */
670     case EL_LF:
671     p--;
672     while (p > startptr && p[-1] != '\n') p--;
673     return p;
674    
675     case EL_CR:
676     p--;
677     while (p > startptr && p[-1] != '\n') p--;
678     return p;
679    
680     case EL_CRLF:
681     for (;;)
682     {
683     p -= 2;
684     while (p > startptr && p[-1] != '\n') p--;
685     if (p <= startptr + 1 || p[-2] == '\r') return p;
686     }
687     return p; /* But control should never get here */
688    
689     case EL_ANY:
690 ph10 149 case EL_ANYCRLF:
691 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
692     if (utf8) while ((*p & 0xc0) == 0x80) p--;
693    
694     while (p > startptr)
695     {
696     register int c;
697     char *pp = p - 1;
698    
699     if (utf8)
700     {
701     int extra = 0;
702     while ((*pp & 0xc0) == 0x80) pp--;
703     c = *((unsigned char *)pp);
704     if (c >= 0xc0)
705     {
706     int gcii, gcss;
707     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
708     gcss = 6*extra;
709     c = (c & utf8_table3[extra]) << gcss;
710     for (gcii = 1; gcii <= extra; gcii++)
711     {
712     gcss -= 6;
713     c |= (pp[gcii] & 0x3f) << gcss;
714     }
715     }
716     }
717     else c = *((unsigned char *)pp);
718    
719 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
720 nigel 93 {
721     case 0x0a: /* LF */
722 ph10 149 case 0x0d: /* CR */
723     return p;
724    
725     default:
726     break;
727     }
728    
729     else switch (c)
730     {
731     case 0x0a: /* LF */
732 nigel 93 case 0x0b: /* VT */
733     case 0x0c: /* FF */
734     case 0x0d: /* CR */
735     case 0x85: /* NEL */
736     case 0x2028: /* LS */
737     case 0x2029: /* PS */
738     return p;
739    
740     default:
741     break;
742     }
743    
744     p = pp; /* Back one character */
745     } /* End of loop for ANY case */
746    
747     return startptr; /* Hit start of data */
748     } /* End of overall switch */
749     }
750    
751    
752    
753    
754    
755     /*************************************************
756 nigel 77 * Print the previous "after" lines *
757 nigel 49 *************************************************/
758    
759 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
760 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
761     that a binary zero does not terminate it.
762 nigel 77
763     Arguments:
764     lastmatchnumber the number of the last matching line, plus one
765     lastmatchrestart where we restarted after the last match
766     endptr end of available data
767     printname filename for printing
768    
769     Returns: nothing
770     */
771    
772     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
773     char *endptr, char *printname)
774     {
775     if (after_context > 0 && lastmatchnumber > 0)
776     {
777     int count = 0;
778     while (lastmatchrestart < endptr && count++ < after_context)
779     {
780 nigel 93 int ellength;
781 nigel 77 char *pp = lastmatchrestart;
782     if (printname != NULL) fprintf(stdout, "%s-", printname);
783     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
784 nigel 93 pp = end_of_line(pp, endptr, &ellength);
785     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
786     lastmatchrestart = pp;
787 nigel 77 }
788     hyphenpending = TRUE;
789     }
790     }
791    
792    
793    
794     /*************************************************
795     * Grep an individual file *
796     *************************************************/
797    
798     /* This is called from grep_or_recurse() below. It uses a buffer that is three
799     times the value of MBUFTHIRD. The matching point is never allowed to stray into
800     the top third of the buffer, thus keeping more of the file available for
801     context printing or for multiline scanning. For large files, the pointer will
802     be in the middle third most of the time, so the bottom third is available for
803     "before" context printing.
804    
805     Arguments:
806     in the fopened FILE stream
807     printname the file name if it is to be printed for each match
808     or NULL if the file name is not to be printed
809     it cannot be NULL if filenames[_nomatch]_only is set
810    
811     Returns: 0 if there was at least one match
812     1 otherwise (no matches)
813     */
814    
815 nigel 49 static int
816 nigel 77 pcregrep(FILE *in, char *printname)
817 nigel 49 {
818     int rc = 1;
819 nigel 77 int linenumber = 1;
820     int lastmatchnumber = 0;
821 nigel 49 int count = 0;
822     int offsets[99];
823 nigel 77 char *lastmatchrestart = NULL;
824     char buffer[3*MBUFTHIRD];
825     char *ptr = buffer;
826     char *endptr;
827     size_t bufflength;
828     BOOL endhyphenpending = FALSE;
829 nigel 49
830 nigel 77 /* Do the first read into the start of the buffer and set up the pointer to
831     end of what we have. */
832    
833     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
834     endptr = buffer + bufflength;
835    
836     /* Loop while the current pointer is not at the end of the file. For large
837     files, endptr will be at the end of the buffer when we are in the middle of the
838     file, but ptr will never get there, because as soon as it gets over 2/3 of the
839     way, the buffer is shifted left and re-filled. */
840    
841     while (ptr < endptr)
842 nigel 49 {
843 nigel 93 int i, endlinelength;
844 nigel 87 int mrc = 0;
845 nigel 53 BOOL match = FALSE;
846 nigel 77 char *t = ptr;
847     size_t length, linelength;
848 nigel 49
849 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
850     of the subject string to pass to pcre_exec(). In multiline mode, it is the
851     length remainder of the data in the buffer. Otherwise, it is the length of
852     the next line. After matching, we always advance by the length of the next
853     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
854     that any match is constrained to be in the first line. */
855    
856 nigel 93 t = end_of_line(t, endptr, &endlinelength);
857     linelength = t - ptr - endlinelength;
858 nigel 77 length = multiline? endptr - ptr : linelength;
859    
860 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
861    
862     #ifdef JFRIEDL_DEBUG
863     if (jfriedl_XT || jfriedl_XR)
864     {
865     #include <sys/time.h>
866     #include <time.h>
867     struct timeval start_time, end_time;
868     struct timezone dummy;
869    
870     if (jfriedl_XT)
871     {
872     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
873     const char *orig = ptr;
874     ptr = malloc(newlen + 1);
875     if (!ptr) {
876     printf("out of memory");
877     exit(2);
878     }
879     endptr = ptr;
880     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
881     for (i = 0; i < jfriedl_XT; i++) {
882     strncpy(endptr, orig, length);
883     endptr += length;
884     }
885     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
886     length = newlen;
887     }
888    
889     if (gettimeofday(&start_time, &dummy) != 0)
890     perror("bad gettimeofday");
891    
892    
893     for (i = 0; i < jfriedl_XR; i++)
894     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
895    
896     if (gettimeofday(&end_time, &dummy) != 0)
897     perror("bad gettimeofday");
898    
899     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
900     -
901     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
902    
903     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
904     return 0;
905     }
906     #endif
907    
908    
909 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
910     the final newline in the subject string. */
911    
912 nigel 87 for (i = 0; i < pattern_count; i++)
913 nigel 53 {
914 nigel 87 mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
915     offsets, 99);
916     if (mrc >= 0) { match = TRUE; break; }
917     if (mrc != PCRE_ERROR_NOMATCH)
918     {
919     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
920     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
921     fprintf(stderr, "this line:\n");
922     fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
923     fprintf(stderr, "\n");
924     if (error_count == 0 &&
925     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
926     {
927     fprintf(stderr, "pcregrep: error %d means that a resource limit "
928     "was exceeded\n", mrc);
929     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
930     }
931     if (error_count++ > 20)
932     {
933     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
934     exit(2);
935     }
936     match = invert; /* No more matching; don't show the line again */
937     break;
938     }
939 nigel 53 }
940 nigel 49
941 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
942 nigel 77
943 nigel 49 if (match != invert)
944     {
945 nigel 77 BOOL hyphenprinted = FALSE;
946    
947 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
948 nigel 77
949 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
950    
951     /* Just count if just counting is wanted. */
952    
953 nigel 49 if (count_only) count++;
954    
955 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
956     in the file. */
957    
958     else if (filenames == FN_ONLY)
959 nigel 49 {
960 nigel 77 fprintf(stdout, "%s\n", printname);
961 nigel 49 return 0;
962     }
963    
964 nigel 87 /* Likewise, if all we want is a yes/no answer. */
965    
966 nigel 77 else if (quiet) return 0;
967 nigel 49
968 nigel 87 /* The --only-matching option prints just the substring that matched, and
969     does not print any context. */
970    
971     else if (only_matching)
972     {
973     if (printname != NULL) fprintf(stdout, "%s:", printname);
974     if (number) fprintf(stdout, "%d:", linenumber);
975     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
976     fprintf(stdout, "\n");
977     }
978    
979     /* This is the default case when none of the above options is set. We print
980     the matching line(s), possibly preceded and/or followed by other lines of
981     context. */
982    
983 nigel 49 else
984     {
985 nigel 77 /* See if there is a requirement to print some "after" lines from a
986     previous match. We never print any overlaps. */
987    
988     if (after_context > 0 && lastmatchnumber > 0)
989     {
990 nigel 93 int ellength;
991 nigel 77 int linecount = 0;
992     char *p = lastmatchrestart;
993    
994     while (p < ptr && linecount < after_context)
995     {
996 nigel 93 p = end_of_line(p, ptr, &ellength);
997 nigel 77 linecount++;
998     }
999    
1000     /* It is important to advance lastmatchrestart during this printing so
1001 nigel 87 that it interacts correctly with any "before" printing below. Print
1002     each line's data using fwrite() in case there are binary zeroes. */
1003 nigel 77
1004     while (lastmatchrestart < p)
1005     {
1006     char *pp = lastmatchrestart;
1007     if (printname != NULL) fprintf(stdout, "%s-", printname);
1008     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1009 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1010     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1011     lastmatchrestart = pp;
1012 nigel 77 }
1013     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1014     }
1015    
1016     /* If there were non-contiguous lines printed above, insert hyphens. */
1017    
1018     if (hyphenpending)
1019     {
1020     fprintf(stdout, "--\n");
1021     hyphenpending = FALSE;
1022     hyphenprinted = TRUE;
1023     }
1024    
1025     /* See if there is a requirement to print some "before" lines for this
1026     match. Again, don't print overlaps. */
1027    
1028     if (before_context > 0)
1029     {
1030     int linecount = 0;
1031     char *p = ptr;
1032    
1033     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1034 nigel 87 linecount < before_context)
1035 nigel 77 {
1036 nigel 87 linecount++;
1037 nigel 93 p = previous_line(p, buffer);
1038 nigel 77 }
1039    
1040     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1041     fprintf(stdout, "--\n");
1042    
1043     while (p < ptr)
1044     {
1045 nigel 93 int ellength;
1046 nigel 77 char *pp = p;
1047     if (printname != NULL) fprintf(stdout, "%s-", printname);
1048     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1049 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1050     fwrite(p, 1, pp - p, stdout);
1051     p = pp;
1052 nigel 77 }
1053     }
1054    
1055     /* Now print the matching line(s); ensure we set hyphenpending at the end
1056 nigel 85 of the file if any context lines are being output. */
1057 nigel 77
1058 nigel 85 if (after_context > 0 || before_context > 0)
1059     endhyphenpending = TRUE;
1060    
1061 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1062 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1063 nigel 77
1064     /* In multiline mode, we want to print to the end of the line in which
1065     the end of the matched string is found, so we adjust linelength and the
1066     line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1067 nigel 91 start of the match will always be before the first newline sequence. */
1068 nigel 77
1069     if (multiline)
1070     {
1071 nigel 93 int ellength;
1072 nigel 77 char *endmatch = ptr + offsets[1];
1073     t = ptr;
1074 nigel 93 while (t < endmatch)
1075     {
1076     t = end_of_line(t, endptr, &ellength);
1077     if (t <= endmatch) linenumber++; else break;
1078     }
1079     endmatch = end_of_line(endmatch, endptr, &ellength);
1080     linelength = endmatch - ptr - ellength;
1081 nigel 77 }
1082    
1083 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1084     zeroes are treated as just another data character. */
1085    
1086     /* This extra option, for Jeffrey Friedl's debugging requirements,
1087     replaces the matched string, or a specific captured string if it exists,
1088     with X. When this happens, colouring is ignored. */
1089    
1090     #ifdef JFRIEDL_DEBUG
1091     if (S_arg >= 0 && S_arg < mrc)
1092     {
1093     int first = S_arg * 2;
1094     int last = first + 1;
1095     fwrite(ptr, 1, offsets[first], stdout);
1096     fprintf(stdout, "X");
1097     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1098     }
1099     else
1100     #endif
1101    
1102     /* We have to split the line(s) up if colouring. */
1103    
1104     if (do_colour)
1105     {
1106     fwrite(ptr, 1, offsets[0], stdout);
1107     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1108     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1109     fprintf(stdout, "%c[00m", 0x1b);
1110     fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1111     }
1112 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1113 nigel 49 }
1114    
1115 nigel 87 /* End of doing what has to be done for a match */
1116    
1117 nigel 77 rc = 0; /* Had some success */
1118    
1119     /* Remember where the last match happened for after_context. We remember
1120     where we are about to restart, and that line's number. */
1121    
1122 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1123 nigel 77 lastmatchnumber = linenumber + 1;
1124 nigel 49 }
1125 nigel 77
1126     /* Advance to after the newline and increment the line number. */
1127    
1128 nigel 93 ptr += linelength + endlinelength;
1129 nigel 77 linenumber++;
1130    
1131     /* If we haven't yet reached the end of the file (the buffer is full), and
1132     the current point is in the top 1/3 of the buffer, slide the buffer down by
1133     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1134     about to be lost, print them. */
1135    
1136     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1137     {
1138     if (after_context > 0 &&
1139     lastmatchnumber > 0 &&
1140     lastmatchrestart < buffer + MBUFTHIRD)
1141     {
1142     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1143     lastmatchnumber = 0;
1144     }
1145    
1146     /* Now do the shuffle */
1147    
1148     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1149     ptr -= MBUFTHIRD;
1150     bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1151     endptr = buffer + bufflength;
1152    
1153     /* Adjust any last match point */
1154    
1155     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1156     }
1157     } /* Loop through the whole file */
1158    
1159     /* End of file; print final "after" lines if wanted; do_after_lines sets
1160     hyphenpending if it prints something. */
1161    
1162 nigel 87 if (!only_matching && !count_only)
1163     {
1164     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1165     hyphenpending |= endhyphenpending;
1166     }
1167 nigel 77
1168     /* Print the file name if we are looking for those without matches and there
1169     were none. If we found a match, we won't have got this far. */
1170    
1171 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1172 nigel 77 {
1173     fprintf(stdout, "%s\n", printname);
1174     return 0;
1175 nigel 49 }
1176    
1177 nigel 77 /* Print the match count if wanted */
1178    
1179 nigel 49 if (count_only)
1180     {
1181 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1182 nigel 49 fprintf(stdout, "%d\n", count);
1183     }
1184    
1185     return rc;
1186     }
1187    
1188    
1189    
1190     /*************************************************
1191 nigel 53 * Grep a file or recurse into a directory *
1192     *************************************************/
1193    
1194 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1195     recursing; if it's a file, grep it.
1196    
1197     Arguments:
1198     pathname the path to investigate
1199 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1200 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1201    
1202     Returns: 0 if there was at least one match
1203     1 if there were no matches
1204     2 there was some kind of error
1205    
1206     However, file opening failures are suppressed if "silent" is set.
1207     */
1208    
1209 nigel 53 static int
1210 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1211 nigel 53 {
1212     int rc = 1;
1213     int sep;
1214     FILE *in;
1215    
1216 nigel 77 /* If the file name is "-" we scan stdin */
1217 nigel 53
1218 nigel 77 if (strcmp(pathname, "-") == 0)
1219 nigel 53 {
1220 nigel 77 return pcregrep(stdin,
1221 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1222 nigel 77 stdin_name : NULL);
1223     }
1224    
1225    
1226 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1227     each file within it, subject to any include or exclude patterns that were set.
1228     The scanning code is localized so it can be made system-specific. */
1229    
1230     if ((sep = isdirectory(pathname)) != 0)
1231 nigel 77 {
1232 nigel 87 if (dee_action == dee_SKIP) return 1;
1233     if (dee_action == dee_RECURSE)
1234 nigel 53 {
1235 nigel 87 char buffer[1024];
1236     char *nextfile;
1237     directory_type *dir = opendirectory(pathname);
1238 nigel 53
1239 nigel 87 if (dir == NULL)
1240     {
1241     if (!silent)
1242     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1243     strerror(errno));
1244     return 2;
1245     }
1246 nigel 77
1247 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1248     {
1249     int frc, blen;
1250     sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1251     blen = strlen(buffer);
1252 nigel 77
1253 nigel 87 if (exclude_compiled != NULL &&
1254     pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1255     continue;
1256 nigel 77
1257 nigel 87 if (include_compiled != NULL &&
1258     pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1259     continue;
1260    
1261     frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1262     if (frc > 1) rc = frc;
1263     else if (frc == 0 && rc == 1) rc = 0;
1264     }
1265    
1266     closedirectory(dir);
1267     return rc;
1268 nigel 53 }
1269     }
1270    
1271 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1272     been requested. */
1273 nigel 53
1274 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1275    
1276     /* Control reaches here if we have a regular file, or if we have a directory
1277     and recursion or skipping was not requested, or if we have anything else and
1278     skipping was not requested. The scan proceeds. If this is the first and only
1279     argument at top level, we don't show the file name, unless we are only showing
1280     the file name, or the filename was forced (-H). */
1281    
1282 nigel 77 in = fopen(pathname, "r");
1283 nigel 53 if (in == NULL)
1284     {
1285 nigel 77 if (!silent)
1286     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1287     strerror(errno));
1288 nigel 53 return 2;
1289     }
1290    
1291 nigel 87 rc = pcregrep(in, (filenames > FN_DEFAULT ||
1292     (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1293 nigel 77
1294 nigel 53 fclose(in);
1295     return rc;
1296     }
1297    
1298    
1299    
1300    
1301     /*************************************************
1302 nigel 49 * Usage function *
1303     *************************************************/
1304    
1305     static int
1306     usage(int rc)
1307     {
1308 nigel 87 option_item *op;
1309     fprintf(stderr, "Usage: pcregrep [-");
1310     for (op = optionlist; op->one_char != 0; op++)
1311     {
1312     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1313     }
1314     fprintf(stderr, "] [long options] [pattern] [files]\n");
1315 nigel 53 fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1316 nigel 49 return rc;
1317     }
1318    
1319    
1320    
1321    
1322     /*************************************************
1323 nigel 53 * Help function *
1324     *************************************************/
1325    
1326     static void
1327     help(void)
1328     {
1329     option_item *op;
1330    
1331 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1332 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1333 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1334     printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1335 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1336    
1337     printf("Options:\n");
1338    
1339     for (op = optionlist; op->one_char != 0; op++)
1340     {
1341     int n;
1342     char s[4];
1343     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1344     printf(" %s --%s%n", s, op->long_name, &n);
1345     n = 30 - n;
1346     if (n < 1) n = 1;
1347     printf("%.*s%s\n", n, " ", op->help_text);
1348     }
1349    
1350 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1351     printf("trailing white space is removed and blank lines are ignored.\n");
1352     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1353 nigel 53
1354 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1355 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1356     }
1357    
1358    
1359    
1360    
1361     /*************************************************
1362 nigel 77 * Handle a single-letter, no data option *
1363 nigel 53 *************************************************/
1364    
1365     static int
1366     handle_option(int letter, int options)
1367     {
1368     switch(letter)
1369     {
1370 nigel 87 case N_HELP: help(); exit(0);
1371 nigel 53 case 'c': count_only = TRUE; break;
1372 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1373     case 'H': filenames = FN_FORCE; break;
1374     case 'h': filenames = FN_NONE; break;
1375 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1376 nigel 87 case 'l': filenames = FN_ONLY; break;
1377     case 'L': filenames = FN_NOMATCH_ONLY; break;
1378 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1379 nigel 53 case 'n': number = TRUE; break;
1380 nigel 87 case 'o': only_matching = TRUE; break;
1381 nigel 77 case 'q': quiet = TRUE; break;
1382 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1383 nigel 53 case 's': silent = TRUE; break;
1384 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1385 nigel 53 case 'v': invert = TRUE; break;
1386 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1387     case 'x': process_options |= PO_LINE_MATCH; break;
1388 nigel 53
1389     case 'V':
1390 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1391 nigel 53 exit(0);
1392     break;
1393    
1394     default:
1395     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1396     exit(usage(2));
1397     }
1398    
1399     return options;
1400     }
1401    
1402    
1403    
1404    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do zero and negative numbers.

Argument:  n    the number
Returns:   pointer to a static buffer holding the text; the buffer is
           overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is an upper bound on the number of decimal digits
in an int; the extra four bytes are for a minus sign, the two-letter ending,
and the terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int teens = n % 100;

if (teens < 11 || teens > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1427    
1428    
1429    
1430     /*************************************************
1431     * Compile a single pattern *
1432     *************************************************/
1433    
1434     /* When the -F option has been used, this is called for each substring.
1435     Otherwise it's called for each supplied pattern.
1436    
1437     Arguments:
1438     pattern the pattern string
1439     options the PCRE options
1440     filename the file name, or NULL for a command-line pattern
1441     count 0 if this is the only command line pattern, or
1442     number of the command line pattern, or
1443     linenumber for a pattern from a file
1444    
1445     Returns: TRUE on success, FALSE after an error
1446     */
1447    
1448     static BOOL
1449     compile_single_pattern(char *pattern, int options, char *filename, int count)
1450     {
1451     char buffer[MBUFTHIRD + 16];
1452     const char *error;
1453     int errptr;
1454    
1455     if (pattern_count >= MAX_PATTERN_COUNT)
1456     {
1457     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1458     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1459     return FALSE;
1460     }
1461    
1462     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1463     suffix[process_options]);
1464     pattern_list[pattern_count] =
1465     pcre_compile(buffer, options, &error, &errptr, pcretables);
1466 ph10 142 if (pattern_list[pattern_count] != NULL)
1467 ph10 141 {
1468 ph10 142 pattern_count++;
1469 ph10 141 return TRUE;
1470 ph10 142 }
1471 nigel 87
1472     /* Handle compile errors */
1473    
1474     errptr -= (int)strlen(prefix[process_options]);
1475     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1476    
1477     if (filename == NULL)
1478     {
1479     if (count == 0)
1480     fprintf(stderr, "pcregrep: Error in command-line regex "
1481     "at offset %d: %s\n", errptr, error);
1482     else
1483     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1484     "at offset %d: %s\n", ordin(count), errptr, error);
1485     }
1486     else
1487     {
1488     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1489     "at offset %d: %s\n", count, filename, errptr, error);
1490     }
1491    
1492     return FALSE;
1493     }
1494    
1495    
1496    
1497     /*************************************************
1498     * Compile one supplied pattern *
1499     *************************************************/
1500    
1501     /* When the -F option has been used, each string may be a list of strings,
1502 nigel 91 separated by line breaks. They will be matched literally.
1503 nigel 87
1504     Arguments:
1505     pattern the pattern string
1506     options the PCRE options
1507     filename the file name, or NULL for a command-line pattern
1508     count 0 if this is the only command line pattern, or
1509     number of the command line pattern, or
1510     linenumber for a pattern from a file
1511    
1512     Returns: TRUE on success, FALSE after an error
1513     */
1514    
1515     static BOOL
1516     compile_pattern(char *pattern, int options, char *filename, int count)
1517     {
1518     if ((process_options & PO_FIXED_STRINGS) != 0)
1519     {
1520 nigel 93 char *eop = pattern + strlen(pattern);
1521 nigel 87 char buffer[MBUFTHIRD];
1522     for(;;)
1523     {
1524 nigel 93 int ellength;
1525     char *p = end_of_line(pattern, eop, &ellength);
1526     if (ellength == 0)
1527 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1528 nigel 93 sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1529     pattern = p;
1530 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1531     return FALSE;
1532     }
1533     }
1534     else return compile_single_pattern(pattern, options, filename, count);
1535     }
1536    
1537    
1538    
1539     /*************************************************
1540 nigel 49 * Main program *
1541     *************************************************/
1542    
1543 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1544    
1545 nigel 49 int
1546     main(int argc, char **argv)
1547     {
1548 nigel 53 int i, j;
1549 nigel 49 int rc = 1;
1550 nigel 87 int pcre_options = 0;
1551     int cmd_pattern_count = 0;
1552 ph10 141 int hint_count = 0;
1553 nigel 49 int errptr;
1554 nigel 87 BOOL only_one_at_top;
1555     char *patterns[MAX_PATTERN_COUNT];
1556     const char *locale_from = "--locale";
1557 nigel 49 const char *error;
1558    
1559 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1560     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1561     */
1562 nigel 91
1563     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1564     switch(i)
1565     {
1566     default: newline = (char *)"lf"; break;
1567     case '\r': newline = (char *)"cr"; break;
1568     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1569 nigel 93 case -1: newline = (char *)"any"; break;
1570 ph10 149 case -2: newline = (char *)"anycrlf"; break;
1571 nigel 91 }
1572    
1573 nigel 49 /* Process the options */
1574    
1575     for (i = 1; i < argc; i++)
1576     {
1577 nigel 77 option_item *op = NULL;
1578     char *option_data = (char *)""; /* default to keep compiler happy */
1579     BOOL longop;
1580     BOOL longopwasequals = FALSE;
1581    
1582 nigel 49 if (argv[i][0] != '-') break;
1583 nigel 53
1584 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1585 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1586 nigel 63
1587 nigel 77 if (argv[i][1] == 0)
1588     {
1589 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1590 nigel 77 else exit(usage(2));
1591     }
1592 nigel 63
1593 nigel 77 /* Handle a long name option, or -- to terminate the options */
1594 nigel 53
1595     if (argv[i][1] == '-')
1596 nigel 49 {
1597 nigel 77 char *arg = argv[i] + 2;
1598     char *argequals = strchr(arg, '=');
1599 nigel 53
1600 nigel 77 if (*arg == 0) /* -- terminates options */
1601 nigel 49 {
1602 nigel 77 i++;
1603     break; /* out of the options-handling loop */
1604 nigel 53 }
1605 nigel 49
1606 nigel 77 longop = TRUE;
1607    
1608     /* Some long options have data that follows after =, for example file=name.
1609     Some options have variations in the long name spelling: specifically, we
1610     allow "regexp" because GNU grep allows it, though I personally go along
1611 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1612     These options are entered in the table as "regex(p)". No option is in both
1613     these categories, fortunately. */
1614 nigel 77
1615 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1616     {
1617 nigel 77 char *opbra = strchr(op->long_name, '(');
1618     char *equals = strchr(op->long_name, '=');
1619     if (opbra == NULL) /* Not a (p) case */
1620 nigel 53 {
1621 nigel 77 if (equals == NULL) /* Not thing=data case */
1622     {
1623     if (strcmp(arg, op->long_name) == 0) break;
1624     }
1625     else /* Special case xxx=data */
1626     {
1627     int oplen = equals - op->long_name;
1628     int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1629     if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1630     {
1631     option_data = arg + arglen;
1632     if (*option_data == '=')
1633     {
1634     option_data++;
1635     longopwasequals = TRUE;
1636     }
1637     break;
1638     }
1639     }
1640 nigel 53 }
1641 nigel 77 else /* Special case xxxx(p) */
1642     {
1643     char buff1[24];
1644     char buff2[24];
1645     int baselen = opbra - op->long_name;
1646     sprintf(buff1, "%.*s", baselen, op->long_name);
1647     sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1648     opbra + 1);
1649     if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1650     break;
1651     }
1652 nigel 53 }
1653 nigel 77
1654 nigel 53 if (op->one_char == 0)
1655     {
1656     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1657     exit(usage(2));
1658     }
1659     }
1660 nigel 49
1661 nigel 89
1662     /* Jeffrey Friedl's debugging harness uses these additional options which
1663     are not in the right form for putting in the option table because they use
1664     only one hyphen, yet are more than one character long. By putting them
1665     separately here, they will not get displayed as part of the help() output,
1666     but I don't think Jeffrey will care about that. */
1667    
1668     #ifdef JFRIEDL_DEBUG
1669     else if (strcmp(argv[i], "-pre") == 0) {
1670     jfriedl_prefix = argv[++i];
1671     continue;
1672     } else if (strcmp(argv[i], "-post") == 0) {
1673     jfriedl_postfix = argv[++i];
1674     continue;
1675     } else if (strcmp(argv[i], "-XT") == 0) {
1676     sscanf(argv[++i], "%d", &jfriedl_XT);
1677     continue;
1678     } else if (strcmp(argv[i], "-XR") == 0) {
1679     sscanf(argv[++i], "%d", &jfriedl_XR);
1680     continue;
1681     }
1682     #endif
1683    
1684    
1685 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1686     continue till we hit the last one or one that needs data. */
1687 nigel 53
1688     else
1689     {
1690     char *s = argv[i] + 1;
1691 nigel 77 longop = FALSE;
1692 nigel 53 while (*s != 0)
1693     {
1694 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1695     { if (*s == op->one_char) break; }
1696     if (op->one_char == 0)
1697 nigel 53 {
1698 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1699     *s, argv[i]);
1700     exit(usage(2));
1701     }
1702     if (op->type != OP_NODATA || s[1] == 0)
1703     {
1704     option_data = s+1;
1705 nigel 53 break;
1706     }
1707 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1708 nigel 49 }
1709     }
1710 nigel 77
1711 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1712     is NO_DATA, it means that there is no data, and the option might set
1713     something in the PCRE options. */
1714 nigel 77
1715     if (op->type == OP_NODATA)
1716     {
1717 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1718     continue;
1719     }
1720    
1721     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1722     either has a value or defaults to something. It cannot have data in a
1723     separate item. At the moment, the only such options are "colo(u)r" and
1724 nigel 89 Jeffrey Friedl's special -S debugging option. */
1725 nigel 87
1726     if (*option_data == 0 &&
1727     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1728     {
1729     switch (op->one_char)
1730 nigel 77 {
1731 nigel 87 case N_COLOUR:
1732     colour_option = (char *)"auto";
1733     break;
1734     #ifdef JFRIEDL_DEBUG
1735     case 'S':
1736     S_arg = 0;
1737     break;
1738     #endif
1739 nigel 77 }
1740 nigel 87 continue;
1741     }
1742 nigel 77
1743 nigel 87 /* Otherwise, find the data string for the option. */
1744    
1745     if (*option_data == 0)
1746     {
1747     if (i >= argc - 1 || longopwasequals)
1748 nigel 77 {
1749 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1750     exit(usage(2));
1751     }
1752     option_data = argv[++i];
1753     }
1754    
1755     /* If the option type is OP_PATLIST, it's the -e option, which can be called
1756     multiple times to create a list of patterns. */
1757    
1758     if (op->type == OP_PATLIST)
1759     {
1760     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1761     {
1762     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1763     MAX_PATTERN_COUNT);
1764     return 2;
1765     }
1766     patterns[cmd_pattern_count++] = option_data;
1767     }
1768    
1769     /* Otherwise, deal with single string or numeric data values. */
1770    
1771     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1772     {
1773     *((char **)op->dataptr) = option_data;
1774     }
1775     else
1776     {
1777     char *endptr;
1778     int n = strtoul(option_data, &endptr, 10);
1779     if (*endptr != 0)
1780     {
1781     if (longop)
1782 nigel 77 {
1783 nigel 87 char *equals = strchr(op->long_name, '=');
1784     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1785     equals - op->long_name;
1786     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1787     option_data, nlen, op->long_name);
1788 nigel 77 }
1789 nigel 87 else
1790     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1791     option_data, op->one_char);
1792     exit(usage(2));
1793 nigel 77 }
1794 nigel 87 *((int *)op->dataptr) = n;
1795 nigel 77 }
1796 nigel 49 }
1797    
1798 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
1799     for -A and -B. */
1800    
1801     if (both_context > 0)
1802     {
1803     if (after_context == 0) after_context = both_context;
1804     if (before_context == 0) before_context = both_context;
1805     }
1806    
1807 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
1808     LC_ALL environment variable is set, and if so, use it. */
1809 nigel 49
1810 nigel 87 if (locale == NULL)
1811 nigel 53 {
1812 nigel 87 locale = getenv("LC_ALL");
1813     locale_from = "LCC_ALL";
1814 nigel 53 }
1815 nigel 49
1816 nigel 87 if (locale == NULL)
1817     {
1818     locale = getenv("LC_CTYPE");
1819     locale_from = "LC_CTYPE";
1820     }
1821 nigel 49
1822 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
1823     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1824    
1825     if (locale != NULL)
1826 nigel 49 {
1827 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
1828 nigel 53 {
1829 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1830     locale, locale_from);
1831 nigel 53 return 2;
1832     }
1833 nigel 87 pcretables = pcre_maketables();
1834     }
1835 nigel 77
1836 nigel 87 /* Sort out colouring */
1837    
1838     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1839     {
1840     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1841     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1842     else
1843 nigel 53 {
1844 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1845     colour_option);
1846     return 2;
1847 nigel 77 }
1848 nigel 87 if (do_colour)
1849 nigel 77 {
1850 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
1851     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1852     if (cs != NULL) colour_string = cs;
1853 nigel 77 }
1854 nigel 87 }
1855 nigel 77
1856 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
1857    
1858     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1859     {
1860     pcre_options |= PCRE_NEWLINE_CR;
1861 nigel 93 endlinetype = EL_CR;
1862 nigel 91 }
1863     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1864     {
1865     pcre_options |= PCRE_NEWLINE_LF;
1866 nigel 93 endlinetype = EL_LF;
1867 nigel 91 }
1868     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1869     {
1870     pcre_options |= PCRE_NEWLINE_CRLF;
1871 nigel 93 endlinetype = EL_CRLF;
1872 nigel 91 }
1873 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1874     {
1875     pcre_options |= PCRE_NEWLINE_ANY;
1876     endlinetype = EL_ANY;
1877     }
1878 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1879     {
1880     pcre_options |= PCRE_NEWLINE_ANYCRLF;
1881     endlinetype = EL_ANYCRLF;
1882     }
1883 nigel 91 else
1884     {
1885     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1886     return 2;
1887     }
1888    
1889 nigel 87 /* Interpret the text values for -d and -D */
1890    
1891     if (dee_option != NULL)
1892     {
1893     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1894     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1895     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1896     else
1897 nigel 77 {
1898 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1899     return 2;
1900 nigel 53 }
1901 nigel 49 }
1902    
1903 nigel 87 if (DEE_option != NULL)
1904     {
1905     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1906     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1907     else
1908     {
1909     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1910     return 2;
1911     }
1912     }
1913 nigel 49
1914 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
1915 nigel 87
1916     #ifdef JFRIEDL_DEBUG
1917     if (S_arg > 9)
1918 nigel 49 {
1919 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
1920     return 2;
1921     }
1922 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
1923     {
1924     if (jfriedl_XT == 0) jfriedl_XT = 1;
1925     if (jfriedl_XR == 0) jfriedl_XR = 1;
1926     }
1927 nigel 87 #endif
1928 nigel 77
1929 nigel 87 /* Get memory to store the pattern and hints lists. */
1930    
1931     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1932     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1933    
1934     if (pattern_list == NULL || hints_list == NULL)
1935     {
1936     fprintf(stderr, "pcregrep: malloc failed\n");
1937 ph10 123 goto EXIT2;
1938 nigel 87 }
1939    
1940     /* If no patterns were provided by -e, and there is no file provided by -f,
1941     the first argument is the one and only pattern, and it must exist. */
1942    
1943     if (cmd_pattern_count == 0 && pattern_filename == NULL)
1944     {
1945 nigel 63 if (i >= argc) return usage(2);
1946 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
1947     }
1948 nigel 77
1949 nigel 87 /* Compile the patterns that were provided on the command line, either by
1950     multiple uses of -e or as a single unkeyed pattern. */
1951    
1952     for (j = 0; j < cmd_pattern_count; j++)
1953     {
1954     if (!compile_pattern(patterns[j], pcre_options, NULL,
1955     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1956 ph10 123 goto EXIT2;
1957 nigel 87 }
1958    
1959     /* Compile the regular expressions that are provided in a file. */
1960    
1961     if (pattern_filename != NULL)
1962     {
1963     int linenumber = 0;
1964     FILE *f;
1965     char *filename;
1966     char buffer[MBUFTHIRD];
1967    
1968     if (strcmp(pattern_filename, "-") == 0)
1969 nigel 77 {
1970 nigel 87 f = stdin;
1971     filename = stdin_name;
1972 nigel 77 }
1973 nigel 87 else
1974 nigel 77 {
1975 nigel 87 f = fopen(pattern_filename, "r");
1976     if (f == NULL)
1977     {
1978     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1979     strerror(errno));
1980 ph10 123 goto EXIT2;
1981 nigel 87 }
1982     filename = pattern_filename;
1983 nigel 77 }
1984    
1985 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
1986 nigel 53 {
1987 nigel 87 char *s = buffer + (int)strlen(buffer);
1988     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1989     *s = 0;
1990     linenumber++;
1991     if (buffer[0] == 0) continue; /* Skip blank lines */
1992     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1993 ph10 121 goto EXIT2;
1994 nigel 53 }
1995 nigel 87
1996     if (f != stdin) fclose(f);
1997 nigel 49 }
1998    
1999 nigel 77 /* Study the regular expressions, as we will be running them many times */
2000 nigel 53
2001     for (j = 0; j < pattern_count; j++)
2002     {
2003     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2004     if (error != NULL)
2005     {
2006     char s[16];
2007     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2008     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2009 ph10 121 goto EXIT2;
2010 nigel 53 }
2011 ph10 142 hint_count++;
2012 nigel 53 }
2013    
2014 nigel 77 /* If there are include or exclude patterns, compile them. */
2015    
2016     if (exclude_pattern != NULL)
2017     {
2018 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2019     pcretables);
2020 nigel 77 if (exclude_compiled == NULL)
2021     {
2022     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2023     errptr, error);
2024 ph10 121 goto EXIT2;
2025 nigel 77 }
2026     }
2027    
2028     if (include_pattern != NULL)
2029     {
2030 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2031     pcretables);
2032 nigel 77 if (include_compiled == NULL)
2033     {
2034     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2035     errptr, error);
2036 ph10 121 goto EXIT2;
2037 nigel 77 }
2038     }
2039    
2040 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2041 nigel 49
2042 nigel 87 if (i >= argc)
2043 ph10 121 {
2044     rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2045     goto EXIT;
2046 ph10 123 }
2047 nigel 49
2048 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2049     Pass in the fact that there is only one argument at top level - this suppresses
2050 nigel 87 the file name if the argument is not a directory and filenames are not
2051     otherwise forced. */
2052 nigel 49
2053 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2054 nigel 49
2055     for (; i < argc; i++)
2056     {
2057 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2058     only_one_at_top);
2059 nigel 77 if (frc > 1) rc = frc;
2060     else if (frc == 0 && rc == 1) rc = 0;
2061 nigel 49 }
2062    
2063 ph10 121 EXIT:
2064     if (pattern_list != NULL)
2065     {
2066 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2067 ph10 121 free(pattern_list);
2068 ph10 123 }
2069 ph10 121 if (hints_list != NULL)
2070     {
2071 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2072 ph10 121 free(hints_list);
2073 ph10 123 }
2074 nigel 49 return rc;
2075 ph10 121
2076     EXIT2:
2077     rc = 2;
2078     goto EXIT;
2079 nigel 49 }
2080    
2081 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12