/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 571 - (hide annotations) (download)
Tue Nov 16 17:51:37 2010 UTC (3 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 76791 byte(s)
Rename --{in,ex}clude_dir with hyphen instead of underscore in pcregrep, but 
leave the old versions as undocumented synonyms. (In GNU grep, hyphens are 
used.)

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 515 Copyright (c) 1997-2010 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77     #define MBUFTHIRD BUFSIZ
78     #else
79     #define MBUFTHIRD 8192
80     #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result is tested by an "if"
with an empty body. Oddly, this does not also seem to apply to fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. A bare "if (...) {}" expansion followed by the caller's semicolon is
two statements, which breaks when the macro is the body of an "if" that has an
"else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166 ph10 565 static int only_matching = -1;
167 nigel 87 static int process_options = 0;
168 nigel 77
169 ph10 561 static unsigned long int match_limit = 0;
170     static unsigned long int match_limit_recursion = 0;
171    
172 nigel 49 static BOOL count_only = FALSE;
173 nigel 87 static BOOL do_colour = FALSE;
174 ph10 280 static BOOL file_offsets = FALSE;
175 nigel 77 static BOOL hyphenpending = FALSE;
176 nigel 49 static BOOL invert = FALSE;
177 ph10 519 static BOOL line_buffered = FALSE;
178 ph10 280 static BOOL line_offsets = FALSE;
179 nigel 77 static BOOL multiline = FALSE;
180 nigel 49 static BOOL number = FALSE;
181 ph10 420 static BOOL omit_zero_count = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
187 nigel 53 /* Structure for options and list of them */
188 nigel 49
189 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
190     OP_PATLIST };
191 nigel 77
192 nigel 53 typedef struct option_item {
193 nigel 77 int type;
194 nigel 53 int one_char;
195 nigel 77 void *dataptr;
196 nigel 67 const char *long_name;
197     const char *help_text;
198 nigel 53 } option_item;
199 nigel 49
200 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
201     used to identify them. */
202    
203 ph10 325 #define N_COLOUR (-1)
204     #define N_EXCLUDE (-2)
205     #define N_EXCLUDE_DIR (-3)
206     #define N_HELP (-4)
207     #define N_INCLUDE (-5)
208     #define N_INCLUDE_DIR (-6)
209     #define N_LABEL (-7)
210     #define N_LOCALE (-8)
211     #define N_NULL (-9)
212     #define N_LOFFSETS (-10)
213     #define N_FOFFSETS (-11)
214 ph10 519 #define N_LBUFFER (-12)
215 ph10 561 #define N_M_LIMIT (-13)
216     #define N_M_LIMIT_REC (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224 ph10 561 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225 nigel 87 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229 ph10 422 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230 ph10 421 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231 nigel 87 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239 ph10 519 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242 ph10 561 { OP_NUMBER, N_M_LIMIT,&match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_NUMBER, N_M_LIMIT_REC,&match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244 nigel 87 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247 ph10 565 { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248 nigel 87 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252 ph10 571 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254    
255     /* These two were accidentally implemented with underscores instead of
256     hyphens in the option names. As this was not discovered for several releases,
257     the incorrect versions are left in the table for compatibility. However, the
258     --help function misses out any option that has an underscore in its name. */
259    
260 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 ph10 571
263 nigel 87 #ifdef JFRIEDL_DEBUG
264     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265     #endif
266     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272     { OP_NODATA, 0, NULL, NULL, NULL }
273 nigel 53 };
274    
275 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277     that the combination of -w and -x has the same effect as -x on its own, so we
278     can treat them as the same. */
279 nigel 53
280 nigel 87 static const char *prefix[] = {
281     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283     static const char *suffix[] = {
284     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
285    
286 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
287 nigel 87
288 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289 nigel 87
290 nigel 93 const char utf8_table4[] = {
291     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298 nigel 53 /*************************************************
299 nigel 87 * OS-specific functions *
300 nigel 53 *************************************************/
301    
302     /* These functions are defined so that they can be made system specific,
303 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
304 nigel 53
305    
306     /************* Directory scanning in Unix ***********/
307    
308 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
309 nigel 53 #include <sys/types.h>
310     #include <sys/stat.h>
311     #include <dirent.h>
312    
313     typedef DIR directory_type;
314    
315 nigel 67 static int
316 nigel 53 isdirectory(char *filename)
317     {
318     struct stat statbuf;
319     if (stat(filename, &statbuf) < 0)
320     return 0; /* In the expectation that opening as a file will fail */
321     return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
322     }
323    
324 nigel 67 static directory_type *
325 nigel 53 opendirectory(char *filename)
326     {
327     return opendir(filename);
328     }
329    
330 nigel 67 static char *
331 nigel 53 readdirectory(directory_type *dir)
332     {
333     for (;;)
334     {
335     struct dirent *dent = readdir(dir);
336     if (dent == NULL) return NULL;
337     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
338     return dent->d_name;
339     }
340 ph10 151 /* Control never reaches here */
341 nigel 53 }
342    
343 nigel 67 static void
344 nigel 53 closedirectory(directory_type *dir)
345     {
346     closedir(dir);
347     }
348    
349    
350 nigel 87 /************* Test for regular file in Unix **********/
351    
352     static int
353     isregfile(char *filename)
354     {
355     struct stat statbuf;
356     if (stat(filename, &statbuf) < 0)
357     return 1; /* In the expectation that opening as a file will fail */
358     return (statbuf.st_mode & S_IFMT) == S_IFREG;
359     }
360    
361    
362 ph10 519 /************* Test for a terminal in Unix **********/
363 nigel 87
364     static BOOL
365     is_stdout_tty(void)
366     {
367     return isatty(fileno(stdout));
368     }
369    
370 ph10 519 static BOOL
371     is_file_tty(FILE *f)
372     {
373     return isatty(fileno(f));
374     }
375 nigel 87
376 ph10 519
377 nigel 63 /************* Directory scanning in Win32 ***********/
378 nigel 53
379 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
380 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
381 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
382     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
383 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
384     undefined when it is indeed undefined. */
385 nigel 53
386 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
387 nigel 63
388     #ifndef STRICT
389     # define STRICT
390     #endif
391     #ifndef WIN32_LEAN_AND_MEAN
392     # define WIN32_LEAN_AND_MEAN
393     #endif
394 ph10 283
395     #include <windows.h>
396    
397 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
398     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
399     #endif
400    
401 nigel 63 typedef struct directory_type
402     {
403     HANDLE handle;
404     BOOL first;
405     WIN32_FIND_DATA data;
406     } directory_type;
407    
408     int
409     isdirectory(char *filename)
410     {
411     DWORD attr = GetFileAttributes(filename);
412     if (attr == INVALID_FILE_ATTRIBUTES)
413     return 0;
414     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
415     }
416    
417     directory_type *
418     opendirectory(char *filename)
419     {
420     size_t len;
421     char *pattern;
422     directory_type *dir;
423     DWORD err;
424     len = strlen(filename);
425     pattern = (char *) malloc(len + 3);
426     dir = (directory_type *) malloc(sizeof(*dir));
427     if ((pattern == NULL) || (dir == NULL))
428     {
429     fprintf(stderr, "pcregrep: malloc failed\n");
430 ph10 561 pcregrep_exit(2);
431 nigel 63 }
432     memcpy(pattern, filename, len);
433     memcpy(&(pattern[len]), "\\*", 3);
434     dir->handle = FindFirstFile(pattern, &(dir->data));
435     if (dir->handle != INVALID_HANDLE_VALUE)
436     {
437     free(pattern);
438     dir->first = TRUE;
439     return dir;
440     }
441     err = GetLastError();
442     free(pattern);
443     free(dir);
444     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
445     return NULL;
446     }
447    
448     char *
449     readdirectory(directory_type *dir)
450     {
451     for (;;)
452     {
453     if (!dir->first)
454     {
455     if (!FindNextFile(dir->handle, &(dir->data)))
456     return NULL;
457     }
458     else
459     {
460     dir->first = FALSE;
461     }
462     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
463     return dir->data.cFileName;
464     }
465     #ifndef _MSC_VER
466     return NULL; /* Keep compiler happy; never executed */
467     #endif
468     }
469    
470     void
471     closedirectory(directory_type *dir)
472     {
473     FindClose(dir->handle);
474     free(dir);
475     }
476    
477    
478 nigel 87 /************* Test for regular file in Win32 **********/
479    
480     /* I don't know how to do this, or if it can be done; assume all paths are
481     regular if they are not directories. */
482    
/* Every path that isdirectory() does not report as a directory is taken to be
a regular file. Returns 1 for "regular", 0 for a directory. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
487    
488    
489 ph10 519 /************* Test for a terminal in Win32 **********/
490 nigel 87
491     /* I don't know how to do this; assume never */
492    
493     static BOOL
494     is_stdout_tty(void)
495     {
496 ph10 283 return FALSE;
497 nigel 87 }
498    
499 ph10 519 static BOOL
500     is_file_tty(FILE *f)
501     {
502     return FALSE;
503     }
504 nigel 87
505 ph10 519
506 nigel 53 /************* Directory scanning when we can't do it ***********/
507    
508     /* The type is void, and apart from isdirectory(), the functions do nothing. */
509    
510 nigel 63 #else
511    
/* Stubs for systems with no directory support. Nothing is ever reported as a
directory, opening a directory always yields a null pointer, reading always
yields a null pointer, and closing does nothing. */

typedef void directory_type;

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

void
closedirectory(directory_type *dir)
{
(void)dir;
}
518    
519 nigel 87
520     /************* Test for regular when we can't do it **********/
521    
522     /* Assume all files are regular. */
523    
int
isregfile(char *filename)
{
(void)filename;   /* No test is possible; every path counts as regular */
return 1;
}
525    
526    
527 ph10 519 /************* Test for a terminal when we can't do it **********/
528 nigel 87
529     static BOOL
530     is_stdout_tty(void)
531     {
532     return FALSE;
533     }
534    
535 ph10 519 static BOOL
536     is_file_tty(FILE *f)
537     {
538     return FALSE;
539     }
540 nigel 87
541 nigel 53 #endif
542    
543    
544    
545 ph10 137 #ifndef HAVE_STRERROR
546 nigel 49 /*************************************************
547     * Provide strerror() for non-ANSI libraries *
548     *************************************************/
549    
550     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
551     in their libraries, but can provide the same facility by this simple
552     alternative function. */
553    
/* The system's table of error texts and the number of entries in it. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): index the system table, with a fixed text for any
number that lies outside it. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
563     #endif /* HAVE_STRERROR */
564    
565    
566    
567     /*************************************************
568 ph10 561 * Exit from the program *
569     *************************************************/
570    
571     /* If there has been a resource error, give a suitable message.
572    
573     Argument: the return code
574     Returns: does not return
575     */
576    
577     static void
578     pcregrep_exit(int rc)
579     {
580     if (resource_error)
581     {
582     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
583     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
584     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
585     }
586    
587     exit(rc);
588     }
589    
590    
591    
592     /*************************************************
593 ph10 519 * Read one line of input *
594     *************************************************/
595    
596 ph10 535 /* Normally, input is read using fread() into a large buffer, so many lines may
597     be read at once. However, doing this for tty input means that no output appears
598 ph10 519 until a lot of input has been typed. Instead, tty input is handled line by
599     line. We cannot use fgets() for this, because it does not stop at a binary
600 ph10 535 zero, and therefore there is no way of telling how many characters it has read,
601 ph10 519 because there may be binary zeros embedded in the data.
602    
603     Arguments:
604     buffer the buffer to read into
605     length the maximum number of characters to read
606     f the file
607 ph10 535
608 ph10 519 Returns: the number of characters read, zero at end of file
609 ph10 535 */
610 ph10 519
/* Read bytes from f into buffer until a '\n' has been stored, "length" bytes
have been stored, or fgetc() returns EOF, whichever comes first. Binary zeros
are copied like any other byte. No terminating zero is added.

The space remaining is tested before each read, so a length of zero (or less)
stores nothing and consumes nothing from the file; the result is then 0, which
is the same value that is returned at end of file. */

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
623    
624    
625    
626     /*************************************************
627 nigel 93 * Find end of line *
628     *************************************************/
629    
630     /* The length of the endline sequence that is found is set via lenptr. This may
631     be zero at the very end of the file if there is no line-ending sequence there.
632    
633     Arguments:
634     p current position in line
635     endptr end of available data
636     lenptr where to put the length of the eol sequence
637    
638     Returns: pointer after the last byte of the line, including the newline byte(s)
639     */
640    
641     static char *
642     end_of_line(char *p, char *endptr, int *lenptr)
643     {
644     switch(endlinetype)
645     {
646     default: /* Just in case */
647     case EL_LF:
648     while (p < endptr && *p != '\n') p++;
649     if (p < endptr)
650     {
651     *lenptr = 1;
652     return p + 1;
653     }
654     *lenptr = 0;
655     return endptr;
656    
657     case EL_CR:
658     while (p < endptr && *p != '\r') p++;
659     if (p < endptr)
660     {
661     *lenptr = 1;
662     return p + 1;
663     }
664     *lenptr = 0;
665     return endptr;
666    
667     case EL_CRLF:
668     for (;;)
669     {
670     while (p < endptr && *p != '\r') p++;
671     if (++p >= endptr)
672     {
673     *lenptr = 0;
674     return endptr;
675     }
676     if (*p == '\n')
677     {
678     *lenptr = 2;
679     return p + 1;
680     }
681     }
682     break;
683    
684 ph10 149 case EL_ANYCRLF:
685     while (p < endptr)
686     {
687     int extra = 0;
688     register int c = *((unsigned char *)p);
689    
690     if (utf8 && c >= 0xc0)
691     {
692     int gcii, gcss;
693     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
694     gcss = 6*extra;
695     c = (c & utf8_table3[extra]) << gcss;
696     for (gcii = 1; gcii <= extra; gcii++)
697     {
698     gcss -= 6;
699     c |= (p[gcii] & 0x3f) << gcss;
700     }
701     }
702    
703     p += 1 + extra;
704    
705     switch (c)
706     {
707     case 0x0a: /* LF */
708     *lenptr = 1;
709     return p;
710    
711     case 0x0d: /* CR */
712     if (p < endptr && *p == 0x0a)
713     {
714     *lenptr = 2;
715     p++;
716     }
717     else *lenptr = 1;
718     return p;
719 ph10 150
720 ph10 149 default:
721     break;
722     }
723     } /* End of loop for ANYCRLF case */
724 ph10 150
725 ph10 149 *lenptr = 0; /* Must have hit the end */
726     return endptr;
727    
728 nigel 93 case EL_ANY:
729     while (p < endptr)
730     {
731     int extra = 0;
732     register int c = *((unsigned char *)p);
733    
734     if (utf8 && c >= 0xc0)
735     {
736     int gcii, gcss;
737     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738     gcss = 6*extra;
739     c = (c & utf8_table3[extra]) << gcss;
740     for (gcii = 1; gcii <= extra; gcii++)
741     {
742     gcss -= 6;
743     c |= (p[gcii] & 0x3f) << gcss;
744     }
745     }
746    
747     p += 1 + extra;
748    
749     switch (c)
750     {
751     case 0x0a: /* LF */
752     case 0x0b: /* VT */
753     case 0x0c: /* FF */
754     *lenptr = 1;
755     return p;
756    
757     case 0x0d: /* CR */
758     if (p < endptr && *p == 0x0a)
759     {
760     *lenptr = 2;
761     p++;
762     }
763     else *lenptr = 1;
764     return p;
765    
766     case 0x85: /* NEL */
767     *lenptr = utf8? 2 : 1;
768     return p;
769    
770     case 0x2028: /* LS */
771     case 0x2029: /* PS */
772     *lenptr = 3;
773     return p;
774    
775     default:
776     break;
777     }
778     } /* End of loop for ANY case */
779    
780     *lenptr = 0; /* Must have hit the end */
781     return endptr;
782     } /* End of overall switch */
783     }
784    
785    
786    
787     /*************************************************
788     * Find start of previous line *
789     *************************************************/
790    
791     /* This is called when looking back for before lines to print.
792    
793     Arguments:
794     p start of the subsequent line
795     startptr start of available data
796    
797     Returns: pointer to the start of the previous line
798     */
799    
800     static char *
801     previous_line(char *p, char *startptr)
802     {
803     switch(endlinetype)
804     {
805     default: /* Just in case */
806     case EL_LF:
807     p--;
808     while (p > startptr && p[-1] != '\n') p--;
809     return p;
810    
811     case EL_CR:
812     p--;
813     while (p > startptr && p[-1] != '\n') p--;
814     return p;
815    
816     case EL_CRLF:
817     for (;;)
818     {
819     p -= 2;
820     while (p > startptr && p[-1] != '\n') p--;
821     if (p <= startptr + 1 || p[-2] == '\r') return p;
822     }
823     return p; /* But control should never get here */
824    
825     case EL_ANY:
826 ph10 150 case EL_ANYCRLF:
827 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
828     if (utf8) while ((*p & 0xc0) == 0x80) p--;
829    
830     while (p > startptr)
831     {
832     register int c;
833     char *pp = p - 1;
834    
835     if (utf8)
836     {
837     int extra = 0;
838     while ((*pp & 0xc0) == 0x80) pp--;
839     c = *((unsigned char *)pp);
840     if (c >= 0xc0)
841     {
842     int gcii, gcss;
843     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
844     gcss = 6*extra;
845     c = (c & utf8_table3[extra]) << gcss;
846     for (gcii = 1; gcii <= extra; gcii++)
847     {
848     gcss -= 6;
849     c |= (pp[gcii] & 0x3f) << gcss;
850     }
851     }
852     }
853     else c = *((unsigned char *)pp);
854    
855 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
856 nigel 93 {
857     case 0x0a: /* LF */
858 ph10 149 case 0x0d: /* CR */
859     return p;
860 ph10 150
861 ph10 149 default:
862     break;
863 ph10 150 }
864 ph10 149
865     else switch (c)
866     {
867     case 0x0a: /* LF */
868 nigel 93 case 0x0b: /* VT */
869     case 0x0c: /* FF */
870     case 0x0d: /* CR */
871     case 0x85: /* NEL */
872     case 0x2028: /* LS */
873     case 0x2029: /* PS */
874     return p;
875    
876     default:
877     break;
878     }
879    
880     p = pp; /* Back one character */
881     } /* End of loop for ANY case */
882    
883     return startptr; /* Hit start of data */
884     } /* End of overall switch */
885     }
886    
887    
888    
889    
890    
891     /*************************************************
892 nigel 77 * Print the previous "after" lines *
893 nigel 49 *************************************************/
894    
895 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
896 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
897     that a binary zero does not terminate it.
898 nigel 77
899     Arguments:
900     lastmatchnumber the number of the last matching line, plus one
901     lastmatchrestart where we restarted after the last match
902     endptr end of available data
903     printname filename for printing
904    
905     Returns: nothing
906     */
907    
908     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
909     char *endptr, char *printname)
910     {
911     if (after_context > 0 && lastmatchnumber > 0)
912     {
913     int count = 0;
914     while (lastmatchrestart < endptr && count++ < after_context)
915     {
916 nigel 93 int ellength;
917 nigel 77 char *pp = lastmatchrestart;
918     if (printname != NULL) fprintf(stdout, "%s-", printname);
919     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
920 nigel 93 pp = end_of_line(pp, endptr, &ellength);
921 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
922 nigel 93 lastmatchrestart = pp;
923 nigel 77 }
924     hyphenpending = TRUE;
925     }
926     }
927    
928    
929    
930     /*************************************************
931 ph10 378 * Apply patterns to subject till one matches *
932     *************************************************/
933    
934 ph10 392 /* This function is called to run through all patterns, looking for a match. It
935     is used multiple times for the same subject when colouring is enabled, in order
936 ph10 378 to find all possible matches.
937    
938     Arguments:
939     matchptr the start of the subject
940     length the length of the subject to match
941     offsets the offsets vector to fill in
942     mrc address of where to put the result of pcre_exec()
943 ph10 392
944     Returns: TRUE if there was a match
945 ph10 378 FALSE if there was no match
946     invert if there was a non-fatal error
947 ph10 392 */
948 ph10 378
949     static BOOL
950     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
951     {
952     int i;
953 ph10 561 size_t slen = length;
954     const char *msg = "this text:\n\n";
955     if (slen > 200)
956     {
957     slen = 200;
958     msg = "text that starts:\n\n";
959     }
960 ph10 378 for (i = 0; i < pattern_count; i++)
961     {
962 ph10 530 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
963 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
964 ph10 378 if (*mrc >= 0) return TRUE;
965     if (*mrc == PCRE_ERROR_NOMATCH) continue;
966 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
967 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
968 ph10 561 fprintf(stderr, "%s", msg);
969     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
970     fprintf(stderr, "\n\n");
971     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
972     resource_error = TRUE;
973 ph10 378 if (error_count++ > 20)
974     {
975 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
976     pcregrep_exit(2);
977 ph10 378 }
978     return invert; /* No more matching; don't show the line again */
979     }
980    
981     return FALSE; /* No match, no errors */
982     }
983    
984    
985    
986     /*************************************************
987 nigel 77 * Grep an individual file *
988     *************************************************/
989    
990     /* This is called from grep_or_recurse() below. It uses a buffer that is three
991     times the value of MBUFTHIRD. The matching point is never allowed to stray into
992     the top third of the buffer, thus keeping more of the file available for
993     context printing or for multiline scanning. For large files, the pointer will
994     be in the middle third most of the time, so the bottom third is available for
995     "before" context printing.
996    
997     Arguments:
998 ph10 286 handle the fopened FILE stream for a normal file
999     the gzFile pointer when reading is via libz
1000     the BZFILE pointer when reading is via libbz2
1001     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1002 nigel 77 printname the file name if it is to be printed for each match
1003     or NULL if the file name is not to be printed
1004     it cannot be NULL if filenames[_nomatch]_only is set
1005    
1006     Returns: 0 if there was at least one match
1007     1 otherwise (no matches)
1008 ph10 286 2 if there is a read error on a .bz2 file
1009 nigel 77 */
1010    
/* Scan one already-opened input (plain FILE, gzFile or BZFILE, selected by
frtype) a line at a time. Each line (or, in multiline mode, the rest of the
buffer) is passed to match_patterns(), and output is produced according to the
global option flags (count_only, filenames, quiet, only_matching, context
settings, colouring). The data lives in a local buffer of 3*MBUFTHIRD bytes
that is slid down by one third and refilled as scanning proceeds. */
1011 nigel 49 static int
1012 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1013 nigel 49 {
1014     int rc = 1;
1015 nigel 77 int linenumber = 1;
1016     int lastmatchnumber = 0;
1017 nigel 49 int count = 0;
1018 ph10 280 int filepos = 0;
1019 ph10 378 int offsets[OFFSET_SIZE];
1020 nigel 77 char *lastmatchrestart = NULL;
1021     char buffer[3*MBUFTHIRD];
1022     char *ptr = buffer;
1023     char *endptr;
1024     size_t bufflength;
1025     BOOL endhyphenpending = FALSE;
1026 ph10 519 BOOL input_line_buffered = line_buffered;
1027 ph10 286 FILE *in = NULL; /* Ensure initialized */
1028 nigel 49
1029 ph10 286 #ifdef SUPPORT_LIBZ
1030     gzFile ingz = NULL;
1031     #endif
1032 nigel 77
1033 ph10 286 #ifdef SUPPORT_LIBBZ2
1034     BZFILE *inbz2 = NULL;
1035     #endif
1036    
1037    
1038     /* Do the first read into the start of the buffer and set up the pointer to end
1039     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1040     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1041     fail. */
1042    
1043     #ifdef SUPPORT_LIBZ
1044     if (frtype == FR_LIBZ)
1045     {
1046     ingz = (gzFile)handle;
/* NOTE(review): the gzread() result is stored directly in the size_t
bufflength with no negative-return test, unlike the bzread branch below --
confirm whether a read error needs handling here. */
1047     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1048     }
1049     else
1050     #endif
1051    
1052     #ifdef SUPPORT_LIBBZ2
1053     if (frtype == FR_LIBBZ2)
1054     {
1055     inbz2 = (BZFILE *)handle;
1056     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1057     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1058     } /* without the cast it is unsigned. */
1059     else
1060     #endif
1061    
1062     {
1063     in = (FILE *)handle;
1064 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1065 ph10 535 bufflength = input_line_buffered?
1066 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1067     fread(buffer, 1, 3*MBUFTHIRD, in);
1068 ph10 286 }
1069 ph10 535
1070 nigel 77 endptr = buffer + bufflength;
1071    
1072     /* Loop while the current pointer is not at the end of the file. For large
1073     files, endptr will be at the end of the buffer when we are in the middle of the
1074     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1075     way, the buffer is shifted left and re-filled. */
1076    
1077     while (ptr < endptr)
1078 nigel 49 {
1079 ph10 378 int endlinelength;
1080 nigel 87 int mrc = 0;
1081 ph10 378 BOOL match;
1082 ph10 286 char *matchptr = ptr;
1083 nigel 77 char *t = ptr;
1084     size_t length, linelength;
1085 nigel 49
1086 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1087     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1088     length remainder of the data in the buffer. Otherwise, it is the length of
1089 ph10 378 the next line, excluding the terminating newline. After matching, we always
1090     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1091     option is used for compiling, so that any match is constrained to be in the
1092     first line. */
1093 nigel 77
1094 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1095     linelength = t - ptr - endlinelength;
1096 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1097 nigel 77
1098 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1099    
1100     #ifdef JFRIEDL_DEBUG
1101     if (jfriedl_XT || jfriedl_XR)
1102     {
1103     #include <sys/time.h>
1104     #include <time.h>
1105     struct timeval start_time, end_time;
1106     struct timezone dummy;
1107 ph10 392 int i;
1108 nigel 89
1109     if (jfriedl_XT)
1110     {
1111     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1112     const char *orig = ptr;
1113     ptr = malloc(newlen + 1);
1114     if (!ptr) {
1115     printf("out of memory");
1116 ph10 561 pcregrep_exit(2);
1117 nigel 89 }
1118     endptr = ptr;
1119     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1120     for (i = 0; i < jfriedl_XT; i++) {
1121     strncpy(endptr, orig, length);
1122     endptr += length;
1123     }
1124     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1125     length = newlen;
1126     }
1127    
1128     if (gettimeofday(&start_time, &dummy) != 0)
1129     perror("bad gettimeofday");
1130    
1131    
1132     for (i = 0; i < jfriedl_XR; i++)
1133 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1134 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1135 nigel 89
1136     if (gettimeofday(&end_time, &dummy) != 0)
1137     perror("bad gettimeofday");
1138    
1139     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1140     -
1141     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1142    
1143     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1144     return 0;
1145     }
1146     #endif
1147    
1148 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1149 ph10 279 in order to find any further matches in the same line. */
1150 nigel 89
1151 ph10 286 ONLY_MATCHING_RESTART:
1152    
1153 ph10 392 /* Run through all the patterns until one matches or there is an error other
1154 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1155     finding subsequent matches when colouring matched lines. */
1156 ph10 392
1157 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1158 nigel 77
1159 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1160 nigel 77
1161 nigel 49 if (match != invert)
1162     {
1163 nigel 77 BOOL hyphenprinted = FALSE;
1164    
1165 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1166 nigel 77
1167 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1168    
1169     /* Just count if just counting is wanted. */
1170    
1171 nigel 49 if (count_only) count++;
1172    
1173 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1174     in the file. */
1175    
1176 ph10 420 else if (filenames == FN_MATCH_ONLY)
1177 nigel 49 {
1178 nigel 77 fprintf(stdout, "%s\n", printname);
1179 nigel 49 return 0;
1180     }
1181    
1182 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1183    
1184 nigel 77 else if (quiet) return 0;
1185 nigel 49
1186 ph10 565 /* The --only-matching option prints just the substring that matched, or a
1187     captured portion of it, as long as this string is not empty, and the
1188     --file-offsets and --line-offsets options output offsets for the matching
1189     substring (they both force --only-matching = 0). None of these options
1190 ph10 280 prints any context. Afterwards, adjust the start and length, and then jump
1191     back to look for further matches in the same line. If we are in invert
1192 ph10 565 mode, however, nothing is printed and we do not restart - this could still
1193     be useful because the return code is set. */
1194 nigel 87
1195 ph10 565 else if (only_matching >= 0)
1196 nigel 87 {
1197 ph10 279 if (!invert)
1198 ph10 286 {
1199 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1200     if (number) fprintf(stdout, "%d:", linenumber);
1201 ph10 280 if (line_offsets)
1202 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1203 ph10 286 offsets[1] - offsets[0]);
1204 ph10 280 else if (file_offsets)
1205 ph10 565 fprintf(stdout, "%d,%d\n",
1206     (int)(filepos + matchptr + offsets[0] - ptr),
1207 ph10 286 offsets[1] - offsets[0]);
1208 ph10 565 else if (only_matching < mrc)
1209 ph10 377 {
1210 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1211     if (plen > 0)
1212     {
1213     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1214     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1215     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1216     fprintf(stdout, "\n");
1217     }
1218 ph10 392 }
1219 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
/* Step past the end of this match so that the restart only searches the
remainder of the subject. */
1220 ph10 279 matchptr += offsets[1];
1221     length -= offsets[1];
1222 ph10 286 match = FALSE;
1223 ph10 564 if (line_buffered) fflush(stdout);
1224     rc = 0; /* Had some success */
1225 ph10 286 goto ONLY_MATCHING_RESTART;
1226     }
1227 nigel 87 }
1228    
1229     /* This is the default case when none of the above options is set. We print
1230     the matching lines(s), possibly preceded and/or followed by other lines of
1231     context. */
1232    
1233 nigel 49 else
1234     {
1235 nigel 77 /* See if there is a requirement to print some "after" lines from a
1236     previous match. We never print any overlaps. */
1237    
1238     if (after_context > 0 && lastmatchnumber > 0)
1239     {
1240 nigel 93 int ellength;
1241 nigel 77 int linecount = 0;
1242     char *p = lastmatchrestart;
1243    
1244     while (p < ptr && linecount < after_context)
1245     {
1246 nigel 93 p = end_of_line(p, ptr, &ellength);
1247 nigel 77 linecount++;
1248     }
1249    
1250     /* It is important to advance lastmatchrestart during this printing so
1251 nigel 87 that it interacts correctly with any "before" printing below. Print
1252     each line's data using fwrite() in case there are binary zeroes. */
1253 nigel 77
1254     while (lastmatchrestart < p)
1255     {
1256     char *pp = lastmatchrestart;
1257     if (printname != NULL) fprintf(stdout, "%s-", printname);
1258     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1259 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1260 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1261 nigel 93 lastmatchrestart = pp;
1262 nigel 77 }
1263     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1264     }
1265    
1266     /* If there were non-contiguous lines printed above, insert hyphens. */
1267    
1268     if (hyphenpending)
1269     {
1270     fprintf(stdout, "--\n");
1271     hyphenpending = FALSE;
1272     hyphenprinted = TRUE;
1273     }
1274    
1275     /* See if there is a requirement to print some "before" lines for this
1276     match. Again, don't print overlaps. */
1277    
1278     if (before_context > 0)
1279     {
1280     int linecount = 0;
1281     char *p = ptr;
1282    
1283     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1284 nigel 87 linecount < before_context)
1285 nigel 77 {
1286 nigel 87 linecount++;
1287 nigel 93 p = previous_line(p, buffer);
1288 nigel 77 }
1289    
1290     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1291     fprintf(stdout, "--\n");
1292    
1293     while (p < ptr)
1294     {
1295 nigel 93 int ellength;
1296 nigel 77 char *pp = p;
1297     if (printname != NULL) fprintf(stdout, "%s-", printname);
1298     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1299 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1300 ph10 515 FWRITE(p, 1, pp - p, stdout);
1301 nigel 93 p = pp;
1302 nigel 77 }
1303     }
1304    
1305     /* Now print the matching line(s); ensure we set hyphenpending at the end
1306 nigel 85 of the file if any context lines are being output. */
1307 nigel 77
1308 nigel 85 if (after_context > 0 || before_context > 0)
1309     endhyphenpending = TRUE;
1310    
1311 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1312 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1313 nigel 77
1314     /* In multiline mode, we want to print to the end of the line in which
1315     the end of the matched string is found, so we adjust linelength and the
1316 ph10 222 line number appropriately, but only when there actually was a match
1317     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1318     the match will always be before the first newline sequence. */
1319 nigel 77
1320     if (multiline)
1321     {
1322 nigel 93 int ellength;
1323 ph10 222 char *endmatch = ptr;
1324     if (!invert)
1325 nigel 93 {
1326 ph10 222 endmatch += offsets[1];
1327     t = ptr;
1328     while (t < endmatch)
1329     {
1330     t = end_of_line(t, endptr, &ellength);
1331     if (t <= endmatch) linenumber++; else break;
1332     }
1333 nigel 93 }
1334     endmatch = end_of_line(endmatch, endptr, &ellength);
1335     linelength = endmatch - ptr - ellength;
1336 nigel 77 }
1337    
1338 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1339     zeroes are treated as just another data character. */
1340    
1341     /* This extra option, for Jeffrey Friedl's debugging requirements,
1342     replaces the matched string, or a specific captured string if it exists,
1343     with X. When this happens, colouring is ignored. */
1344    
1345     #ifdef JFRIEDL_DEBUG
1346     if (S_arg >= 0 && S_arg < mrc)
1347     {
1348     int first = S_arg * 2;
1349     int last = first + 1;
1350 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1351 nigel 87 fprintf(stdout, "X");
1352 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1353 nigel 87 }
1354     else
1355     #endif
1356    
1357 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1358 ph10 378 matches. */
1359 nigel 87
1360     if (do_colour)
1361     {
1362 ph10 392 int last_offset = 0;
1363 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1364 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1365 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1366 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1367 ph10 378 for (;;)
1368     {
/* last_offset sums the end offsets of the matches found so far; it is used
after the loop as the offset from ptr at which the uncoloured tail starts. */
1369 ph10 392 last_offset += offsets[1];
1370 ph10 378 matchptr += offsets[1];
1371     length -= offsets[1];
1372     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1373 ph10 515 FWRITE(matchptr, 1, offsets[0], stdout);
1374 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1375 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1376 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1377     }
1378 ph10 535 FWRITE(ptr + last_offset, 1,
1379 ph10 515 (linelength + endlinelength) - last_offset, stdout);
1380 nigel 87 }
1381 ph10 392
1382 ph10 378 /* Not colouring; no need to search for further matches */
1383 ph10 392
1384 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1385 nigel 49 }
1386    
1387 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1388     given, flush the output. */
1389 nigel 87
1390 ph10 519 if (line_buffered) fflush(stdout);
1391 nigel 77 rc = 0; /* Had some success */
1392    
1393     /* Remember where the last match happened for after_context. We remember
1394     where we are about to restart, and that line's number. */
1395    
1396 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1397 nigel 77 lastmatchnumber = linenumber + 1;
1398 nigel 49 }
1399 nigel 77
1400 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1401     anything to be printed), we have to move on to the end of the match before
1402     proceeding. */
1403    
1404     if (multiline && invert && match)
1405     {
1406     int ellength;
1407     char *endmatch = ptr + offsets[1];
1408     t = ptr;
1409     while (t < endmatch)
1410     {
1411     t = end_of_line(t, endptr, &ellength);
1412     if (t <= endmatch) linenumber++; else break;
1413     }
1414     endmatch = end_of_line(endmatch, endptr, &ellength);
1415     linelength = endmatch - ptr - ellength;
1416     }
1417    
1418 ph10 286 /* Advance to after the newline and increment the line number. The file
1419 ph10 280 offset to the current line is maintained in filepos. */
1420 nigel 77
1421 nigel 93 ptr += linelength + endlinelength;
1422 ph10 530 filepos += (int)(linelength + endlinelength);
1423 nigel 77 linenumber++;
1424 ph10 535
1425     /* If input is line buffered, and the buffer is not yet full, read another
1426 ph10 519 line and add it into the buffer. */
1427 ph10 535
/* NOTE(review): the new line is stored at ptr while it is endptr that is
advanced by the amount read; this appears to rely on ptr == endptr at this
point in line-buffered mode -- confirm. */
1428 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1429     {
1430     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1431     bufflength += add;
1432 ph10 535 endptr += add;
1433     }
1434 nigel 77
1435     /* If we haven't yet reached the end of the file (the buffer is full), and
1436     the current point is in the top 1/3 of the buffer, slide the buffer down by
1437     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1438     about to be lost, print them. */
1439    
1440     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1441     {
1442     if (after_context > 0 &&
1443     lastmatchnumber > 0 &&
1444     lastmatchrestart < buffer + MBUFTHIRD)
1445     {
1446     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1447     lastmatchnumber = 0;
1448     }
1449    
1450     /* Now do the shuffle */
1451    
1452     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1453     ptr -= MBUFTHIRD;
1454 ph10 286
/* NOTE(review): the gzread()/BZ2_bzread() results below are added to
2*MBUFTHIRD without being tested for a negative (error) return -- confirm
whether a mid-file read error needs handling. */
1455     #ifdef SUPPORT_LIBZ
1456     if (frtype == FR_LIBZ)
1457     bufflength = 2*MBUFTHIRD +
1458     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1459     else
1460     #endif
1461    
1462     #ifdef SUPPORT_LIBBZ2
1463     if (frtype == FR_LIBBZ2)
1464     bufflength = 2*MBUFTHIRD +
1465     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1466     else
1467     #endif
1468    
1469 ph10 535 bufflength = 2*MBUFTHIRD +
1470     (input_line_buffered?
1471     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1472 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1473 nigel 77 endptr = buffer + bufflength;
1474    
1475     /* Adjust any last match point */
1476    
1477     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1478     }
1479     } /* Loop through the whole file */
1480    
1481     /* End of file; print final "after" lines if wanted; do_after_lines sets
1482     hyphenpending if it prints something. */
1483    
1484 ph10 565 if (only_matching < 0 && !count_only)
1485 nigel 87 {
1486     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1487     hyphenpending |= endhyphenpending;
1488     }
1489 nigel 77
1490     /* Print the file name if we are looking for those without matches and there
1491     were none. If we found a match, we won't have got this far. */
1492    
1493 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1494 nigel 77 {
1495     fprintf(stdout, "%s\n", printname);
1496     return 0;
1497 nigel 49 }
1498    
1499 nigel 77 /* Print the match count if wanted */
1500    
1501 nigel 49 if (count_only)
1502     {
1503 ph10 420 if (count > 0 || !omit_zero_count)
1504 ph10 461 {
1505     if (printname != NULL && filenames != FN_NONE)
1506 ph10 420 fprintf(stdout, "%s:", printname);
1507     fprintf(stdout, "%d\n", count);
1508 ph10 461 }
1509 nigel 49 }
1510    
1511     return rc;
1512     }
1513    
1514    
1515    
1516     /*************************************************
1517 nigel 53 * Grep a file or recurse into a directory *
1518     *************************************************/
1519    
1520 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1521     recursing; if it's a file, grep it.
1522    
1523     Arguments:
1524     pathname the path to investigate
1525 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1526 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1527    
1528     Returns: 0 if there was at least one match
1529     1 if there were no matches
1530     2 there was some kind of error
1531    
1532     However, file opening failures are suppressed if "silent" is set.
1533     */
1534    
/* Dispatch on what pathname refers to: "-" means stdin; a directory is
skipped, recursed into, or (when neither applies) opened like a file,
depending on dee_action; anything else is opened (via zlib, bzlib or fopen,
chosen by file name suffix when those libraries are compiled in) and handed to
pcregrep(). */
1535 nigel 53 static int
1536 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1537 nigel 53 {
1538     int rc = 1;
1539     int sep;
1540 ph10 286 int frtype;
1541     int pathlen;
1542     void *handle;
1543     FILE *in = NULL; /* Ensure initialized */
1544 nigel 53
1545 ph10 286 #ifdef SUPPORT_LIBZ
1546     gzFile ingz = NULL;
1547     #endif
1548    
1549     #ifdef SUPPORT_LIBBZ2
1550     BZFILE *inbz2 = NULL;
1551     #endif
1552    
1553 nigel 77 /* If the file name is "-" we scan stdin */
1554 nigel 53
1555 nigel 77 if (strcmp(pathname, "-") == 0)
1556 nigel 53 {
1557 ph10 286 return pcregrep(stdin, FR_PLAIN,
1558 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1559 nigel 77 stdin_name : NULL);
1560     }
1561    
1562 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1563 ph10 325 each file and directory within it, subject to any include or exclude patterns
1564     that were set. The scanning code is localized so it can be made
1565     system-specific. */
1566 nigel 87
/* The non-zero value returned by isdirectory() is kept in sep and used below
as the separator character when building each entry's path. */
1567     if ((sep = isdirectory(pathname)) != 0)
1568 nigel 77 {
1569 nigel 87 if (dee_action == dee_SKIP) return 1;
1570     if (dee_action == dee_RECURSE)
1571 nigel 53 {
1572 nigel 87 char buffer[1024];
1573     char *nextfile;
1574     directory_type *dir = opendirectory(pathname);
1575 nigel 53
1576 nigel 87 if (dir == NULL)
1577     {
1578     if (!silent)
1579     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1580     strerror(errno));
1581     return 2;
1582     }
1583 nigel 77
1584 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1585     {
1586 ph10 324 int frc, nflen;
/* The precisions in the format limit the result to 512 + 1 + 128 characters
plus the terminator, which fits in buffer[1024]; longer components are
silently truncated. */
1587 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1588 ph10 530 nflen = (int)(strlen(nextfile));
1589 ph10 345
/* The include/exclude patterns are applied to the bare entry name (nextfile),
not to the joined path in buffer. */
1590 ph10 325 if (isdirectory(buffer))
1591     {
1592     if (exclude_dir_compiled != NULL &&
1593     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1594     continue;
1595 ph10 345
1596 ph10 325 if (include_dir_compiled != NULL &&
1597     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1598     continue;
1599     }
1600 ph10 345 else
1601     {
1602 ph10 324 if (exclude_compiled != NULL &&
1603     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1604     continue;
1605 ph10 345
1606 ph10 324 if (include_compiled != NULL &&
1607     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1608     continue;
1609 ph10 345 }
1610 nigel 77
/* Merge the entry's result: a value above 1 (error) replaces rc; otherwise a
0 (match found) upgrades rc only if it is still at its initial 1. */
1611 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1612     if (frc > 1) rc = frc;
1613     else if (frc == 0 && rc == 1) rc = 0;
1614     }
1615    
1616     closedirectory(dir);
1617     return rc;
1618 nigel 53 }
1619     }
1620    
1621 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1622     been requested. */
1623 nigel 53
1624 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1625    
1626     /* Control reaches here if we have a regular file, or if we have a directory
1627     and recursion or skipping was not requested, or if we have anything else and
1628     skipping was not requested. The scan proceeds. If this is the first and only
1629     argument at top level, we don't show the file name, unless we are only showing
1630     the file name, or the filename was forced (-H). */
1631    
1632 ph10 530 pathlen = (int)(strlen(pathname));
1633 ph10 286
1634     /* Open using zlib if it is supported and the file name ends with .gz. */
1635    
1636     #ifdef SUPPORT_LIBZ
1637     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1638 nigel 53 {
1639 ph10 286 ingz = gzopen(pathname, "rb");
1640     if (ingz == NULL)
1641     {
1642     if (!silent)
1643     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1644     strerror(errno));
1645     return 2;
1646     }
1647     handle = (void *)ingz;
1648     frtype = FR_LIBZ;
1649     }
1650     else
1651     #endif
1652    
1653     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1654    
1655     #ifdef SUPPORT_LIBBZ2
1656     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1657     {
1658     inbz2 = BZ2_bzopen(pathname, "rb");
1659     handle = (void *)inbz2;
1660     frtype = FR_LIBBZ2;
1661     }
1662     else
1663     #endif
1664    
1665     /* Otherwise use plain fopen(). The label is so that we can come back here if
1666     an attempt to read a .bz2 file indicates that it really is a plain file. */
1667    
1668     #ifdef SUPPORT_LIBBZ2
1669     PLAIN_FILE:
1670     #endif
1671     {
1672 ph10 419 in = fopen(pathname, "rb");
1673 ph10 286 handle = (void *)in;
1674     frtype = FR_PLAIN;
1675     }
1676    
1677     /* All the opening methods return errno when they fail. */
1678    
/* A gzopen() failure has already returned above, so this test is only
reached with a NULL handle after BZ2_bzopen() or fopen() failed. */
1679     if (handle == NULL)
1680     {
1681 nigel 77 if (!silent)
1682     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1683     strerror(errno));
1684 nigel 53 return 2;
1685     }
1686    
1687 ph10 286 /* Now grep the file */
1688    
1689     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1690 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1691 nigel 77
1692 ph10 286 /* Close in an appropriate manner. */
1693    
1694     #ifdef SUPPORT_LIBZ
1695     if (frtype == FR_LIBZ)
1696     gzclose(ingz);
1697     else
1698     #endif
1699    
1700     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1701     read failed. If the error indicates that the file isn't in fact bzipped, try
1702     again as a normal file. */
1703    
1704     #ifdef SUPPORT_LIBBZ2
1705     if (frtype == FR_LIBBZ2)
1706     {
1707     if (rc == 2)
1708     {
1709     int errnum;
1710     const char *err = BZ2_bzerror(inbz2, &errnum);
1711     if (errnum == BZ_DATA_ERROR_MAGIC)
1712     {
/* The bzlib handle is closed before jumping back to reopen the same path
with fopen(). */
1713     BZ2_bzclose(inbz2);
1714     goto PLAIN_FILE;
1715     }
1716     else if (!silent)
1717     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1718     pathname, err);
1719     }
1720     BZ2_bzclose(inbz2);
1721     }
1722     else
1723     #endif
1724    
1725     /* Normal file close */
1726    
1727 nigel 53 fclose(in);
1728 ph10 286
1729     /* Pass back the yield from pcregrep(). */
1730    
1731 nigel 53 return rc;
1732     }
1733    
1734    
1735    
1736    
1737     /*************************************************
1738 nigel 49 * Usage function *
1739     *************************************************/
1740    
1741     static int
1742     usage(int rc)
1743     {
1744 nigel 87 option_item *op;
1745     fprintf(stderr, "Usage: pcregrep [-");
1746     for (op = optionlist; op->one_char != 0; op++)
1747     {
1748     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1749     }
1750     fprintf(stderr, "] [long options] [pattern] [files]\n");
1751 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1752     "options.\n");
1753 nigel 49 return rc;
1754     }
1755    
1756    
1757    
1758    
1759     /*************************************************
1760 nigel 53 * Help function *
1761     *************************************************/
1762    
1763     static void
1764     help(void)
1765     {
1766     option_item *op;
1767    
1768 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1769 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1770 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1771 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1772    
1773     #ifdef SUPPORT_LIBZ
1774     printf("Files whose names end in .gz are read using zlib.\n");
1775     #endif
1776    
1777     #ifdef SUPPORT_LIBBZ2
1778     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1779     #endif
1780    
1781     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1782     printf("Other files and the standard input are read as plain files.\n\n");
1783     #else
1784     printf("All files are read as plain files, without any interpretation.\n\n");
1785     #endif
1786    
1787 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1788     printf("Options:\n");
1789    
1790     for (op = optionlist; op->one_char != 0; op++)
1791     {
1792     int n;
1793     char s[4];
1794 ph10 571
1795     /* Two options were accidentally implemented and documented with underscores
1796     instead of hyphens in their names, something that was not noticed for quite a
1797     few releases. When fixing this, I left the underscored versions in the list
1798     in case people were using them. However, we don't want to display them in the
1799     help data. There are no other options that contain underscores, and we do not
1800     expect ever to implement such options. Therefore, just omit any option that
1801     contains an underscore. */
1802    
1803     if (strchr(op->long_name, '_') != NULL) continue;
1804    
1805 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1806 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1807 nigel 53 if (n < 1) n = 1;
1808 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1809 nigel 53 }
1810    
1811 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1812     printf("trailing white space is removed and blank lines are ignored.\n");
1813     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1814 nigel 53
1815 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1816 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1817     }
1818    
1819    
1820    
1821    
1822     /*************************************************
1823 nigel 77 * Handle a single-letter, no data option *
1824 nigel 53 *************************************************/
1825    
1826     static int
1827     handle_option(int letter, int options)
1828     {
1829     switch(letter)
1830     {
1831 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1832 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1833 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1834 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1835 nigel 53 case 'c': count_only = TRUE; break;
1836 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1837     case 'H': filenames = FN_FORCE; break;
1838     case 'h': filenames = FN_NONE; break;
1839 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1840 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1841 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1842 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1843 nigel 53 case 'n': number = TRUE; break;
1844 ph10 565 case 'o': only_matching = 0; break;
1845 nigel 77 case 'q': quiet = TRUE; break;
1846 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1847 nigel 53 case 's': silent = TRUE; break;
1848 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1849 nigel 53 case 'v': invert = TRUE; break;
1850 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1851     case 'x': process_options |= PO_LINE_MATCH; break;
1852 nigel 53
1853     case 'V':
1854 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1855 ph10 561 pcregrep_exit(0);
1856 nigel 53 break;
1857    
1858     default:
1859     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1860 ph10 561 pcregrep_exit(usage(2));
1861 nigel 53 }
1862    
1863     return options;
1864     }
1865    
1866    
1867    
1868    
1869     /*************************************************
1870 nigel 87 * Construct printed ordinal *
1871     *************************************************/
1872    
1873     /* This turns a number into "1st", "3rd", etc. */
1874    
/* Build the English ordinal for a number: 1 -> "1st", 2 -> "2nd", 3 -> "3rd",
4 -> "4th", and so on. Numbers whose last two digits are 11, 12, or 13 take
"th" ("11th", "112th"), not the suffix that their final digit would suggest.

Argument:   n    the number
Returns:         pointer to a static buffer holding the text; the contents are
                 overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];     /* Room for a 32-bit int, its sign, and a suffix */
char *p = buffer;
int last_two = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* The "teens" are irregular, so deal with them before looking at the final
digit. For a negative n the remainders are not positive, so "th" is used. */

if (last_two >= 11 && last_two <= 13)
  {
  strcpy(p, "th");
  }
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1891    
1892    
1893    
1894     /*************************************************
1895     * Compile a single pattern *
1896     *************************************************/
1897    
1898     /* When the -F option has been used, this is called for each substring.
1899     Otherwise it's called for each supplied pattern.
1900    
1901     Arguments:
1902     pattern the pattern string
1903     options the PCRE options
1904     filename the file name, or NULL for a command-line pattern
1905     count 0 if this is the only command line pattern, or
1906     number of the command line pattern, or
1907     linenumber for a pattern from a file
1908    
1909     Returns: TRUE on success, FALSE after an error
1910     */
1911    
1912     static BOOL
1913     compile_single_pattern(char *pattern, int options, char *filename, int count)
1914     {
1915     char buffer[MBUFTHIRD + 16];
1916     const char *error;
1917     int errptr;
1918    
1919     if (pattern_count >= MAX_PATTERN_COUNT)
1920     {
1921     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1922     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1923     return FALSE;
1924     }
1925    
1926     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1927     suffix[process_options]);
1928     pattern_list[pattern_count] =
1929     pcre_compile(buffer, options, &error, &errptr, pcretables);
1930 ph10 142 if (pattern_list[pattern_count] != NULL)
1931 ph10 141 {
1932 ph10 142 pattern_count++;
1933 ph10 141 return TRUE;
1934 ph10 142 }
1935 nigel 87
1936     /* Handle compile errors */
1937    
1938     errptr -= (int)strlen(prefix[process_options]);
1939     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1940    
1941     if (filename == NULL)
1942     {
1943     if (count == 0)
1944     fprintf(stderr, "pcregrep: Error in command-line regex "
1945     "at offset %d: %s\n", errptr, error);
1946     else
1947     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1948     "at offset %d: %s\n", ordin(count), errptr, error);
1949     }
1950     else
1951     {
1952     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1953     "at offset %d: %s\n", count, filename, errptr, error);
1954     }
1955    
1956     return FALSE;
1957     }
1958    
1959    
1960    
1961     /*************************************************
1962     * Compile one supplied pattern *
1963     *************************************************/
1964    
1965     /* When the -F option has been used, each string may be a list of strings,
1966 nigel 91 separated by line breaks. They will be matched literally.
1967 nigel 87
1968     Arguments:
1969     pattern the pattern string
1970     options the PCRE options
1971     filename the file name, or NULL for a command-line pattern
1972     count 0 if this is the only command line pattern, or
1973     number of the command line pattern, or
1974     linenumber for a pattern from a file
1975    
1976     Returns: TRUE on success, FALSE after an error
1977     */
1978    
1979     static BOOL
1980     compile_pattern(char *pattern, int options, char *filename, int count)
1981     {
1982     if ((process_options & PO_FIXED_STRINGS) != 0)
1983     {
1984 nigel 93 char *eop = pattern + strlen(pattern);
1985 nigel 87 char buffer[MBUFTHIRD];
1986     for(;;)
1987     {
1988 nigel 93 int ellength;
1989     char *p = end_of_line(pattern, eop, &ellength);
1990     if (ellength == 0)
1991 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1992 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1993 nigel 93 pattern = p;
1994 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1995     return FALSE;
1996     }
1997     }
1998     else return compile_single_pattern(pattern, options, filename, count);
1999     }
2000    
2001    
2002    
2003     /*************************************************
2004 nigel 49 * Main program *
2005     *************************************************/
2006    
2007 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2008    
2009 nigel 49 int
2010     main(int argc, char **argv)
2011     {
2012 nigel 53 int i, j;
2013 nigel 49 int rc = 1;
2014 nigel 87 int pcre_options = 0;
2015     int cmd_pattern_count = 0;
2016 ph10 141 int hint_count = 0;
2017 nigel 49 int errptr;
2018 nigel 87 BOOL only_one_at_top;
2019     char *patterns[MAX_PATTERN_COUNT];
2020     const char *locale_from = "--locale";
2021 nigel 49 const char *error;
2022    
2023 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2024     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2025 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2026 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2027 ph10 391 rather than escapes such as as '\r'. */
2028 nigel 91
2029     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2030     switch(i)
2031     {
2032 ph10 391 default: newline = (char *)"lf"; break;
2033     case 13: newline = (char *)"cr"; break;
2034     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2035     case -1: newline = (char *)"any"; break;
2036     case -2: newline = (char *)"anycrlf"; break;
2037 nigel 91 }
2038    
2039 nigel 49 /* Process the options */
2040    
2041     for (i = 1; i < argc; i++)
2042     {
2043 nigel 77 option_item *op = NULL;
2044     char *option_data = (char *)""; /* default to keep compiler happy */
2045     BOOL longop;
2046     BOOL longopwasequals = FALSE;
2047    
2048 nigel 49 if (argv[i][0] != '-') break;
2049 nigel 53
2050 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2051 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2052 nigel 63
2053 nigel 77 if (argv[i][1] == 0)
2054     {
2055 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2056 ph10 561 else pcregrep_exit(usage(2));
2057 nigel 77 }
2058 nigel 63
2059 nigel 77 /* Handle a long name option, or -- to terminate the options */
2060 nigel 53
2061     if (argv[i][1] == '-')
2062 nigel 49 {
2063 nigel 77 char *arg = argv[i] + 2;
2064     char *argequals = strchr(arg, '=');
2065 nigel 53
2066 nigel 77 if (*arg == 0) /* -- terminates options */
2067 nigel 49 {
2068 nigel 77 i++;
2069     break; /* out of the options-handling loop */
2070 nigel 53 }
2071 nigel 49
2072 nigel 77 longop = TRUE;
2073    
2074     /* Some long options have data that follows after =, for example file=name.
2075     Some options have variations in the long name spelling: specifically, we
2076     allow "regexp" because GNU grep allows it, though I personally go along
2077 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2078 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2079     both these categories. */
2080 nigel 77
2081 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2082     {
2083 nigel 77 char *opbra = strchr(op->long_name, '(');
2084     char *equals = strchr(op->long_name, '=');
2085 ph10 461
2086 ph10 422 /* Handle options with only one spelling of the name */
2087 ph10 461
2088 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2089 nigel 53 {
2090 nigel 77 if (equals == NULL) /* Not thing=data case */
2091     {
2092     if (strcmp(arg, op->long_name) == 0) break;
2093     }
2094     else /* Special case xxx=data */
2095     {
2096 ph10 530 int oplen = (int)(equals - op->long_name);
2097 ph10 535 int arglen = (argequals == NULL)?
2098 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2099 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2100     {
2101     option_data = arg + arglen;
2102     if (*option_data == '=')
2103     {
2104     option_data++;
2105     longopwasequals = TRUE;
2106     }
2107     break;
2108     }
2109     }
2110 nigel 53 }
2111 ph10 461
2112 ph10 422 /* Handle options with an alternate spelling of the name */
2113 ph10 461
2114     else
2115 nigel 77 {
2116     char buff1[24];
2117     char buff2[24];
2118 ph10 461
2119 ph10 530 int baselen = (int)(opbra - op->long_name);
2120     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2121 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2122 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2123 ph10 461
2124 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2125 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2126 ph10 461
2127     if (strncmp(arg, buff1, arglen) == 0 ||
2128 ph10 422 strncmp(arg, buff2, arglen) == 0)
2129     {
2130     if (equals != NULL && argequals != NULL)
2131     {
2132 ph10 461 option_data = argequals;
2133 ph10 422 if (*option_data == '=')
2134     {
2135 ph10 461 option_data++;
2136 ph10 422 longopwasequals = TRUE;
2137 ph10 461 }
2138     }
2139 nigel 77 break;
2140 ph10 461 }
2141 nigel 77 }
2142 nigel 53 }
2143 nigel 77
2144 nigel 53 if (op->one_char == 0)
2145     {
2146     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2147 ph10 561 pcregrep_exit(usage(2));
2148 nigel 53 }
2149     }
2150 nigel 49
2151 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2152     are not in the right form for putting in the option table because they use
2153     only one hyphen, yet are more than one character long. By putting them
2154     separately here, they will not get displayed as part of the help() output,
2155     but I don't think Jeffrey will care about that. */
2156    
2157     #ifdef JFRIEDL_DEBUG
2158     else if (strcmp(argv[i], "-pre") == 0) {
2159     jfriedl_prefix = argv[++i];
2160     continue;
2161     } else if (strcmp(argv[i], "-post") == 0) {
2162     jfriedl_postfix = argv[++i];
2163     continue;
2164     } else if (strcmp(argv[i], "-XT") == 0) {
2165     sscanf(argv[++i], "%d", &jfriedl_XT);
2166     continue;
2167     } else if (strcmp(argv[i], "-XR") == 0) {
2168     sscanf(argv[++i], "%d", &jfriedl_XR);
2169     continue;
2170     }
2171     #endif
2172    
2173    
2174 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2175     continue till we hit the last one or one that needs data. */
2176 nigel 53
2177     else
2178     {
2179     char *s = argv[i] + 1;
2180 nigel 77 longop = FALSE;
2181 nigel 53 while (*s != 0)
2182     {
2183 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2184 ph10 565 {
2185     if (*s == op->one_char) break;
2186     }
2187 nigel 77 if (op->one_char == 0)
2188 nigel 53 {
2189 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2190     *s, argv[i]);
2191 ph10 561 pcregrep_exit(usage(2));
2192 nigel 77 }
2193 ph10 565
2194     /* Check for a single-character option that has data: OP_OP_NUMBER
2195     is used for one that either has a numerical number or defaults, i.e. the
2196     data is optional. If a digit follows, there is data; if not, carry on
2197     with other single-character options in the same string. */
2198    
2199     option_data = s+1;
2200     if (op->type == OP_OP_NUMBER)
2201     {
2202     if (isdigit((unsigned char)s[1])) break;
2203 nigel 53 }
2204 ph10 565 else /* Check for end or a dataless option */
2205     {
2206     if (op->type != OP_NODATA || s[1] == 0) break;
2207     }
2208    
2209     /* Handle a single-character option with no data, then loop for the
2210     next character in the string. */
2211    
2212 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2213 nigel 49 }
2214     }
2215 nigel 77
2216 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2217     is NO_DATA, it means that there is no data, and the option might set
2218     something in the PCRE options. */
2219 nigel 77
2220     if (op->type == OP_NODATA)
2221     {
2222 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2223     continue;
2224     }
2225    
2226     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2227     either has a value or defaults to something. It cannot have data in a
2228 ph10 565 separate item. At the moment, the only such options are "colo(u)r",
2229     "only-matching", and Jeffrey Friedl's special -S debugging option. */
2230 nigel 87
2231     if (*option_data == 0 &&
2232     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2233     {
2234     switch (op->one_char)
2235 nigel 77 {
2236 nigel 87 case N_COLOUR:
2237     colour_option = (char *)"auto";
2238     break;
2239 ph10 565
2240     case 'o':
2241     only_matching = 0;
2242     break;
2243    
2244 nigel 87 #ifdef JFRIEDL_DEBUG
2245     case 'S':
2246     S_arg = 0;
2247     break;
2248     #endif
2249 nigel 77 }
2250 nigel 87 continue;
2251     }
2252 nigel 77
2253 nigel 87 /* Otherwise, find the data string for the option. */
2254    
2255     if (*option_data == 0)
2256     {
2257     if (i >= argc - 1 || longopwasequals)
2258 nigel 77 {
2259 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2260 ph10 561 pcregrep_exit(usage(2));
2261 nigel 87 }
2262     option_data = argv[++i];
2263     }
2264    
2265     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2266     multiple times to create a list of patterns. */
2267    
2268     if (op->type == OP_PATLIST)
2269     {
2270     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2271     {
2272     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2273     MAX_PATTERN_COUNT);
2274     return 2;
2275     }
2276     patterns[cmd_pattern_count++] = option_data;
2277     }
2278    
2279     /* Otherwise, deal with single string or numeric data values. */
2280    
2281     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2282     {
2283     *((char **)op->dataptr) = option_data;
2284     }
2285 ph10 558
2286     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2287     only for unpicking arguments, so just keep it simple. */
2288    
2289 nigel 87 else
2290     {
2291 ph10 561 unsigned long int n = 0;
2292 ph10 558 char *endptr = option_data;
2293     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2294     while (isdigit((unsigned char)(*endptr)))
2295     n = n * 10 + (int)(*endptr++ - '0');
2296 nigel 87 if (*endptr != 0)
2297     {
2298     if (longop)
2299 nigel 77 {
2300 nigel 87 char *equals = strchr(op->long_name, '=');
2301     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2302 ph10 530 (int)(equals - op->long_name);
2303 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2304     option_data, nlen, op->long_name);
2305 nigel 77 }
2306 nigel 87 else
2307     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2308     option_data, op->one_char);
2309 ph10 561 pcregrep_exit(usage(2));
2310 nigel 77 }
2311 nigel 87 *((int *)op->dataptr) = n;
2312 nigel 77 }
2313 nigel 49 }
2314    
2315 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2316     for -A and -B. */
2317    
2318     if (both_context > 0)
2319     {
2320     if (after_context == 0) after_context = both_context;
2321     if (before_context == 0) before_context = both_context;
2322     }
2323 ph10 286
2324     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2325 ph10 565 However, the latter two set only_matching. */
2326 nigel 77
2327 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2328 ph10 286 (file_offsets && line_offsets))
2329 ph10 280 {
2330     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2331     "and/or --line-offsets\n");
2332 ph10 561 pcregrep_exit(usage(2));
2333 ph10 280 }
2334    
2335 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2336 ph10 286
2337 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2338     LC_ALL environment variable is set, and if so, use it. */
2339 nigel 49
2340 nigel 87 if (locale == NULL)
2341 nigel 53 {
2342 nigel 87 locale = getenv("LC_ALL");
2343     locale_from = "LCC_ALL";
2344 nigel 53 }
2345 nigel 49
2346 nigel 87 if (locale == NULL)
2347     {
2348     locale = getenv("LC_CTYPE");
2349     locale_from = "LC_CTYPE";
2350     }
2351 nigel 49
2352 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2353     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2354    
2355     if (locale != NULL)
2356 nigel 49 {
2357 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2358 nigel 53 {
2359 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2360     locale, locale_from);
2361 nigel 53 return 2;
2362     }
2363 nigel 87 pcretables = pcre_maketables();
2364     }
2365 nigel 77
2366 nigel 87 /* Sort out colouring */
2367    
2368     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2369     {
2370     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2371     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2372     else
2373 nigel 53 {
2374 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2375     colour_option);
2376     return 2;
2377 nigel 77 }
2378 nigel 87 if (do_colour)
2379 nigel 77 {
2380 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2381     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2382     if (cs != NULL) colour_string = cs;
2383 nigel 77 }
2384 nigel 87 }
2385 ph10 535
2386 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2387    
2388     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2389     {
2390     pcre_options |= PCRE_NEWLINE_CR;
2391 nigel 93 endlinetype = EL_CR;
2392 nigel 91 }
2393     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2394     {
2395     pcre_options |= PCRE_NEWLINE_LF;
2396 nigel 93 endlinetype = EL_LF;
2397 nigel 91 }
2398     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2399     {
2400     pcre_options |= PCRE_NEWLINE_CRLF;
2401 nigel 93 endlinetype = EL_CRLF;
2402 nigel 91 }
2403 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2404     {
2405     pcre_options |= PCRE_NEWLINE_ANY;
2406     endlinetype = EL_ANY;
2407     }
2408 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2409     {
2410     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2411     endlinetype = EL_ANYCRLF;
2412     }
2413 nigel 91 else
2414     {
2415     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2416     return 2;
2417     }
2418    
2419 nigel 87 /* Interpret the text values for -d and -D */
2420    
2421     if (dee_option != NULL)
2422     {
2423     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2424     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2425     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2426     else
2427 nigel 77 {
2428 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2429     return 2;
2430 nigel 53 }
2431 nigel 49 }
2432    
2433 nigel 87 if (DEE_option != NULL)
2434     {
2435     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2436     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2437     else
2438     {
2439     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2440     return 2;
2441     }
2442     }
2443 nigel 49
2444 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2445 nigel 87
2446     #ifdef JFRIEDL_DEBUG
2447     if (S_arg > 9)
2448 nigel 49 {
2449 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2450     return 2;
2451     }
2452 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2453     {
2454     if (jfriedl_XT == 0) jfriedl_XT = 1;
2455     if (jfriedl_XR == 0) jfriedl_XR = 1;
2456     }
2457 nigel 87 #endif
2458 nigel 77
2459 nigel 87 /* Get memory to store the pattern and hints lists. */
2460    
2461     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2462     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2463    
2464     if (pattern_list == NULL || hints_list == NULL)
2465     {
2466     fprintf(stderr, "pcregrep: malloc failed\n");
2467 ph10 123 goto EXIT2;
2468 nigel 87 }
2469    
2470     /* If no patterns were provided by -e, and there is no file provided by -f,
2471     the first argument is the one and only pattern, and it must exist. */
2472    
2473     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2474     {
2475 nigel 63 if (i >= argc) return usage(2);
2476 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2477     }
2478 nigel 77
2479 nigel 87 /* Compile the patterns that were provided on the command line, either by
2480     multiple uses of -e or as a single unkeyed pattern. */
2481    
2482     for (j = 0; j < cmd_pattern_count; j++)
2483     {
2484     if (!compile_pattern(patterns[j], pcre_options, NULL,
2485     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2486 ph10 123 goto EXIT2;
2487 nigel 87 }
2488    
2489     /* Compile the regular expressions that are provided in a file. */
2490    
2491     if (pattern_filename != NULL)
2492     {
2493     int linenumber = 0;
2494     FILE *f;
2495     char *filename;
2496     char buffer[MBUFTHIRD];
2497    
2498     if (strcmp(pattern_filename, "-") == 0)
2499 nigel 77 {
2500 nigel 87 f = stdin;
2501     filename = stdin_name;
2502 nigel 77 }
2503 nigel 87 else
2504 nigel 77 {
2505 nigel 87 f = fopen(pattern_filename, "r");
2506     if (f == NULL)
2507     {
2508     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2509     strerror(errno));
2510 ph10 123 goto EXIT2;
2511 nigel 87 }
2512     filename = pattern_filename;
2513 nigel 77 }
2514    
2515 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2516 nigel 53 {
2517 nigel 87 char *s = buffer + (int)strlen(buffer);
2518     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2519     *s = 0;
2520     linenumber++;
2521     if (buffer[0] == 0) continue; /* Skip blank lines */
2522     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2523 ph10 121 goto EXIT2;
2524 nigel 53 }
2525 nigel 87
2526     if (f != stdin) fclose(f);
2527 nigel 49 }
2528    
2529 nigel 77 /* Study the regular expressions, as we will be running them many times */
2530 nigel 53
2531     for (j = 0; j < pattern_count; j++)
2532     {
2533     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2534     if (error != NULL)
2535     {
2536     char s[16];
2537     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2538     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2539 ph10 121 goto EXIT2;
2540 nigel 53 }
2541 ph10 142 hint_count++;
2542 nigel 53 }
2543 ph10 561
2544     /* If --match-limit or --recursion-limit was set, put the value(s) into the
2545     pcre_extra block for each pattern. */
2546 nigel 53
2547 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2548     {
2549     for (j = 0; j < pattern_count; j++)
2550     {
2551     if (hints_list[j] == NULL)
2552     {
2553     hints_list[j] = malloc(sizeof(pcre_extra));
2554     if (hints_list[j] == NULL)
2555     {
2556     fprintf(stderr, "pcregrep: malloc failed\n");
2557     pcregrep_exit(2);
2558     }
2559     }
2560     if (match_limit > 0)
2561     {
2562     hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2563     hints_list[j]->match_limit = match_limit;
2564     }
2565     if (match_limit_recursion > 0)
2566     {
2567     hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2568     hints_list[j]->match_limit_recursion = match_limit_recursion;
2569     }
2570     }
2571     }
2572    
2573 nigel 77 /* If there are include or exclude patterns, compile them. */
2574    
2575     if (exclude_pattern != NULL)
2576     {
2577 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2578     pcretables);
2579 nigel 77 if (exclude_compiled == NULL)
2580     {
2581     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2582     errptr, error);
2583 ph10 121 goto EXIT2;
2584 nigel 77 }
2585     }
2586    
2587     if (include_pattern != NULL)
2588     {
2589 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2590     pcretables);
2591 nigel 77 if (include_compiled == NULL)
2592     {
2593     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2594     errptr, error);
2595 ph10 121 goto EXIT2;
2596 nigel 77 }
2597     }
2598    
2599 ph10 325 if (exclude_dir_pattern != NULL)
2600     {
2601     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2602     pcretables);
2603     if (exclude_dir_compiled == NULL)
2604     {
2605     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2606     errptr, error);
2607     goto EXIT2;
2608     }
2609     }
2610    
2611     if (include_dir_pattern != NULL)
2612     {
2613     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2614     pcretables);
2615     if (include_dir_compiled == NULL)
2616     {
2617     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2618     errptr, error);
2619     goto EXIT2;
2620     }
2621     }
2622    
2623 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2624 nigel 49
2625 nigel 87 if (i >= argc)
2626 ph10 121 {
2627 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2628 ph10 121 goto EXIT;
2629 ph10 123 }
2630 nigel 49
2631 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2632     Pass in the fact that there is only one argument at top level - this suppresses
2633 nigel 87 the file name if the argument is not a directory and filenames are not
2634     otherwise forced. */
2635 nigel 49
2636 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2637 nigel 49
2638     for (; i < argc; i++)
2639     {
2640 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2641     only_one_at_top);
2642 nigel 77 if (frc > 1) rc = frc;
2643     else if (frc == 0 && rc == 1) rc = 0;
2644 nigel 49 }
2645    
2646 ph10 121 EXIT:
2647     if (pattern_list != NULL)
2648     {
2649 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2650 ph10 121 free(pattern_list);
2651 ph10 123 }
2652 ph10 121 if (hints_list != NULL)
2653     {
2654 ph10 561 for (i = 0; i < hint_count; i++)
2655     {
2656     if (hints_list[i] != NULL) free(hints_list[i]);
2657     }
2658 ph10 121 free(hints_list);
2659 ph10 123 }
2660 ph10 561 pcregrep_exit(rc);
2661 ph10 121
2662     EXIT2:
2663     rc = 2;
2664     goto EXIT;
2665 nigel 49 }
2666    
2667 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12