/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 588 - (hide annotations) (download)
Sat Jan 15 11:22:47 2011 UTC (3 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 77169 byte(s)
Another bugfix for pcregrep with -M and colour.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77     #define MBUFTHIRD BUFSIZ
78     #else
79     #define MBUFTHIRD 8192
80     #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result of fwrite() is consumed
by an "if" with an empty body. Oddly, this does not also seem to apply to
fprintf().

The fudge is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. It can then be used as the unbraced body of an "if" that has an
"else", and a following "else" can never attach to the macro's own "if". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166 ph10 565 static int only_matching = -1;
167 nigel 87 static int process_options = 0;
168 nigel 77
169 ph10 561 static unsigned long int match_limit = 0;
170     static unsigned long int match_limit_recursion = 0;
171    
172 nigel 49 static BOOL count_only = FALSE;
173 nigel 87 static BOOL do_colour = FALSE;
174 ph10 280 static BOOL file_offsets = FALSE;
175 nigel 77 static BOOL hyphenpending = FALSE;
176 nigel 49 static BOOL invert = FALSE;
177 ph10 519 static BOOL line_buffered = FALSE;
178 ph10 280 static BOOL line_offsets = FALSE;
179 nigel 77 static BOOL multiline = FALSE;
180 nigel 49 static BOOL number = FALSE;
181 ph10 420 static BOOL omit_zero_count = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
187 nigel 53 /* Structure for options and list of them */
188 nigel 49
189 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190     OP_OP_NUMBER, OP_PATLIST };
191 nigel 77
192 nigel 53 typedef struct option_item {
193 nigel 77 int type;
194 nigel 53 int one_char;
195 nigel 77 void *dataptr;
196 nigel 67 const char *long_name;
197     const char *help_text;
198 nigel 53 } option_item;
199 nigel 49
200 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
201     used to identify them. */
202    
203 ph10 325 #define N_COLOUR (-1)
204     #define N_EXCLUDE (-2)
205     #define N_EXCLUDE_DIR (-3)
206     #define N_HELP (-4)
207     #define N_INCLUDE (-5)
208     #define N_INCLUDE_DIR (-6)
209     #define N_LABEL (-7)
210     #define N_LOCALE (-8)
211     #define N_NULL (-9)
212     #define N_LOFFSETS (-10)
213     #define N_FOFFSETS (-11)
214 ph10 519 #define N_LBUFFER (-12)
215 ph10 561 #define N_M_LIMIT (-13)
216     #define N_M_LIMIT_REC (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254 ph10 571
255     /* These two were accidentally implemented with underscores instead of
256     hyphens in the option names. As this was not discovered for several releases,
257     the incorrect versions are left in the table for compatibility. However, the
258     --help function misses out any option that has an underscore in its name. */
259 ph10 579
260 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 ph10 571
263 nigel 87 #ifdef JFRIEDL_DEBUG
264     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265     #endif
266     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272     { OP_NODATA, 0, NULL, NULL, NULL }
273 nigel 53 };
274    
275 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277     that the combination of -w and -x has the same effect as -x on its own, so we
278     can treat them as the same. */
279 nigel 53
280 nigel 87 static const char *prefix[] = {
281     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283     static const char *suffix[] = {
284     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
285    
286 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
287 nigel 87
288 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289 nigel 87
290 nigel 93 const char utf8_table4[] = {
291     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298 nigel 53 /*************************************************
299 ph10 586 * Exit from the program *
300     *************************************************/
301    
302     /* If there has been a resource error, give a suitable message.
303    
304     Argument: the return code
305     Returns: does not return
306     */
307    
308     static void
309     pcregrep_exit(int rc)
310     {
311     if (resource_error)
312     {
313     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316     }
317    
318     exit(rc);
319     }
320    
321    
322     /*************************************************
323 nigel 87 * OS-specific functions *
324 nigel 53 *************************************************/
325    
326     /* These functions are defined so that they can be made system specific,
327 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
328 nigel 53
329    
330     /************* Directory scanning in Unix ***********/
331    
332 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 nigel 53 #include <sys/types.h>
334     #include <sys/stat.h>
335     #include <dirent.h>
336    
337     typedef DIR directory_type;
338    
/* Test whether a path names a directory.

Argument:  filename   the path to test
Returns:   '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the POSIX file-type test; the S_IFMT/S_IFDIR masks are an XSI
extension that strict feature-test modes do not define. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
347    
348 nigel 67 static directory_type *
349 nigel 53 opendirectory(char *filename)
350     {
351     return opendir(filename);
352     }
353    
354 nigel 67 static char *
355 nigel 53 readdirectory(directory_type *dir)
356     {
357     for (;;)
358     {
359     struct dirent *dent = readdir(dir);
360     if (dent == NULL) return NULL;
361     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362     return dent->d_name;
363     }
364 ph10 151 /* Control never reaches here */
365 nigel 53 }
366    
367 nigel 67 static void
368 nigel 53 closedirectory(directory_type *dir)
369     {
370     closedir(dir);
371     }
372    
373    
374 nigel 87 /************* Test for regular file in Unix **********/
375    
/* Test whether a path names a regular file.

Argument:  filename   the path to test
Returns:   1 if stat() fails or reports a regular file, otherwise 0
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the POSIX file-type test; the S_IFMT/S_IFREG masks are an XSI
extension that strict feature-test modes do not define. */

return S_ISREG(statbuf.st_mode) != 0;
}
384    
385    
386 ph10 519 /************* Test for a terminal in Unix **********/
387 nigel 87
388     static BOOL
389     is_stdout_tty(void)
390     {
391     return isatty(fileno(stdout));
392     }
393    
394 ph10 519 static BOOL
395     is_file_tty(FILE *f)
396     {
397     return isatty(fileno(f));
398     }
399 nigel 87
400 ph10 519
401 nigel 63 /************* Directory scanning in Win32 ***********/
402 nigel 53
403 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
406     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408     undefined when it is indeed undefined. */
409 nigel 53
410 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411 nigel 63
412     #ifndef STRICT
413     # define STRICT
414     #endif
415     #ifndef WIN32_LEAN_AND_MEAN
416     # define WIN32_LEAN_AND_MEAN
417     #endif
418 ph10 283
419     #include <windows.h>
420    
421 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
422     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423     #endif
424    
425 nigel 63 typedef struct directory_type
426     {
427     HANDLE handle;
428     BOOL first;
429     WIN32_FIND_DATA data;
430     } directory_type;
431    
432     int
433     isdirectory(char *filename)
434     {
435     DWORD attr = GetFileAttributes(filename);
436     if (attr == INVALID_FILE_ATTRIBUTES)
437     return 0;
438     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439     }
440    
441     directory_type *
442     opendirectory(char *filename)
443     {
444     size_t len;
445     char *pattern;
446     directory_type *dir;
447     DWORD err;
448     len = strlen(filename);
449     pattern = (char *) malloc(len + 3);
450     dir = (directory_type *) malloc(sizeof(*dir));
451     if ((pattern == NULL) || (dir == NULL))
452     {
453     fprintf(stderr, "pcregrep: malloc failed\n");
454 ph10 561 pcregrep_exit(2);
455 nigel 63 }
456     memcpy(pattern, filename, len);
457     memcpy(&(pattern[len]), "\\*", 3);
458     dir->handle = FindFirstFile(pattern, &(dir->data));
459     if (dir->handle != INVALID_HANDLE_VALUE)
460     {
461     free(pattern);
462     dir->first = TRUE;
463     return dir;
464     }
465     err = GetLastError();
466     free(pattern);
467     free(dir);
468     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469     return NULL;
470     }
471    
472     char *
473     readdirectory(directory_type *dir)
474     {
475     for (;;)
476     {
477     if (!dir->first)
478     {
479     if (!FindNextFile(dir->handle, &(dir->data)))
480     return NULL;
481     }
482     else
483     {
484     dir->first = FALSE;
485     }
486     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487     return dir->data.cFileName;
488     }
489     #ifndef _MSC_VER
490     return NULL; /* Keep compiler happy; never executed */
491     #endif
492     }
493    
494     void
495     closedirectory(directory_type *dir)
496     {
497     FindClose(dir->handle);
498     free(dir);
499     }
500    
501    
502 nigel 87 /************* Test for regular file in Win32 **********/
503    
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:  filename   the path to test
Returns:   1 if isdirectory() returns 0 for the path, otherwise 0
*/

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
511    
512    
513 ph10 519 /************* Test for a terminal in Win32 **********/
514 nigel 87
515     /* I don't know how to do this; assume never */
516    
517     static BOOL
518     is_stdout_tty(void)
519     {
520 ph10 283 return FALSE;
521 nigel 87 }
522    
523 ph10 519 static BOOL
524     is_file_tty(FILE *f)
525     {
526     return FALSE;
527     }
528 nigel 87
529 ph10 519
530 nigel 53 /************* Directory scanning when we can't do it ***********/
531    
532     /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534 nigel 63 #else
535    
536 nigel 53 typedef void directory_type;
537    
538 nigel 87 int isdirectory(char *filename) { return 0; }
539 ph10 97 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
540     char *readdirectory(directory_type *dir) { return (char*)0;}
541 nigel 53 void closedirectory(directory_type *dir) {}
542    
543 nigel 87
544     /************* Test for regular when we can't do it **********/
545    
/* Assume all files are regular: the answer is always 1 and the path is not
examined. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
549    
550    
551 ph10 519 /************* Test for a terminal when we can't do it **********/
552 nigel 87
553     static BOOL
554     is_stdout_tty(void)
555     {
556     return FALSE;
557     }
558    
559 ph10 519 static BOOL
560     is_file_tty(FILE *f)
561     {
562     return FALSE;
563     }
564 nigel 87
565 nigel 53 #endif
566    
567    
568    
569 ph10 137 #ifndef HAVE_STRERROR
570 nigel 49 /*************************************************
571     * Provide strerror() for non-ANSI libraries *
572     *************************************************/
573    
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the message up in sys_errlist[].

Argument:  n   the error number
Returns:   sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
           "unknown error number" string
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
587     #endif /* HAVE_STRERROR */
588    
589    
590    
591     /*************************************************
592 ph10 519 * Read one line of input *
593     *************************************************/
594    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when length characters have been
stored, or at end of file, whichever comes first. The limit is tested before
each character is fetched, so nothing is read or stored when length is zero or
negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file (or when
             length is not positive)
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622    
623    
624    
625     /*************************************************
626 nigel 93 * Find end of line *
627     *************************************************/
628    
629     /* The length of the endline sequence that is found is set via lenptr. This may
630     be zero at the very end of the file if there is no line-ending sequence there.
631    
632     Arguments:
633     p current position in line
634     endptr end of available data
635     lenptr where to put the length of the eol sequence
636    
637 ph10 587 Returns: pointer to the last byte of the line, including the newline byte(s)
638 nigel 93 */
639    
640     static char *
641     end_of_line(char *p, char *endptr, int *lenptr)
642     {
643     switch(endlinetype)
644     {
645     default: /* Just in case */
646     case EL_LF:
647     while (p < endptr && *p != '\n') p++;
648     if (p < endptr)
649     {
650     *lenptr = 1;
651     return p + 1;
652     }
653     *lenptr = 0;
654     return endptr;
655    
656     case EL_CR:
657     while (p < endptr && *p != '\r') p++;
658     if (p < endptr)
659     {
660     *lenptr = 1;
661     return p + 1;
662     }
663     *lenptr = 0;
664     return endptr;
665    
666     case EL_CRLF:
667     for (;;)
668     {
669     while (p < endptr && *p != '\r') p++;
670     if (++p >= endptr)
671     {
672     *lenptr = 0;
673     return endptr;
674     }
675     if (*p == '\n')
676     {
677     *lenptr = 2;
678     return p + 1;
679     }
680     }
681     break;
682    
683 ph10 149 case EL_ANYCRLF:
684     while (p < endptr)
685     {
686     int extra = 0;
687     register int c = *((unsigned char *)p);
688    
689     if (utf8 && c >= 0xc0)
690     {
691     int gcii, gcss;
692     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693     gcss = 6*extra;
694     c = (c & utf8_table3[extra]) << gcss;
695     for (gcii = 1; gcii <= extra; gcii++)
696     {
697     gcss -= 6;
698     c |= (p[gcii] & 0x3f) << gcss;
699     }
700     }
701    
702     p += 1 + extra;
703    
704     switch (c)
705     {
706     case 0x0a: /* LF */
707     *lenptr = 1;
708     return p;
709    
710     case 0x0d: /* CR */
711     if (p < endptr && *p == 0x0a)
712     {
713     *lenptr = 2;
714     p++;
715     }
716     else *lenptr = 1;
717     return p;
718 ph10 150
719 ph10 149 default:
720     break;
721     }
722     } /* End of loop for ANYCRLF case */
723 ph10 150
724 ph10 149 *lenptr = 0; /* Must have hit the end */
725     return endptr;
726    
727 nigel 93 case EL_ANY:
728     while (p < endptr)
729     {
730     int extra = 0;
731     register int c = *((unsigned char *)p);
732    
733     if (utf8 && c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (p[gcii] & 0x3f) << gcss;
743     }
744     }
745    
746     p += 1 + extra;
747    
748     switch (c)
749     {
750     case 0x0a: /* LF */
751     case 0x0b: /* VT */
752     case 0x0c: /* FF */
753     *lenptr = 1;
754     return p;
755    
756     case 0x0d: /* CR */
757     if (p < endptr && *p == 0x0a)
758     {
759     *lenptr = 2;
760     p++;
761     }
762     else *lenptr = 1;
763     return p;
764    
765     case 0x85: /* NEL */
766     *lenptr = utf8? 2 : 1;
767     return p;
768    
769     case 0x2028: /* LS */
770     case 0x2029: /* PS */
771     *lenptr = 3;
772     return p;
773    
774     default:
775     break;
776     }
777     } /* End of loop for ANY case */
778    
779     *lenptr = 0; /* Must have hit the end */
780     return endptr;
781     } /* End of overall switch */
782     }
783    
784    
785    
786     /*************************************************
787     * Find start of previous line *
788     *************************************************/
789    
790     /* This is called when looking back for before lines to print.
791    
792     Arguments:
793     p start of the subsequent line
794     startptr start of available data
795    
796     Returns: pointer to the start of the previous line
797     */
798    
799     static char *
800     previous_line(char *p, char *startptr)
801     {
802     switch(endlinetype)
803     {
804     default: /* Just in case */
805     case EL_LF:
806     p--;
807     while (p > startptr && p[-1] != '\n') p--;
808     return p;
809    
810     case EL_CR:
811     p--;
812     while (p > startptr && p[-1] != '\n') p--;
813     return p;
814    
815     case EL_CRLF:
816     for (;;)
817     {
818     p -= 2;
819     while (p > startptr && p[-1] != '\n') p--;
820     if (p <= startptr + 1 || p[-2] == '\r') return p;
821     }
822     return p; /* But control should never get here */
823    
824     case EL_ANY:
825 ph10 150 case EL_ANYCRLF:
826 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827     if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829     while (p > startptr)
830     {
831     register int c;
832     char *pp = p - 1;
833    
834     if (utf8)
835     {
836     int extra = 0;
837     while ((*pp & 0xc0) == 0x80) pp--;
838     c = *((unsigned char *)pp);
839     if (c >= 0xc0)
840     {
841     int gcii, gcss;
842     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843     gcss = 6*extra;
844     c = (c & utf8_table3[extra]) << gcss;
845     for (gcii = 1; gcii <= extra; gcii++)
846     {
847     gcss -= 6;
848     c |= (pp[gcii] & 0x3f) << gcss;
849     }
850     }
851     }
852     else c = *((unsigned char *)pp);
853    
854 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
855 nigel 93 {
856     case 0x0a: /* LF */
857 ph10 149 case 0x0d: /* CR */
858     return p;
859 ph10 150
860 ph10 149 default:
861     break;
862 ph10 150 }
863 ph10 149
864     else switch (c)
865     {
866     case 0x0a: /* LF */
867 nigel 93 case 0x0b: /* VT */
868     case 0x0c: /* FF */
869     case 0x0d: /* CR */
870     case 0x85: /* NEL */
871     case 0x2028: /* LS */
872     case 0x2029: /* PS */
873     return p;
874    
875     default:
876     break;
877     }
878    
879     p = pp; /* Back one character */
880     } /* End of loop for ANY case */
881    
882     return startptr; /* Hit start of data */
883     } /* End of overall switch */
884     }
885    
886    
887    
888    
889    
890     /*************************************************
891 nigel 77 * Print the previous "after" lines *
892 nigel 49 *************************************************/
893    
894 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
895 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
896     that a binary zero does not terminate it.
897 nigel 77
898     Arguments:
899     lastmatchnumber the number of the last matching line, plus one
900     lastmatchrestart where we restarted after the last match
901     endptr end of available data
902     printname filename for printing
903    
904     Returns: nothing
905     */
906    
907     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908     char *endptr, char *printname)
909     {
910     if (after_context > 0 && lastmatchnumber > 0)
911     {
912     int count = 0;
913     while (lastmatchrestart < endptr && count++ < after_context)
914     {
915 nigel 93 int ellength;
916 nigel 77 char *pp = lastmatchrestart;
917     if (printname != NULL) fprintf(stdout, "%s-", printname);
918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 nigel 93 pp = end_of_line(pp, endptr, &ellength);
920 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 nigel 93 lastmatchrestart = pp;
922 nigel 77 }
923     hyphenpending = TRUE;
924     }
925     }
926    
927    
928    
929     /*************************************************
930 ph10 378 * Apply patterns to subject till one matches *
931     *************************************************/
932    
933 ph10 392 /* This function is called to run through all patterns, looking for a match. It
934     is used multiple times for the same subject when colouring is enabled, in order
935 ph10 378 to find all possible matches.
936    
937     Arguments:
938     matchptr the start of the subject
939     length the length of the subject to match
940     offsets the offets vector to fill in
941     mrc address of where to put the result of pcre_exec()
942 ph10 392
943     Returns: TRUE if there was a match
944 ph10 378 FALSE if there was no match
945     invert if there was a non-fatal error
946 ph10 392 */
947 ph10 378
948     static BOOL
949     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950     {
951     int i;
952 ph10 561 size_t slen = length;
953     const char *msg = "this text:\n\n";
954     if (slen > 200)
955     {
956     slen = 200;
957     msg = "text that starts:\n\n";
958 ph10 579 }
959 ph10 378 for (i = 0; i < pattern_count; i++)
960     {
961 ph10 530 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963 ph10 378 if (*mrc >= 0) return TRUE;
964     if (*mrc == PCRE_ERROR_NOMATCH) continue;
965 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967 ph10 561 fprintf(stderr, "%s", msg);
968     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
969     fprintf(stderr, "\n\n");
970     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971     resource_error = TRUE;
972 ph10 378 if (error_count++ > 20)
973     {
974 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975     pcregrep_exit(2);
976 ph10 378 }
977     return invert; /* No more matching; don't show the line again */
978     }
979    
980     return FALSE; /* No match, no errors */
981     }
982    
983    
984    
985     /*************************************************
986 nigel 77 * Grep an individual file *
987     *************************************************/
988    
989     /* This is called from grep_or_recurse() below. It uses a buffer that is three
990     times the value of MBUFTHIRD. The matching point is never allowed to stray into
991     the top third of the buffer, thus keeping more of the file available for
992     context printing or for multiline scanning. For large files, the pointer will
993     be in the middle third most of the time, so the bottom third is available for
994     "before" context printing.
995    
996     Arguments:
997 ph10 286 handle the fopened FILE stream for a normal file
998     the gzFile pointer when reading is via libz
999     the BZFILE pointer when reading is via libbz2
1000     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001 nigel 77 printname the file name if it is to be printed for each match
1002     or NULL if the file name is not to be printed
1003     it cannot be NULL if filenames[_nomatch]_only is set
1004    
1005     Returns: 0 if there was at least one match
1006     1 otherwise (no matches)
1007 ph10 286 2 if there is a read error on a .bz2 file
1008 nigel 77 */
1009    
1010 nigel 49 static int
1011 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1012 nigel 49 {
1013     int rc = 1;
1014 nigel 77 int linenumber = 1;
1015     int lastmatchnumber = 0;
1016 nigel 49 int count = 0;
1017 ph10 280 int filepos = 0;
1018 ph10 378 int offsets[OFFSET_SIZE];
1019 nigel 77 char *lastmatchrestart = NULL;
1020     char buffer[3*MBUFTHIRD];
1021     char *ptr = buffer;
1022     char *endptr;
1023     size_t bufflength;
1024     BOOL endhyphenpending = FALSE;
1025 ph10 519 BOOL input_line_buffered = line_buffered;
1026 ph10 286 FILE *in = NULL; /* Ensure initialized */
1027 nigel 49
1028 ph10 286 #ifdef SUPPORT_LIBZ
1029     gzFile ingz = NULL;
1030     #endif
1031 nigel 77
1032 ph10 286 #ifdef SUPPORT_LIBBZ2
1033     BZFILE *inbz2 = NULL;
1034     #endif
1035    
1036    
1037     /* Do the first read into the start of the buffer and set up the pointer to end
1038     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040     fail. */
1041    
1042     #ifdef SUPPORT_LIBZ
1043     if (frtype == FR_LIBZ)
1044     {
1045     ingz = (gzFile)handle;
1046     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047     }
1048     else
1049     #endif
1050    
1051     #ifdef SUPPORT_LIBBZ2
1052     if (frtype == FR_LIBBZ2)
1053     {
1054     inbz2 = (BZFILE *)handle;
1055     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1057     } /* without the cast it is unsigned. */
1058     else
1059     #endif
1060    
1061     {
1062     in = (FILE *)handle;
1063 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1064 ph10 535 bufflength = input_line_buffered?
1065 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1066     fread(buffer, 1, 3*MBUFTHIRD, in);
1067 ph10 286 }
1068 ph10 535
1069 nigel 77 endptr = buffer + bufflength;
1070    
1071     /* Loop while the current pointer is not at the end of the file. For large
1072     files, endptr will be at the end of the buffer when we are in the middle of the
1073     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074     way, the buffer is shifted left and re-filled. */
1075    
1076     while (ptr < endptr)
1077 nigel 49 {
1078 ph10 378 int endlinelength;
1079 nigel 87 int mrc = 0;
1080 ph10 378 BOOL match;
1081 ph10 286 char *matchptr = ptr;
1082 nigel 77 char *t = ptr;
1083     size_t length, linelength;
1084 nigel 49
1085 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1086     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087     length remainder of the data in the buffer. Otherwise, it is the length of
1088 ph10 378 the next line, excluding the terminating newline. After matching, we always
1089     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090     option is used for compiling, so that any match is constrained to be in the
1091     first line. */
1092 nigel 77
1093 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1094     linelength = t - ptr - endlinelength;
1095 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1096 nigel 77
1097 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1098    
1099     #ifdef JFRIEDL_DEBUG
1100     if (jfriedl_XT || jfriedl_XR)
1101     {
1102     #include <sys/time.h>
1103     #include <time.h>
1104     struct timeval start_time, end_time;
1105     struct timezone dummy;
1106 ph10 392 int i;
1107 nigel 89
1108     if (jfriedl_XT)
1109     {
1110     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111     const char *orig = ptr;
1112     ptr = malloc(newlen + 1);
1113     if (!ptr) {
1114     printf("out of memory");
1115 ph10 561 pcregrep_exit(2);
1116 nigel 89 }
1117     endptr = ptr;
1118     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119     for (i = 0; i < jfriedl_XT; i++) {
1120     strncpy(endptr, orig, length);
1121     endptr += length;
1122     }
1123     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124     length = newlen;
1125     }
1126    
1127     if (gettimeofday(&start_time, &dummy) != 0)
1128     perror("bad gettimeofday");
1129    
1130    
1131     for (i = 0; i < jfriedl_XR; i++)
1132 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134 nigel 89
1135     if (gettimeofday(&end_time, &dummy) != 0)
1136     perror("bad gettimeofday");
1137    
1138     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139     -
1140     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141    
1142     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143     return 0;
1144     }
1145     #endif
1146    
1147 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1148 ph10 279 in order to find any further matches in the same line. */
1149 nigel 89
1150 ph10 286 ONLY_MATCHING_RESTART:
1151    
1152 ph10 392 /* Run through all the patterns until one matches or there is an error other
1153 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1154     finding subsequent matches when colouring matched lines. */
1155 ph10 392
1156 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1157 nigel 77
1158 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1159 nigel 77
1160 nigel 49 if (match != invert)
1161     {
1162 nigel 77 BOOL hyphenprinted = FALSE;
1163    
1164 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1165 nigel 77
1166 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1167    
1168     /* Just count if just counting is wanted. */
1169    
1170 nigel 49 if (count_only) count++;
1171    
1172 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1173     in the file. */
1174    
1175 ph10 420 else if (filenames == FN_MATCH_ONLY)
1176 nigel 49 {
1177 nigel 77 fprintf(stdout, "%s\n", printname);
1178 nigel 49 return 0;
1179     }
1180    
1181 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1182    
1183 nigel 77 else if (quiet) return 0;
1184 nigel 49
1185 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1186 ph10 565 captured portion of it, as long as this string is not empty, and the
1187     --file-offsets and --line-offsets options output offsets for the matching
1188     substring (they both force --only-matching = 0). None of these options
1189 ph10 280 prints any context. Afterwards, adjust the start and length, and then jump
1190     back to look for further matches in the same line. If we are in invert
1191 ph10 565 mode, however, nothing is printed and we do not restart - this could still
1192     be useful because the return code is set. */
1193 nigel 87
1194 ph10 565 else if (only_matching >= 0)
1195 nigel 87 {
1196 ph10 279 if (!invert)
1197 ph10 286 {
1198 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1199     if (number) fprintf(stdout, "%d:", linenumber);
1200 ph10 280 if (line_offsets)
1201 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202 ph10 286 offsets[1] - offsets[0]);
1203 ph10 280 else if (file_offsets)
1204 ph10 579 fprintf(stdout, "%d,%d\n",
1205 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1206 ph10 286 offsets[1] - offsets[0]);
1207 ph10 565 else if (only_matching < mrc)
1208 ph10 377 {
1209 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210     if (plen > 0)
1211 ph10 579 {
1212 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215     fprintf(stdout, "\n");
1216 ph10 579 }
1217 ph10 392 }
1218 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1219 ph10 279 matchptr += offsets[1];
1220     length -= offsets[1];
1221 ph10 286 match = FALSE;
1222 ph10 564 if (line_buffered) fflush(stdout);
1223     rc = 0; /* Had some success */
1224 ph10 286 goto ONLY_MATCHING_RESTART;
1225     }
1226 nigel 87 }
1227    
1228     /* This is the default case when none of the above options is set. We print
1229     the matching lines(s), possibly preceded and/or followed by other lines of
1230     context. */
1231    
1232 nigel 49 else
1233     {
1234 nigel 77 /* See if there is a requirement to print some "after" lines from a
1235     previous match. We never print any overlaps. */
1236    
1237     if (after_context > 0 && lastmatchnumber > 0)
1238     {
1239 nigel 93 int ellength;
1240 nigel 77 int linecount = 0;
1241     char *p = lastmatchrestart;
1242    
1243     while (p < ptr && linecount < after_context)
1244     {
1245 nigel 93 p = end_of_line(p, ptr, &ellength);
1246 nigel 77 linecount++;
1247     }
1248    
1249     /* It is important to advance lastmatchrestart during this printing so
1250 nigel 87 that it interacts correctly with any "before" printing below. Print
1251     each line's data using fwrite() in case there are binary zeroes. */
1252 nigel 77
1253     while (lastmatchrestart < p)
1254     {
1255     char *pp = lastmatchrestart;
1256     if (printname != NULL) fprintf(stdout, "%s-", printname);
1257     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1259 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260 nigel 93 lastmatchrestart = pp;
1261 nigel 77 }
1262     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263     }
1264    
1265     /* If there were non-contiguous lines printed above, insert hyphens. */
1266    
1267     if (hyphenpending)
1268     {
1269     fprintf(stdout, "--\n");
1270     hyphenpending = FALSE;
1271     hyphenprinted = TRUE;
1272     }
1273    
1274     /* See if there is a requirement to print some "before" lines for this
1275     match. Again, don't print overlaps. */
1276    
1277     if (before_context > 0)
1278     {
1279     int linecount = 0;
1280     char *p = ptr;
1281    
1282     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283 nigel 87 linecount < before_context)
1284 nigel 77 {
1285 nigel 87 linecount++;
1286 nigel 93 p = previous_line(p, buffer);
1287 nigel 77 }
1288    
1289     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290     fprintf(stdout, "--\n");
1291    
1292     while (p < ptr)
1293     {
1294 nigel 93 int ellength;
1295 nigel 77 char *pp = p;
1296     if (printname != NULL) fprintf(stdout, "%s-", printname);
1297     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1299 ph10 515 FWRITE(p, 1, pp - p, stdout);
1300 nigel 93 p = pp;
1301 nigel 77 }
1302     }
1303    
1304     /* Now print the matching line(s); ensure we set hyphenpending at the end
1305 nigel 85 of the file if any context lines are being output. */
1306 nigel 77
1307 nigel 85 if (after_context > 0 || before_context > 0)
1308     endhyphenpending = TRUE;
1309    
1310 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1311 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1312 nigel 77
1313     /* In multiline mode, we want to print to the end of the line in which
1314     the end of the matched string is found, so we adjust linelength and the
1315 ph10 222 line number appropriately, but only when there actually was a match
1316     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317     the match will always be before the first newline sequence. */
1318 nigel 77
1319 ph10 587 if (multiline & !invert)
1320 nigel 77 {
1321 ph10 587 char *endmatch = ptr + offsets[1];
1322     t = ptr;
1323     while (t < endmatch)
1324 nigel 93 {
1325 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1326     if (t < endmatch) linenumber++; else break;
1327 nigel 93 }
1328 ph10 587 linelength = t - ptr - endlinelength;
1329 nigel 77 }
1330    
1331 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1332     zeroes are treated as just another data character. */
1333    
1334     /* This extra option, for Jeffrey Friedl's debugging requirements,
1335     replaces the matched string, or a specific captured string if it exists,
1336     with X. When this happens, colouring is ignored. */
1337    
1338     #ifdef JFRIEDL_DEBUG
1339     if (S_arg >= 0 && S_arg < mrc)
1340     {
1341     int first = S_arg * 2;
1342     int last = first + 1;
1343 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1344 nigel 87 fprintf(stdout, "X");
1345 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1346 nigel 87 }
1347     else
1348     #endif
1349    
1350 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1351 ph10 585 matches, but not of course if the line is a non-match. */
1352 ph10 587
1353 ph10 585 if (do_colour && !invert)
1354 nigel 87 {
1355 ph10 587 int plength;
1356 ph10 392 int last_offset = 0;
1357 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1358 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1359 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1360 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1361 ph10 378 for (;;)
1362     {
1363 ph10 392 last_offset += offsets[1];
1364 ph10 378 matchptr += offsets[1];
1365     length -= offsets[1];
1366 ph10 588 if (last_offset >= linelength + endlinelength ||
1367     !match_patterns(matchptr, length, offsets, &mrc)) break;
1368 ph10 515 FWRITE(matchptr, 1, offsets[0], stdout);
1369 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1370 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1371 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1372     }
1373 ph10 587
1374     /* In multiline mode, we may have already printed the complete line
1375     and its line-ending characters (if they matched the pattern), so there
1376     may be no more to print. */
1377    
1378     plength = (linelength + endlinelength) - last_offset;
1379     if (plength > 0)
1380     FWRITE(ptr + last_offset, 1, plength, stdout);
1381 nigel 87 }
1382 ph10 392
1383 ph10 378 /* Not colouring; no need to search for further matches */
1384 ph10 392
1385 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1386 nigel 49 }
1387    
1388 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1389     given, flush the output. */
1390 nigel 87
1391 ph10 519 if (line_buffered) fflush(stdout);
1392 nigel 77 rc = 0; /* Had some success */
1393    
1394     /* Remember where the last match happened for after_context. We remember
1395     where we are about to restart, and that line's number. */
1396    
1397 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1398 nigel 77 lastmatchnumber = linenumber + 1;
1399 nigel 49 }
1400 nigel 77
1401 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1402     anything to be printed), we have to move on to the end of the match before
1403     proceeding. */
1404    
1405     if (multiline && invert && match)
1406     {
1407     int ellength;
1408     char *endmatch = ptr + offsets[1];
1409     t = ptr;
1410     while (t < endmatch)
1411     {
1412     t = end_of_line(t, endptr, &ellength);
1413     if (t <= endmatch) linenumber++; else break;
1414     }
1415     endmatch = end_of_line(endmatch, endptr, &ellength);
1416     linelength = endmatch - ptr - ellength;
1417     }
1418    
1419 ph10 286 /* Advance to after the newline and increment the line number. The file
1420 ph10 280 offset to the current line is maintained in filepos. */
1421 nigel 77
1422 nigel 93 ptr += linelength + endlinelength;
1423 ph10 530 filepos += (int)(linelength + endlinelength);
1424 nigel 77 linenumber++;
1425 ph10 535
1426     /* If input is line buffered, and the buffer is not yet full, read another
1427 ph10 519 line and add it into the buffer. */
1428 ph10 535
1429 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1430     {
1431     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1432     bufflength += add;
1433 ph10 535 endptr += add;
1434     }
1435 nigel 77
1436     /* If we haven't yet reached the end of the file (the buffer is full), and
1437     the current point is in the top 1/3 of the buffer, slide the buffer down by
1438     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1439     about to be lost, print them. */
1440    
1441     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1442     {
1443     if (after_context > 0 &&
1444     lastmatchnumber > 0 &&
1445     lastmatchrestart < buffer + MBUFTHIRD)
1446     {
1447     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1448     lastmatchnumber = 0;
1449     }
1450    
1451     /* Now do the shuffle */
1452    
1453     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1454     ptr -= MBUFTHIRD;
1455 ph10 286
1456     #ifdef SUPPORT_LIBZ
1457     if (frtype == FR_LIBZ)
1458     bufflength = 2*MBUFTHIRD +
1459     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1460     else
1461     #endif
1462    
1463     #ifdef SUPPORT_LIBBZ2
1464     if (frtype == FR_LIBBZ2)
1465     bufflength = 2*MBUFTHIRD +
1466     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1467     else
1468     #endif
1469    
1470 ph10 535 bufflength = 2*MBUFTHIRD +
1471     (input_line_buffered?
1472     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1473 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1474 nigel 77 endptr = buffer + bufflength;
1475    
1476     /* Adjust any last match point */
1477    
1478     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1479     }
1480     } /* Loop through the whole file */
1481    
1482     /* End of file; print final "after" lines if wanted; do_after_lines sets
1483     hyphenpending if it prints something. */
1484    
1485 ph10 565 if (only_matching < 0 && !count_only)
1486 nigel 87 {
1487     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1488     hyphenpending |= endhyphenpending;
1489     }
1490 nigel 77
1491     /* Print the file name if we are looking for those without matches and there
1492     were none. If we found a match, we won't have got this far. */
1493    
1494 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1495 nigel 77 {
1496     fprintf(stdout, "%s\n", printname);
1497     return 0;
1498 nigel 49 }
1499    
1500 nigel 77 /* Print the match count if wanted */
1501    
1502 nigel 49 if (count_only)
1503     {
1504 ph10 420 if (count > 0 || !omit_zero_count)
1505 ph10 461 {
1506     if (printname != NULL && filenames != FN_NONE)
1507 ph10 420 fprintf(stdout, "%s:", printname);
1508     fprintf(stdout, "%d\n", count);
1509 ph10 461 }
1510 nigel 49 }
1511    
1512     return rc;
1513     }
1514    
1515    
1516    
1517     /*************************************************
1518 nigel 53 * Grep a file or recurse into a directory *
1519     *************************************************/
1520    
1521 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1522     recursing; if it's a file, grep it.
1523    
1524     Arguments:
1525     pathname the path to investigate
1526 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1527 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1528    
1529     Returns: 0 if there was at least one match
1530     1 if there were no matches
1531     2 there was some kind of error
1532    
1533     However, file opening failures are suppressed if "silent" is set.
1534     */
1535    
1536 nigel 53 static int
1537 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1538 nigel 53 {
1539     int rc = 1;
1540     int sep;
1541 ph10 286 int frtype;
1542     int pathlen;
1543     void *handle;
1544     FILE *in = NULL; /* Ensure initialized */
1545 nigel 53
1546 ph10 286 #ifdef SUPPORT_LIBZ
1547     gzFile ingz = NULL;
1548     #endif
1549    
1550     #ifdef SUPPORT_LIBBZ2
1551     BZFILE *inbz2 = NULL;
1552     #endif
1553    
1554 nigel 77 /* If the file name is "-" we scan stdin */
1555 nigel 53
1556 nigel 77 if (strcmp(pathname, "-") == 0)
1557 nigel 53 {
1558 ph10 286 return pcregrep(stdin, FR_PLAIN,
1559 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1560 nigel 77 stdin_name : NULL);
1561     }
1562    
1563 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1564 ph10 325 each file and directory within it, subject to any include or exclude patterns
1565     that were set. The scanning code is localized so it can be made
1566     system-specific. */
1567 nigel 87
1568     if ((sep = isdirectory(pathname)) != 0)
1569 nigel 77 {
1570 nigel 87 if (dee_action == dee_SKIP) return 1;
1571     if (dee_action == dee_RECURSE)
1572 nigel 53 {
1573 nigel 87 char buffer[1024];
1574     char *nextfile;
1575     directory_type *dir = opendirectory(pathname);
1576 nigel 53
1577 nigel 87 if (dir == NULL)
1578     {
1579     if (!silent)
1580     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1581     strerror(errno));
1582     return 2;
1583     }
1584 nigel 77
1585 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1586     {
1587 ph10 324 int frc, nflen;
1588 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1589 ph10 530 nflen = (int)(strlen(nextfile));
1590 ph10 345
1591 ph10 325 if (isdirectory(buffer))
1592     {
1593     if (exclude_dir_compiled != NULL &&
1594     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1595     continue;
1596 ph10 345
1597 ph10 325 if (include_dir_compiled != NULL &&
1598     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1599     continue;
1600     }
1601 ph10 345 else
1602     {
1603 ph10 324 if (exclude_compiled != NULL &&
1604     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1605     continue;
1606 ph10 345
1607 ph10 324 if (include_compiled != NULL &&
1608     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1609     continue;
1610 ph10 345 }
1611 nigel 77
1612 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1613     if (frc > 1) rc = frc;
1614     else if (frc == 0 && rc == 1) rc = 0;
1615     }
1616    
1617     closedirectory(dir);
1618     return rc;
1619 nigel 53 }
1620     }
1621    
1622 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1623     been requested. */
1624 nigel 53
1625 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1626    
1627     /* Control reaches here if we have a regular file, or if we have a directory
1628     and recursion or skipping was not requested, or if we have anything else and
1629     skipping was not requested. The scan proceeds. If this is the first and only
1630     argument at top level, we don't show the file name, unless we are only showing
1631     the file name, or the filename was forced (-H). */
1632    
1633 ph10 530 pathlen = (int)(strlen(pathname));
1634 ph10 286
1635     /* Open using zlib if it is supported and the file name ends with .gz. */
1636    
1637     #ifdef SUPPORT_LIBZ
1638     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1639 nigel 53 {
1640 ph10 286 ingz = gzopen(pathname, "rb");
1641     if (ingz == NULL)
1642     {
1643     if (!silent)
1644     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1645     strerror(errno));
1646     return 2;
1647     }
1648     handle = (void *)ingz;
1649     frtype = FR_LIBZ;
1650     }
1651     else
1652     #endif
1653    
1654     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1655    
1656     #ifdef SUPPORT_LIBBZ2
1657     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1658     {
1659     inbz2 = BZ2_bzopen(pathname, "rb");
1660     handle = (void *)inbz2;
1661     frtype = FR_LIBBZ2;
1662     }
1663     else
1664     #endif
1665    
1666     /* Otherwise use plain fopen(). The label is so that we can come back here if
1667     an attempt to read a .bz2 file indicates that it really is a plain file. */
1668    
1669     #ifdef SUPPORT_LIBBZ2
1670     PLAIN_FILE:
1671     #endif
1672     {
1673 ph10 419 in = fopen(pathname, "rb");
1674 ph10 286 handle = (void *)in;
1675     frtype = FR_PLAIN;
1676     }
1677    
1678     /* All the opening methods return errno when they fail. */
1679    
1680     if (handle == NULL)
1681     {
1682 nigel 77 if (!silent)
1683     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1684     strerror(errno));
1685 nigel 53 return 2;
1686     }
1687    
1688 ph10 286 /* Now grep the file */
1689    
1690     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1691 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1692 nigel 77
1693 ph10 286 /* Close in an appropriate manner. */
1694    
1695     #ifdef SUPPORT_LIBZ
1696     if (frtype == FR_LIBZ)
1697     gzclose(ingz);
1698     else
1699     #endif
1700    
1701     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1702     read failed. If the error indicates that the file isn't in fact bzipped, try
1703     again as a normal file. */
1704    
1705     #ifdef SUPPORT_LIBBZ2
1706     if (frtype == FR_LIBBZ2)
1707     {
1708     if (rc == 2)
1709     {
1710     int errnum;
1711     const char *err = BZ2_bzerror(inbz2, &errnum);
1712     if (errnum == BZ_DATA_ERROR_MAGIC)
1713     {
1714     BZ2_bzclose(inbz2);
1715     goto PLAIN_FILE;
1716     }
1717     else if (!silent)
1718     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1719     pathname, err);
1720     }
1721     BZ2_bzclose(inbz2);
1722     }
1723     else
1724     #endif
1725    
1726     /* Normal file close */
1727    
1728 nigel 53 fclose(in);
1729 ph10 286
1730     /* Pass back the yield from pcregrep(). */
1731    
1732 nigel 53 return rc;
1733     }
1734    
1735    
1736    
1737    
1738     /*************************************************
1739 nigel 49 * Usage function *
1740     *************************************************/
1741    
1742     static int
1743     usage(int rc)
1744     {
1745 nigel 87 option_item *op;
1746     fprintf(stderr, "Usage: pcregrep [-");
1747     for (op = optionlist; op->one_char != 0; op++)
1748     {
1749     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1750     }
1751     fprintf(stderr, "] [long options] [pattern] [files]\n");
1752 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1753     "options.\n");
1754 nigel 49 return rc;
1755     }
1756    
1757    
1758    
1759    
1760     /*************************************************
1761 nigel 53 * Help function *
1762     *************************************************/
1763    
1764     static void
1765     help(void)
1766     {
1767     option_item *op;
1768    
1769 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1770 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1771 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1772 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1773    
1774     #ifdef SUPPORT_LIBZ
1775     printf("Files whose names end in .gz are read using zlib.\n");
1776     #endif
1777    
1778     #ifdef SUPPORT_LIBBZ2
1779     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1780     #endif
1781    
1782     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1783     printf("Other files and the standard input are read as plain files.\n\n");
1784     #else
1785     printf("All files are read as plain files, without any interpretation.\n\n");
1786     #endif
1787    
1788 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1789     printf("Options:\n");
1790    
1791     for (op = optionlist; op->one_char != 0; op++)
1792     {
1793     int n;
1794     char s[4];
1795 ph10 579
1796 ph10 571 /* Two options were accidentally implemented and documented with underscores
1797     instead of hyphens in their names, something that was not noticed for quite a
1798 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1799     in case people were using them. However, we don't want to display them in the
1800     help data. There are no other options that contain underscores, and we do not
1801     expect ever to implement such options. Therefore, just omit any option that
1802 ph10 571 contains an underscore. */
1803 ph10 579
1804     if (strchr(op->long_name, '_') != NULL) continue;
1805    
1806 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1807 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1808 nigel 53 if (n < 1) n = 1;
1809 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1810 nigel 53 }
1811    
1812 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1813     printf("trailing white space is removed and blank lines are ignored.\n");
1814     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1815 nigel 53
1816 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1817 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1818     }
1819    
1820    
1821    
1822    
1823     /*************************************************
1824 nigel 77 * Handle a single-letter, no data option *
1825 nigel 53 *************************************************/
1826    
1827     static int
1828     handle_option(int letter, int options)
1829     {
1830     switch(letter)
1831     {
1832 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1833 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1834 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1835 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1836 nigel 53 case 'c': count_only = TRUE; break;
1837 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1838     case 'H': filenames = FN_FORCE; break;
1839     case 'h': filenames = FN_NONE; break;
1840 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1841 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1842 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1843 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1844 nigel 53 case 'n': number = TRUE; break;
1845 ph10 565 case 'o': only_matching = 0; break;
1846 nigel 77 case 'q': quiet = TRUE; break;
1847 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1848 nigel 53 case 's': silent = TRUE; break;
1849 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1850 nigel 53 case 'v': invert = TRUE; break;
1851 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1852     case 'x': process_options |= PO_LINE_MATCH; break;
1853 nigel 53
1854     case 'V':
1855 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1856 ph10 561 pcregrep_exit(0);
1857 nigel 53 break;
1858    
1859     default:
1860     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1861 ph10 561 pcregrep_exit(usage(2));
1862 nigel 53 }
1863    
1864     return options;
1865     }
1866    
1867    
1868    
1869    
1870     /*************************************************
1871 nigel 87 * Construct printed ordinal *
1872     *************************************************/
1873    
1874     /* This turns a number into "1st", "3rd", etc. */
1875    
/* Build the English ordinal for a number in a static buffer, which is
overwritten by the next call.

Argument:   n    the number
Returns:    pointer to the static buffer holding, e.g., "1st" or "12th"
*/

static char *
ordin(int n)
{
static char buffer[24];   /* Room for any int, a sign, and the suffix */
int last_two = n % 100;
char *p;

if (last_two < 0) last_two = -last_two;

sprintf(buffer, "%d", n);
p = buffer + strlen(buffer);

/* Numbers ending in 11, 12, or 13 take "th", unlike other numbers that end
in 1, 2, or 3. */

if (last_two >= 11 && last_two <= 13)
  {
  strcpy(p, "th");
  }
else switch (last_two % 10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1892    
1893    
1894    
1895     /*************************************************
1896     * Compile a single pattern *
1897     *************************************************/
1898    
1899     /* When the -F option has been used, this is called for each substring.
1900     Otherwise it's called for each supplied pattern.
1901    
1902     Arguments:
1903     pattern the pattern string
1904     options the PCRE options
1905     filename the file name, or NULL for a command-line pattern
1906     count 0 if this is the only command line pattern, or
1907     number of the command line pattern, or
1908     linenumber for a pattern from a file
1909    
1910     Returns: TRUE on success, FALSE after an error
1911     */
1912    
1913     static BOOL
1914     compile_single_pattern(char *pattern, int options, char *filename, int count)
1915     {
1916     char buffer[MBUFTHIRD + 16];
1917     const char *error;
1918     int errptr;
1919    
1920     if (pattern_count >= MAX_PATTERN_COUNT)
1921     {
1922     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1923     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1924     return FALSE;
1925     }
1926    
1927     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1928     suffix[process_options]);
1929     pattern_list[pattern_count] =
1930     pcre_compile(buffer, options, &error, &errptr, pcretables);
1931 ph10 142 if (pattern_list[pattern_count] != NULL)
1932 ph10 141 {
1933 ph10 142 pattern_count++;
1934 ph10 141 return TRUE;
1935 ph10 142 }
1936 nigel 87
1937     /* Handle compile errors */
1938    
1939     errptr -= (int)strlen(prefix[process_options]);
1940     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1941    
1942     if (filename == NULL)
1943     {
1944     if (count == 0)
1945     fprintf(stderr, "pcregrep: Error in command-line regex "
1946     "at offset %d: %s\n", errptr, error);
1947     else
1948     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1949     "at offset %d: %s\n", ordin(count), errptr, error);
1950     }
1951     else
1952     {
1953     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1954     "at offset %d: %s\n", count, filename, errptr, error);
1955     }
1956    
1957     return FALSE;
1958     }
1959    
1960    
1961    
1962     /*************************************************
1963     * Compile one supplied pattern *
1964     *************************************************/
1965    
1966     /* When the -F option has been used, each string may be a list of strings,
1967 nigel 91 separated by line breaks. They will be matched literally.
1968 nigel 87
1969     Arguments:
1970     pattern the pattern string
1971     options the PCRE options
1972     filename the file name, or NULL for a command-line pattern
1973     count 0 if this is the only command line pattern, or
1974     number of the command line pattern, or
1975     linenumber for a pattern from a file
1976    
1977     Returns: TRUE on success, FALSE after an error
1978     */
1979    
1980     static BOOL
1981     compile_pattern(char *pattern, int options, char *filename, int count)
1982     {
1983     if ((process_options & PO_FIXED_STRINGS) != 0)
1984     {
1985 nigel 93 char *eop = pattern + strlen(pattern);
1986 nigel 87 char buffer[MBUFTHIRD];
1987     for(;;)
1988     {
1989 nigel 93 int ellength;
1990     char *p = end_of_line(pattern, eop, &ellength);
1991     if (ellength == 0)
1992 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1993 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1994 nigel 93 pattern = p;
1995 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1996     return FALSE;
1997     }
1998     }
1999     else return compile_single_pattern(pattern, options, filename, count);
2000     }
2001    
2002    
2003    
2004     /*************************************************
2005 nigel 49 * Main program *
2006     *************************************************/
2007    
2008 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2009    
2010 nigel 49 int
2011     main(int argc, char **argv)
2012     {
2013 nigel 53 int i, j;
2014 nigel 49 int rc = 1;
2015 nigel 87 int pcre_options = 0;
2016     int cmd_pattern_count = 0;
2017 ph10 141 int hint_count = 0;
2018 nigel 49 int errptr;
2019 nigel 87 BOOL only_one_at_top;
2020     char *patterns[MAX_PATTERN_COUNT];
2021     const char *locale_from = "--locale";
2022 nigel 49 const char *error;
2023    
2024 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2025     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2026 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2027 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2028 ph10 391 rather than escapes such as as '\r'. */
2029 nigel 91
2030     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2031     switch(i)
2032     {
2033 ph10 391 default: newline = (char *)"lf"; break;
2034     case 13: newline = (char *)"cr"; break;
2035     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2036     case -1: newline = (char *)"any"; break;
2037     case -2: newline = (char *)"anycrlf"; break;
2038 nigel 91 }
2039    
2040 nigel 49 /* Process the options */
2041    
2042     for (i = 1; i < argc; i++)
2043     {
2044 nigel 77 option_item *op = NULL;
2045     char *option_data = (char *)""; /* default to keep compiler happy */
2046     BOOL longop;
2047     BOOL longopwasequals = FALSE;
2048    
2049 nigel 49 if (argv[i][0] != '-') break;
2050 nigel 53
2051 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2052 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2053 nigel 63
2054 nigel 77 if (argv[i][1] == 0)
2055     {
2056 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2057 ph10 561 else pcregrep_exit(usage(2));
2058 nigel 77 }
2059 nigel 63
2060 nigel 77 /* Handle a long name option, or -- to terminate the options */
2061 nigel 53
2062     if (argv[i][1] == '-')
2063 nigel 49 {
2064 nigel 77 char *arg = argv[i] + 2;
2065     char *argequals = strchr(arg, '=');
2066 nigel 53
2067 nigel 77 if (*arg == 0) /* -- terminates options */
2068 nigel 49 {
2069 nigel 77 i++;
2070     break; /* out of the options-handling loop */
2071 nigel 53 }
2072 nigel 49
2073 nigel 77 longop = TRUE;
2074    
2075     /* Some long options have data that follows after =, for example file=name.
2076     Some options have variations in the long name spelling: specifically, we
2077     allow "regexp" because GNU grep allows it, though I personally go along
2078 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2079 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2080     both these categories. */
2081 nigel 77
2082 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2083     {
2084 nigel 77 char *opbra = strchr(op->long_name, '(');
2085     char *equals = strchr(op->long_name, '=');
2086 ph10 461
2087 ph10 422 /* Handle options with only one spelling of the name */
2088 ph10 461
2089 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2090 nigel 53 {
2091 nigel 77 if (equals == NULL) /* Not thing=data case */
2092     {
2093     if (strcmp(arg, op->long_name) == 0) break;
2094     }
2095     else /* Special case xxx=data */
2096     {
2097 ph10 530 int oplen = (int)(equals - op->long_name);
2098 ph10 535 int arglen = (argequals == NULL)?
2099 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2100 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2101     {
2102     option_data = arg + arglen;
2103     if (*option_data == '=')
2104     {
2105     option_data++;
2106     longopwasequals = TRUE;
2107     }
2108     break;
2109     }
2110     }
2111 nigel 53 }
2112 ph10 461
2113 ph10 422 /* Handle options with an alternate spelling of the name */
2114 ph10 461
2115     else
2116 nigel 77 {
2117     char buff1[24];
2118     char buff2[24];
2119 ph10 461
2120 ph10 530 int baselen = (int)(opbra - op->long_name);
2121     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2122 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2123 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2124 ph10 461
2125 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2126 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2127 ph10 461
2128     if (strncmp(arg, buff1, arglen) == 0 ||
2129 ph10 422 strncmp(arg, buff2, arglen) == 0)
2130     {
2131     if (equals != NULL && argequals != NULL)
2132     {
2133 ph10 461 option_data = argequals;
2134 ph10 422 if (*option_data == '=')
2135     {
2136 ph10 461 option_data++;
2137 ph10 422 longopwasequals = TRUE;
2138 ph10 461 }
2139     }
2140 nigel 77 break;
2141 ph10 461 }
2142 nigel 77 }
2143 nigel 53 }
2144 nigel 77
2145 nigel 53 if (op->one_char == 0)
2146     {
2147     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2148 ph10 561 pcregrep_exit(usage(2));
2149 nigel 53 }
2150     }
2151 nigel 49
2152 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2153     are not in the right form for putting in the option table because they use
2154     only one hyphen, yet are more than one character long. By putting them
2155     separately here, they will not get displayed as part of the help() output,
2156     but I don't think Jeffrey will care about that. */
2157    
2158     #ifdef JFRIEDL_DEBUG
2159     else if (strcmp(argv[i], "-pre") == 0) {
2160     jfriedl_prefix = argv[++i];
2161     continue;
2162     } else if (strcmp(argv[i], "-post") == 0) {
2163     jfriedl_postfix = argv[++i];
2164     continue;
2165     } else if (strcmp(argv[i], "-XT") == 0) {
2166     sscanf(argv[++i], "%d", &jfriedl_XT);
2167     continue;
2168     } else if (strcmp(argv[i], "-XR") == 0) {
2169     sscanf(argv[++i], "%d", &jfriedl_XR);
2170     continue;
2171     }
2172     #endif
2173    
2174    
2175 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2176     continue till we hit the last one or one that needs data. */
2177 nigel 53
2178     else
2179     {
2180     char *s = argv[i] + 1;
2181 nigel 77 longop = FALSE;
2182 nigel 53 while (*s != 0)
2183     {
2184 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2185 ph10 579 {
2186     if (*s == op->one_char) break;
2187 ph10 565 }
2188 nigel 77 if (op->one_char == 0)
2189 nigel 53 {
2190 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2191     *s, argv[i]);
2192 ph10 561 pcregrep_exit(usage(2));
2193 nigel 77 }
2194 ph10 579
2195 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2196 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2197 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2198     with other single-character options in the same string. */
2199 ph10 579
2200 ph10 565 option_data = s+1;
2201     if (op->type == OP_OP_NUMBER)
2202 ph10 579 {
2203     if (isdigit((unsigned char)s[1])) break;
2204 nigel 53 }
2205 ph10 565 else /* Check for end or a dataless option */
2206 ph10 579 {
2207 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2208 ph10 579 }
2209    
2210     /* Handle a single-character option with no data, then loop for the
2211 ph10 565 next character in the string. */
2212    
2213 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2214 nigel 49 }
2215     }
2216 nigel 77
2217 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2218     is NO_DATA, it means that there is no data, and the option might set
2219     something in the PCRE options. */
2220 nigel 77
2221     if (op->type == OP_NODATA)
2222     {
2223 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2224     continue;
2225     }
2226    
2227     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2228     either has a value or defaults to something. It cannot have data in a
2229 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2230 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2231 nigel 87
2232     if (*option_data == 0 &&
2233     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2234     {
2235     switch (op->one_char)
2236 nigel 77 {
2237 nigel 87 case N_COLOUR:
2238     colour_option = (char *)"auto";
2239     break;
2240 ph10 579
2241 ph10 565 case 'o':
2242     only_matching = 0;
2243 ph10 579 break;
2244    
2245 nigel 87 #ifdef JFRIEDL_DEBUG
2246     case 'S':
2247     S_arg = 0;
2248     break;
2249     #endif
2250 nigel 77 }
2251 nigel 87 continue;
2252     }
2253 nigel 77
2254 nigel 87 /* Otherwise, find the data string for the option. */
2255    
2256     if (*option_data == 0)
2257     {
2258     if (i >= argc - 1 || longopwasequals)
2259 nigel 77 {
2260 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2261 ph10 561 pcregrep_exit(usage(2));
2262 nigel 87 }
2263     option_data = argv[++i];
2264     }
2265    
2266     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2267     multiple times to create a list of patterns. */
2268    
2269     if (op->type == OP_PATLIST)
2270     {
2271     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2272     {
2273     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2274     MAX_PATTERN_COUNT);
2275     return 2;
2276     }
2277     patterns[cmd_pattern_count++] = option_data;
2278     }
2279    
2280     /* Otherwise, deal with single string or numeric data values. */
2281    
2282 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2283     op->type != OP_OP_NUMBER)
2284 nigel 87 {
2285     *((char **)op->dataptr) = option_data;
2286     }
2287 ph10 558
2288     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2289     only for unpicking arguments, so just keep it simple. */
2290    
2291 nigel 87 else
2292     {
2293 ph10 561 unsigned long int n = 0;
2294 ph10 558 char *endptr = option_data;
2295     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2296     while (isdigit((unsigned char)(*endptr)))
2297     n = n * 10 + (int)(*endptr++ - '0');
2298 nigel 87 if (*endptr != 0)
2299     {
2300     if (longop)
2301 nigel 77 {
2302 nigel 87 char *equals = strchr(op->long_name, '=');
2303     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2304 ph10 530 (int)(equals - op->long_name);
2305 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2306     option_data, nlen, op->long_name);
2307 nigel 77 }
2308 nigel 87 else
2309     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2310     option_data, op->one_char);
2311 ph10 561 pcregrep_exit(usage(2));
2312 nigel 77 }
2313 ph10 584 if (op->type == OP_LONGNUMBER)
2314     *((unsigned long int *)op->dataptr) = n;
2315     else
2316     *((int *)op->dataptr) = n;
2317 nigel 77 }
2318 nigel 49 }
2319    
2320 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2321     for -A and -B. */
2322    
2323     if (both_context > 0)
2324     {
2325     if (after_context == 0) after_context = both_context;
2326     if (before_context == 0) before_context = both_context;
2327     }
2328 ph10 286
2329     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2330 ph10 565 However, the latter two set only_matching. */
2331 nigel 77
2332 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2333 ph10 286 (file_offsets && line_offsets))
2334 ph10 280 {
2335     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2336     "and/or --line-offsets\n");
2337 ph10 561 pcregrep_exit(usage(2));
2338 ph10 280 }
2339    
2340 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2341 ph10 286
2342 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2343     LC_ALL environment variable is set, and if so, use it. */
2344 nigel 49
2345 nigel 87 if (locale == NULL)
2346 nigel 53 {
2347 nigel 87 locale = getenv("LC_ALL");
2348     locale_from = "LCC_ALL";
2349 nigel 53 }
2350 nigel 49
2351 nigel 87 if (locale == NULL)
2352     {
2353     locale = getenv("LC_CTYPE");
2354     locale_from = "LC_CTYPE";
2355     }
2356 nigel 49
2357 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2358     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2359    
2360     if (locale != NULL)
2361 nigel 49 {
2362 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2363 nigel 53 {
2364 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2365     locale, locale_from);
2366 nigel 53 return 2;
2367     }
2368 nigel 87 pcretables = pcre_maketables();
2369     }
2370 nigel 77
2371 nigel 87 /* Sort out colouring */
2372    
2373     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2374     {
2375     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2376     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2377     else
2378 nigel 53 {
2379 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2380     colour_option);
2381     return 2;
2382 nigel 77 }
2383 nigel 87 if (do_colour)
2384 nigel 77 {
2385 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2386     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2387     if (cs != NULL) colour_string = cs;
2388 nigel 77 }
2389 nigel 87 }
2390 ph10 535
2391 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2392    
2393     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2394     {
2395     pcre_options |= PCRE_NEWLINE_CR;
2396 nigel 93 endlinetype = EL_CR;
2397 nigel 91 }
2398     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2399     {
2400     pcre_options |= PCRE_NEWLINE_LF;
2401 nigel 93 endlinetype = EL_LF;
2402 nigel 91 }
2403     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2404     {
2405     pcre_options |= PCRE_NEWLINE_CRLF;
2406 nigel 93 endlinetype = EL_CRLF;
2407 nigel 91 }
2408 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2409     {
2410     pcre_options |= PCRE_NEWLINE_ANY;
2411     endlinetype = EL_ANY;
2412     }
2413 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2414     {
2415     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2416     endlinetype = EL_ANYCRLF;
2417     }
2418 nigel 91 else
2419     {
2420     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2421     return 2;
2422     }
2423    
2424 nigel 87 /* Interpret the text values for -d and -D */
2425    
2426     if (dee_option != NULL)
2427     {
2428     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2429     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2430     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2431     else
2432 nigel 77 {
2433 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2434     return 2;
2435 nigel 53 }
2436 nigel 49 }
2437    
2438 nigel 87 if (DEE_option != NULL)
2439     {
2440     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2441     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2442     else
2443     {
2444     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2445     return 2;
2446     }
2447     }
2448 nigel 49
2449 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2450 nigel 87
2451     #ifdef JFRIEDL_DEBUG
2452     if (S_arg > 9)
2453 nigel 49 {
2454 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2455     return 2;
2456     }
2457 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2458     {
2459     if (jfriedl_XT == 0) jfriedl_XT = 1;
2460     if (jfriedl_XR == 0) jfriedl_XR = 1;
2461     }
2462 nigel 87 #endif
2463 nigel 77
2464 nigel 87 /* Get memory to store the pattern and hints lists. */
2465    
2466     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2467     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2468    
2469     if (pattern_list == NULL || hints_list == NULL)
2470     {
2471     fprintf(stderr, "pcregrep: malloc failed\n");
2472 ph10 123 goto EXIT2;
2473 nigel 87 }
2474    
2475     /* If no patterns were provided by -e, and there is no file provided by -f,
2476     the first argument is the one and only pattern, and it must exist. */
2477    
2478     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2479     {
2480 nigel 63 if (i >= argc) return usage(2);
2481 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2482     }
2483 nigel 77
2484 nigel 87 /* Compile the patterns that were provided on the command line, either by
2485     multiple uses of -e or as a single unkeyed pattern. */
2486    
2487     for (j = 0; j < cmd_pattern_count; j++)
2488     {
2489     if (!compile_pattern(patterns[j], pcre_options, NULL,
2490     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2491 ph10 123 goto EXIT2;
2492 nigel 87 }
2493    
2494     /* Compile the regular expressions that are provided in a file. */
2495    
2496     if (pattern_filename != NULL)
2497     {
2498     int linenumber = 0;
2499     FILE *f;
2500     char *filename;
2501     char buffer[MBUFTHIRD];
2502    
2503     if (strcmp(pattern_filename, "-") == 0)
2504 nigel 77 {
2505 nigel 87 f = stdin;
2506     filename = stdin_name;
2507 nigel 77 }
2508 nigel 87 else
2509 nigel 77 {
2510 nigel 87 f = fopen(pattern_filename, "r");
2511     if (f == NULL)
2512     {
2513     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2514     strerror(errno));
2515 ph10 123 goto EXIT2;
2516 nigel 87 }
2517     filename = pattern_filename;
2518 nigel 77 }
2519    
2520 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2521 nigel 53 {
2522 nigel 87 char *s = buffer + (int)strlen(buffer);
2523     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2524     *s = 0;
2525     linenumber++;
2526     if (buffer[0] == 0) continue; /* Skip blank lines */
2527     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2528 ph10 121 goto EXIT2;
2529 nigel 53 }
2530 nigel 87
2531     if (f != stdin) fclose(f);
2532 nigel 49 }
2533    
2534 nigel 77 /* Study the regular expressions, as we will be running them many times */
2535 nigel 53
2536     for (j = 0; j < pattern_count; j++)
2537     {
2538     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2539     if (error != NULL)
2540     {
2541     char s[16];
2542     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2543     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2544 ph10 121 goto EXIT2;
2545 nigel 53 }
2546 ph10 142 hint_count++;
2547 nigel 53 }
2548 ph10 579
2549 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2550     pcre_extra block for each pattern. */
2551 nigel 53
2552 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2553     {
2554     for (j = 0; j < pattern_count; j++)
2555     {
2556     if (hints_list[j] == NULL)
2557     {
2558     hints_list[j] = malloc(sizeof(pcre_extra));
2559 ph10 579 if (hints_list[j] == NULL)
2560 ph10 561 {
2561     fprintf(stderr, "pcregrep: malloc failed\n");
2562     pcregrep_exit(2);
2563     }
2564     }
2565     if (match_limit > 0)
2566 ph10 579 {
2567 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2568     hints_list[j]->match_limit = match_limit;
2569 ph10 579 }
2570 ph10 561 if (match_limit_recursion > 0)
2571 ph10 579 {
2572 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2573     hints_list[j]->match_limit_recursion = match_limit_recursion;
2574 ph10 579 }
2575 ph10 561 }
2576 ph10 579 }
2577 ph10 561
2578 nigel 77 /* If there are include or exclude patterns, compile them. */
2579    
2580     if (exclude_pattern != NULL)
2581     {
2582 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2583     pcretables);
2584 nigel 77 if (exclude_compiled == NULL)
2585     {
2586     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2587     errptr, error);
2588 ph10 121 goto EXIT2;
2589 nigel 77 }
2590     }
2591    
2592     if (include_pattern != NULL)
2593     {
2594 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2595     pcretables);
2596 nigel 77 if (include_compiled == NULL)
2597     {
2598     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2599     errptr, error);
2600 ph10 121 goto EXIT2;
2601 nigel 77 }
2602     }
2603    
2604 ph10 325 if (exclude_dir_pattern != NULL)
2605     {
2606     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2607     pcretables);
2608     if (exclude_dir_compiled == NULL)
2609     {
2610     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2611     errptr, error);
2612     goto EXIT2;
2613     }
2614     }
2615    
2616     if (include_dir_pattern != NULL)
2617     {
2618     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2619     pcretables);
2620     if (include_dir_compiled == NULL)
2621     {
2622     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2623     errptr, error);
2624     goto EXIT2;
2625     }
2626     }
2627    
2628 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2629 nigel 49
2630 nigel 87 if (i >= argc)
2631 ph10 121 {
2632 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2633 ph10 121 goto EXIT;
2634 ph10 123 }
2635 nigel 49
2636 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2637     Pass in the fact that there is only one argument at top level - this suppresses
2638 nigel 87 the file name if the argument is not a directory and filenames are not
2639     otherwise forced. */
2640 nigel 49
2641 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2642 nigel 49
2643     for (; i < argc; i++)
2644     {
2645 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2646     only_one_at_top);
2647 nigel 77 if (frc > 1) rc = frc;
2648     else if (frc == 0 && rc == 1) rc = 0;
2649 nigel 49 }
2650    
2651 ph10 121 EXIT:
2652     if (pattern_list != NULL)
2653     {
2654 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2655 ph10 121 free(pattern_list);
2656 ph10 123 }
2657 ph10 121 if (hints_list != NULL)
2658     {
2659 ph10 579 for (i = 0; i < hint_count; i++)
2660 ph10 561 {
2661     if (hints_list[i] != NULL) free(hints_list[i]);
2662 ph10 579 }
2663 ph10 121 free(hints_list);
2664 ph10 123 }
2665 ph10 561 pcregrep_exit(rc);
2666 ph10 121
2667     EXIT2:
2668     rc = 2;
2669     goto EXIT;
2670 nigel 49 }
2671    
2672 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12