/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 586 - (hide annotations) (download)
Wed Jan 12 17:36:47 2011 UTC (3 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 76954 byte(s)
Move definition of pcregrep_exit() above its first reference; this applies only 
to Windows.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* Home-grown boolean type and its two values. */

typedef int BOOL;

#define TRUE  1
#define FALSE 0

/* MAX_PATTERN_COUNT: presumably the capacity of the pattern lists (its use is
not visible in this part of the file). OFFSET_SIZE is the size of the offsets
vector passed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE       99

/* MBUFTHIRD is BUFSIZ or 8192, whichever is the larger. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which selects the file name output
options. The ordering matters: a file name is assumed to be wanted for every
value greater than FN_DEFAULT. */

enum {
  FN_NONE = 0,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Styles of file reading */

enum {
  FR_PLAIN = 0,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d option (dee_xxx) and for the -D option (DEE_xxx) */

enum {
  dee_READ = 0,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ = 0,
  DEE_SKIP
};

/* Flag bits for the special processing options */

#define PO_WORD_MATCH     0x01
#define PO_LINE_MATCH     0x02
#define PO_FIXED_STRINGS  0x04

/* Types of line ending */

enum {
  EL_LF = 0,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that FWRITE(...); behaves as a
single statement: it can be the body of an if that has an else, and it cannot
capture an else that follows it. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166 ph10 565 static int only_matching = -1;
167 nigel 87 static int process_options = 0;
168 nigel 77
169 ph10 561 static unsigned long int match_limit = 0;
170     static unsigned long int match_limit_recursion = 0;
171    
172 nigel 49 static BOOL count_only = FALSE;
173 nigel 87 static BOOL do_colour = FALSE;
174 ph10 280 static BOOL file_offsets = FALSE;
175 nigel 77 static BOOL hyphenpending = FALSE;
176 nigel 49 static BOOL invert = FALSE;
177 ph10 519 static BOOL line_buffered = FALSE;
178 ph10 280 static BOOL line_offsets = FALSE;
179 nigel 77 static BOOL multiline = FALSE;
180 nigel 49 static BOOL number = FALSE;
181 ph10 420 static BOOL omit_zero_count = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
/* The kinds of option that can appear in the option table. */

enum {
  OP_NODATA,       /* Used for the options whose dataptr is NULL */
  OP_STRING,       /* dataptr addresses a char * variable */
  OP_OP_STRING,    /* Used for --colo(u)r=option */
  OP_NUMBER,       /* dataptr addresses an int variable */
  OP_LONGNUMBER,   /* dataptr addresses an unsigned long int variable */
  OP_OP_NUMBER,    /* Used for -o (and for -S when JFRIEDL_DEBUG is set) */
  OP_PATLIST       /* Used for -e */
};

/* One entry in the option table. */

typedef struct option_item {
  int type;                /* An OP_xxx value */
  int one_char;            /* Option letter, or a negative N_xxx value */
  void *dataptr;           /* Variable for the option's data, or NULL */
  const char *long_name;   /* Long name, with any "=data" description */
  const char *help_text;   /* Description of the option */
} option_item;

/* An option that has no single-letter form is given a negative value instead
of a letter, which makes such options easy to recognize. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254 ph10 571
255     /* These two were accidentally implemented with underscores instead of
256     hyphens in the option names. As this was not discovered for several releases,
257     the incorrect versions are left in the table for compatibility. However, the
258     --help function misses out any option that has an underscore in its name. */
259 ph10 579
260 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 ph10 571
263 nigel 87 #ifdef JFRIEDL_DEBUG
264     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265     #endif
266     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272     { OP_NODATA, 0, NULL, NULL, NULL }
273 nigel 53 };
274    
/* Text to put before and after each pattern, depending on the -w, -x, and -F
options, which set the 1, 2, and 4 bits in process_options respectively. The
tables are laid out for indexing by those bits. Giving -w together with -x has
the same effect as -x alone, so entries 3 and 7 repeat entries 2 and 6. */

static const char *prefix[] = {
  "",           /* 0: no options */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: no options */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};

/* UTF-8 tables. The line scanning code for the "any" and "anycrlf" newline
settings uses them to decode characters when UTF-8 mode is set. */

/* Mask for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* Number of additional bytes, indexed by the bottom six bits of a first byte
whose value is 0xc0 or more. */

const char utf8_table4[] = {
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
  2,2,2,2, 2,2,2,2, 2,2,2,2, 2,2,2,2,
  3,3,3,3, 3,3,3,3, 4,4,4,4, 5,5,5,5
};
295    
296    
297    
298 nigel 53 /*************************************************
299 ph10 586 * Exit from the program *
300     *************************************************/
301    
302     /* If there has been a resource error, give a suitable message.
303    
304     Argument: the return code
305     Returns: does not return
306     */
307    
308     static void
309     pcregrep_exit(int rc)
310     {
311     if (resource_error)
312     {
313     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316     }
317    
318     exit(rc);
319     }
320    
321    
322     /*************************************************
323 nigel 87 * OS-specific functions *
324 nigel 53 *************************************************/
325    
326     /* These functions are defined so that they can be made system specific,
327 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
328 nigel 53
329    
330     /************* Directory scanning in Unix ***********/
331    
332 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 nigel 53 #include <sys/types.h>
334     #include <sys/stat.h>
335     #include <dirent.h>
336    
337     typedef DIR directory_type;
338    
/* Test whether a path names a directory.

Argument:  the path to test
Returns:   '/' if stat() succeeds and reports a directory, otherwise 0 (a
             stat() failure gives 0, in the expectation that opening the path
             as a file will then fail)
*/

static int
isdirectory(char *filename)
{
struct stat info;
int yield = 0;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) yield = '/';
return yield;
}
347    
348 nigel 67 static directory_type *
349 nigel 53 opendirectory(char *filename)
350     {
351     return opendir(filename);
352     }
353    
354 nigel 67 static char *
355 nigel 53 readdirectory(directory_type *dir)
356     {
357     for (;;)
358     {
359     struct dirent *dent = readdir(dir);
360     if (dent == NULL) return NULL;
361     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362     return dent->d_name;
363     }
364 ph10 151 /* Control never reaches here */
365 nigel 53 }
366    
367 nigel 67 static void
368 nigel 53 closedirectory(directory_type *dir)
369     {
370     closedir(dir);
371     }
372    
373    
374 nigel 87 /************* Test for regular file in Unix **********/
375    
/* Test whether a path names a regular file.

Argument:  the path to test
Returns:   1 if stat() reports a regular file, 0 if it reports anything
             else; a stat() failure also gives 1, in the expectation that
             opening the path as a file will then fail
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0) return S_ISREG(info.st_mode) != 0;
return 1;
}
384    
385    
386 ph10 519 /************* Test for a terminal in Unix **********/
387 nigel 87
388     static BOOL
389     is_stdout_tty(void)
390     {
391     return isatty(fileno(stdout));
392     }
393    
394 ph10 519 static BOOL
395     is_file_tty(FILE *f)
396     {
397     return isatty(fileno(f));
398     }
399 nigel 87
400 ph10 519
401 nigel 63 /************* Directory scanning in Win32 ***********/
402 nigel 53
403 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
406     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408     undefined when it is indeed undefined. */
409 nigel 53
410 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411 nigel 63
412     #ifndef STRICT
413     # define STRICT
414     #endif
415     #ifndef WIN32_LEAN_AND_MEAN
416     # define WIN32_LEAN_AND_MEAN
417     #endif
418 ph10 283
419     #include <windows.h>
420    
421 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
422     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423     #endif
424    
425 nigel 63 typedef struct directory_type
426     {
427     HANDLE handle;
428     BOOL first;
429     WIN32_FIND_DATA data;
430     } directory_type;
431    
432     int
433     isdirectory(char *filename)
434     {
435     DWORD attr = GetFileAttributes(filename);
436     if (attr == INVALID_FILE_ATTRIBUTES)
437     return 0;
438     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439     }
440    
441     directory_type *
442     opendirectory(char *filename)
443     {
444     size_t len;
445     char *pattern;
446     directory_type *dir;
447     DWORD err;
448     len = strlen(filename);
449     pattern = (char *) malloc(len + 3);
450     dir = (directory_type *) malloc(sizeof(*dir));
451     if ((pattern == NULL) || (dir == NULL))
452     {
453     fprintf(stderr, "pcregrep: malloc failed\n");
454 ph10 561 pcregrep_exit(2);
455 nigel 63 }
456     memcpy(pattern, filename, len);
457     memcpy(&(pattern[len]), "\\*", 3);
458     dir->handle = FindFirstFile(pattern, &(dir->data));
459     if (dir->handle != INVALID_HANDLE_VALUE)
460     {
461     free(pattern);
462     dir->first = TRUE;
463     return dir;
464     }
465     err = GetLastError();
466     free(pattern);
467     free(dir);
468     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469     return NULL;
470     }
471    
472     char *
473     readdirectory(directory_type *dir)
474     {
475     for (;;)
476     {
477     if (!dir->first)
478     {
479     if (!FindNextFile(dir->handle, &(dir->data)))
480     return NULL;
481     }
482     else
483     {
484     dir->first = FALSE;
485     }
486     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487     return dir->data.cFileName;
488     }
489     #ifndef _MSC_VER
490     return NULL; /* Keep compiler happy; never executed */
491     #endif
492     }
493    
494     void
495     closedirectory(directory_type *dir)
496     {
497     FindClose(dir->handle);
498     free(dir);
499     }
500    
501    
502 nigel 87 /************* Test for regular file in Win32 **********/
503    
504     /* I don't know how to do this, or if it can be done; assume all paths are
505     regular if they are not directories. */
506    
/* Returns 1 for any path that isdirectory() does not report as a directory,
otherwise 0. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
511    
512    
513 ph10 519 /************* Test for a terminal in Win32 **********/
514 nigel 87
515     /* I don't know how to do this; assume never */
516    
517     static BOOL
518     is_stdout_tty(void)
519     {
520 ph10 283 return FALSE;
521 nigel 87 }
522    
523 ph10 519 static BOOL
524     is_file_tty(FILE *f)
525     {
526     return FALSE;
527     }
528 nigel 87
529 ph10 519
530 nigel 53 /************* Directory scanning when we can't do it ***********/
531    
532     /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534 nigel 63 #else
535    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to do. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
542    
543 nigel 87
544     /************* Test for regular when we can't do it **********/
545    
546     /* Assume all files are regular. */
547    
int
isregfile(char *filename)
{
(void)filename;
return 1;
}
549    
550    
551 ph10 519 /************* Test for a terminal when we can't do it **********/
552 nigel 87
553     static BOOL
554     is_stdout_tty(void)
555     {
556     return FALSE;
557     }
558    
559 ph10 519 static BOOL
560     is_file_tty(FILE *f)
561     {
562     return FALSE;
563     }
564 nigel 87
565 nigel 53 #endif
566    
567    
568    
569 ph10 137 #ifndef HAVE_STRERROR
570 nigel 49 /*************************************************
571     * Provide strerror() for non-ANSI libraries *
572     *************************************************/
573    
574     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
575     in their libraries, but can provide the same facility by this simple
576     alternative function. */
577    
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in sys_errlist[].

Argument:  an error number
Returns:   the sys_errlist[] entry for n, or a fixed "unknown" text when n
             is negative or not less than sys_nerr
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
587     #endif /* HAVE_STRERROR */
588    
589    
590    
/*************************************************
*            Read one line of input              *
*************************************************/

/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a newline has been stored, when length characters have been
stored, or at end of file, whichever happens first. No terminating zero is
added. The space remaining is checked before each character is read, so a
length that is zero or negative stores nothing and consumes no input.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622    
623    
624    
625     /*************************************************
626 nigel 93 * Find end of line *
627     *************************************************/
628    
629     /* The length of the endline sequence that is found is set via lenptr. This may
630     be zero at the very end of the file if there is no line-ending sequence there.
631    
632     Arguments:
633     p current position in line
634     endptr end of available data
635     lenptr where to put the length of the eol sequence
636    
637     Returns: pointer to the last byte of the line
638     */
639    
640     static char *
641     end_of_line(char *p, char *endptr, int *lenptr)
642     {
643     switch(endlinetype)
644     {
645     default: /* Just in case */
646     case EL_LF:
647     while (p < endptr && *p != '\n') p++;
648     if (p < endptr)
649     {
650     *lenptr = 1;
651     return p + 1;
652     }
653     *lenptr = 0;
654     return endptr;
655    
656     case EL_CR:
657     while (p < endptr && *p != '\r') p++;
658     if (p < endptr)
659     {
660     *lenptr = 1;
661     return p + 1;
662     }
663     *lenptr = 0;
664     return endptr;
665    
666     case EL_CRLF:
667     for (;;)
668     {
669     while (p < endptr && *p != '\r') p++;
670     if (++p >= endptr)
671     {
672     *lenptr = 0;
673     return endptr;
674     }
675     if (*p == '\n')
676     {
677     *lenptr = 2;
678     return p + 1;
679     }
680     }
681     break;
682    
683 ph10 149 case EL_ANYCRLF:
684     while (p < endptr)
685     {
686     int extra = 0;
687     register int c = *((unsigned char *)p);
688    
689     if (utf8 && c >= 0xc0)
690     {
691     int gcii, gcss;
692     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693     gcss = 6*extra;
694     c = (c & utf8_table3[extra]) << gcss;
695     for (gcii = 1; gcii <= extra; gcii++)
696     {
697     gcss -= 6;
698     c |= (p[gcii] & 0x3f) << gcss;
699     }
700     }
701    
702     p += 1 + extra;
703    
704     switch (c)
705     {
706     case 0x0a: /* LF */
707     *lenptr = 1;
708     return p;
709    
710     case 0x0d: /* CR */
711     if (p < endptr && *p == 0x0a)
712     {
713     *lenptr = 2;
714     p++;
715     }
716     else *lenptr = 1;
717     return p;
718 ph10 150
719 ph10 149 default:
720     break;
721     }
722     } /* End of loop for ANYCRLF case */
723 ph10 150
724 ph10 149 *lenptr = 0; /* Must have hit the end */
725     return endptr;
726    
727 nigel 93 case EL_ANY:
728     while (p < endptr)
729     {
730     int extra = 0;
731     register int c = *((unsigned char *)p);
732    
733     if (utf8 && c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (p[gcii] & 0x3f) << gcss;
743     }
744     }
745    
746     p += 1 + extra;
747    
748     switch (c)
749     {
750     case 0x0a: /* LF */
751     case 0x0b: /* VT */
752     case 0x0c: /* FF */
753     *lenptr = 1;
754     return p;
755    
756     case 0x0d: /* CR */
757     if (p < endptr && *p == 0x0a)
758     {
759     *lenptr = 2;
760     p++;
761     }
762     else *lenptr = 1;
763     return p;
764    
765     case 0x85: /* NEL */
766     *lenptr = utf8? 2 : 1;
767     return p;
768    
769     case 0x2028: /* LS */
770     case 0x2029: /* PS */
771     *lenptr = 3;
772     return p;
773    
774     default:
775     break;
776     }
777     } /* End of loop for ANY case */
778    
779     *lenptr = 0; /* Must have hit the end */
780     return endptr;
781     } /* End of overall switch */
782     }
783    
784    
785    
786     /*************************************************
787     * Find start of previous line *
788     *************************************************/
789    
790     /* This is called when looking back for before lines to print.
791    
792     Arguments:
793     p start of the subsequent line
794     startptr start of available data
795    
796     Returns: pointer to the start of the previous line
797     */
798    
799     static char *
800     previous_line(char *p, char *startptr)
801     {
802     switch(endlinetype)
803     {
804     default: /* Just in case */
805     case EL_LF:
806     p--;
807     while (p > startptr && p[-1] != '\n') p--;
808     return p;
809    
810     case EL_CR:
811     p--;
812     while (p > startptr && p[-1] != '\n') p--;
813     return p;
814    
815     case EL_CRLF:
816     for (;;)
817     {
818     p -= 2;
819     while (p > startptr && p[-1] != '\n') p--;
820     if (p <= startptr + 1 || p[-2] == '\r') return p;
821     }
822     return p; /* But control should never get here */
823    
824     case EL_ANY:
825 ph10 150 case EL_ANYCRLF:
826 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827     if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829     while (p > startptr)
830     {
831     register int c;
832     char *pp = p - 1;
833    
834     if (utf8)
835     {
836     int extra = 0;
837     while ((*pp & 0xc0) == 0x80) pp--;
838     c = *((unsigned char *)pp);
839     if (c >= 0xc0)
840     {
841     int gcii, gcss;
842     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843     gcss = 6*extra;
844     c = (c & utf8_table3[extra]) << gcss;
845     for (gcii = 1; gcii <= extra; gcii++)
846     {
847     gcss -= 6;
848     c |= (pp[gcii] & 0x3f) << gcss;
849     }
850     }
851     }
852     else c = *((unsigned char *)pp);
853    
854 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
855 nigel 93 {
856     case 0x0a: /* LF */
857 ph10 149 case 0x0d: /* CR */
858     return p;
859 ph10 150
860 ph10 149 default:
861     break;
862 ph10 150 }
863 ph10 149
864     else switch (c)
865     {
866     case 0x0a: /* LF */
867 nigel 93 case 0x0b: /* VT */
868     case 0x0c: /* FF */
869     case 0x0d: /* CR */
870     case 0x85: /* NEL */
871     case 0x2028: /* LS */
872     case 0x2029: /* PS */
873     return p;
874    
875     default:
876     break;
877     }
878    
879     p = pp; /* Back one character */
880     } /* End of loop for ANY case */
881    
882     return startptr; /* Hit start of data */
883     } /* End of overall switch */
884     }
885    
886    
887    
888    
889    
890     /*************************************************
891 nigel 77 * Print the previous "after" lines *
892 nigel 49 *************************************************/
893    
894 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
895 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
896     that a binary zero does not terminate it.
897 nigel 77
898     Arguments:
899     lastmatchnumber the number of the last matching line, plus one
900     lastmatchrestart where we restarted after the last match
901     endptr end of available data
902     printname filename for printing
903    
904     Returns: nothing
905     */
906    
907     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908     char *endptr, char *printname)
909     {
910     if (after_context > 0 && lastmatchnumber > 0)
911     {
912     int count = 0;
913     while (lastmatchrestart < endptr && count++ < after_context)
914     {
915 nigel 93 int ellength;
916 nigel 77 char *pp = lastmatchrestart;
917     if (printname != NULL) fprintf(stdout, "%s-", printname);
918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 nigel 93 pp = end_of_line(pp, endptr, &ellength);
920 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 nigel 93 lastmatchrestart = pp;
922 nigel 77 }
923     hyphenpending = TRUE;
924     }
925     }
926    
927    
928    
929     /*************************************************
930 ph10 378 * Apply patterns to subject till one matches *
931     *************************************************/
932    
933 ph10 392 /* This function is called to run through all patterns, looking for a match. It
934     is used multiple times for the same subject when colouring is enabled, in order
935 ph10 378 to find all possible matches.
936    
937     Arguments:
938     matchptr the start of the subject
939     length the length of the subject to match
940     offsets the offets vector to fill in
941     mrc address of where to put the result of pcre_exec()
942 ph10 392
943     Returns: TRUE if there was a match
944 ph10 378 FALSE if there was no match
945     invert if there was a non-fatal error
946 ph10 392 */
947 ph10 378
948     static BOOL
949     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
950     {
951     int i;
952 ph10 561 size_t slen = length;
953     const char *msg = "this text:\n\n";
954     if (slen > 200)
955     {
956     slen = 200;
957     msg = "text that starts:\n\n";
958 ph10 579 }
959 ph10 378 for (i = 0; i < pattern_count; i++)
960     {
961 ph10 530 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
962 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
963 ph10 378 if (*mrc >= 0) return TRUE;
964     if (*mrc == PCRE_ERROR_NOMATCH) continue;
965 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
966 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
967 ph10 561 fprintf(stderr, "%s", msg);
968     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
969     fprintf(stderr, "\n\n");
970     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
971     resource_error = TRUE;
972 ph10 378 if (error_count++ > 20)
973     {
974 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
975     pcregrep_exit(2);
976 ph10 378 }
977     return invert; /* No more matching; don't show the line again */
978     }
979    
980     return FALSE; /* No match, no errors */
981     }
982    
983    
984    
985     /*************************************************
986 nigel 77 * Grep an individual file *
987     *************************************************/
988    
989     /* This is called from grep_or_recurse() below. It uses a buffer that is three
990     times the value of MBUFTHIRD. The matching point is never allowed to stray into
991     the top third of the buffer, thus keeping more of the file available for
992     context printing or for multiline scanning. For large files, the pointer will
993     be in the middle third most of the time, so the bottom third is available for
994     "before" context printing.
995    
996     Arguments:
997 ph10 286 handle the fopened FILE stream for a normal file
998     the gzFile pointer when reading is via libz
999     the BZFILE pointer when reading is via libbz2
1000     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1001 nigel 77 printname the file name if it is to be printed for each match
1002     or NULL if the file name is not to be printed
1003     it cannot be NULL if filenames[_nomatch]_only is set
1004    
1005     Returns: 0 if there was at least one match
1006     1 otherwise (no matches)
1007 ph10 286 2 if there is a read error on a .bz2 file
1008 nigel 77 */
1009    
1010 nigel 49 static int
1011 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1012 nigel 49 {
1013     int rc = 1;
1014 nigel 77 int linenumber = 1;
1015     int lastmatchnumber = 0;
1016 nigel 49 int count = 0;
1017 ph10 280 int filepos = 0;
1018 ph10 378 int offsets[OFFSET_SIZE];
1019 nigel 77 char *lastmatchrestart = NULL;
1020     char buffer[3*MBUFTHIRD];
1021     char *ptr = buffer;
1022     char *endptr;
1023     size_t bufflength;
1024     BOOL endhyphenpending = FALSE;
1025 ph10 519 BOOL input_line_buffered = line_buffered;
1026 ph10 286 FILE *in = NULL; /* Ensure initialized */
1027 nigel 49
1028 ph10 286 #ifdef SUPPORT_LIBZ
1029     gzFile ingz = NULL;
1030     #endif
1031 nigel 77
1032 ph10 286 #ifdef SUPPORT_LIBBZ2
1033     BZFILE *inbz2 = NULL;
1034     #endif
1035    
1036    
1037     /* Do the first read into the start of the buffer and set up the pointer to end
1038     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1039     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1040     fail. */
1041    
1042     #ifdef SUPPORT_LIBZ
1043     if (frtype == FR_LIBZ)
1044     {
1045     ingz = (gzFile)handle;
1046     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1047     }
1048     else
1049     #endif
1050    
1051     #ifdef SUPPORT_LIBBZ2
1052     if (frtype == FR_LIBBZ2)
1053     {
1054     inbz2 = (BZFILE *)handle;
1055     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1056     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1057     } /* without the cast it is unsigned. */
1058     else
1059     #endif
1060    
1061     {
1062     in = (FILE *)handle;
1063 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1064 ph10 535 bufflength = input_line_buffered?
1065 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1066     fread(buffer, 1, 3*MBUFTHIRD, in);
1067 ph10 286 }
1068 ph10 535
1069 nigel 77 endptr = buffer + bufflength;
1070    
1071     /* Loop while the current pointer is not at the end of the file. For large
1072     files, endptr will be at the end of the buffer when we are in the middle of the
1073     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1074     way, the buffer is shifted left and re-filled. */
1075    
1076     while (ptr < endptr)
1077 nigel 49 {
1078 ph10 378 int endlinelength;
1079 nigel 87 int mrc = 0;
1080 ph10 378 BOOL match;
1081 ph10 286 char *matchptr = ptr;
1082 nigel 77 char *t = ptr;
1083     size_t length, linelength;
1084 nigel 49
1085 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1086     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1087     length remainder of the data in the buffer. Otherwise, it is the length of
1088 ph10 378 the next line, excluding the terminating newline. After matching, we always
1089     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1090     option is used for compiling, so that any match is constrained to be in the
1091     first line. */
1092 nigel 77
1093 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1094     linelength = t - ptr - endlinelength;
1095 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1096 nigel 77
1097 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1098    
1099     #ifdef JFRIEDL_DEBUG
1100     if (jfriedl_XT || jfriedl_XR)
1101     {
1102     #include <sys/time.h>
1103     #include <time.h>
1104     struct timeval start_time, end_time;
1105     struct timezone dummy;
1106 ph10 392 int i;
1107 nigel 89
1108     if (jfriedl_XT)
1109     {
1110     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1111     const char *orig = ptr;
1112     ptr = malloc(newlen + 1);
1113     if (!ptr) {
1114     printf("out of memory");
1115 ph10 561 pcregrep_exit(2);
1116 nigel 89 }
1117     endptr = ptr;
1118     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1119     for (i = 0; i < jfriedl_XT; i++) {
1120     strncpy(endptr, orig, length);
1121     endptr += length;
1122     }
1123     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1124     length = newlen;
1125     }
1126    
1127     if (gettimeofday(&start_time, &dummy) != 0)
1128     perror("bad gettimeofday");
1129    
1130    
1131     for (i = 0; i < jfriedl_XR; i++)
1132 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1133 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1134 nigel 89
1135     if (gettimeofday(&end_time, &dummy) != 0)
1136     perror("bad gettimeofday");
1137    
1138     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1139     -
1140     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1141    
1142     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1143     return 0;
1144     }
1145     #endif
1146    
1147 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1148 ph10 279 in order to find any further matches in the same line. */
1149 nigel 89
1150 ph10 286 ONLY_MATCHING_RESTART:
1151    
1152 ph10 392 /* Run through all the patterns until one matches or there is an error other
1153 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1154     finding subsequent matches when colouring matched lines. */
1155 ph10 392
1156 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1157 nigel 77
1158 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1159 nigel 77
1160 nigel 49 if (match != invert)
1161     {
1162 nigel 77 BOOL hyphenprinted = FALSE;
1163    
1164 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1165 nigel 77
1166 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1167    
1168     /* Just count if just counting is wanted. */
1169    
1170 nigel 49 if (count_only) count++;
1171    
1172 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1173     in the file. */
1174    
1175 ph10 420 else if (filenames == FN_MATCH_ONLY)
1176 nigel 49 {
1177 nigel 77 fprintf(stdout, "%s\n", printname);
1178 nigel 49 return 0;
1179     }
1180    
1181 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1182    
1183 nigel 77 else if (quiet) return 0;
1184 nigel 49
1185 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1186 ph10 565 captured portion of it, as long as this string is not empty, and the
1187     --file-offsets and --line-offsets options output offsets for the matching
1188     substring (they both force --only-matching = 0). None of these options
1189 ph10 280 prints any context. Afterwards, adjust the start and length, and then jump
1190     back to look for further matches in the same line. If we are in invert
1191 ph10 565 mode, however, nothing is printed and we do not restart - this could still
1192     be useful because the return code is set. */
1193 nigel 87
1194 ph10 565 else if (only_matching >= 0)
1195 nigel 87 {
1196 ph10 279 if (!invert)
1197 ph10 286 {
1198 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1199     if (number) fprintf(stdout, "%d:", linenumber);
1200 ph10 280 if (line_offsets)
1201 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1202 ph10 286 offsets[1] - offsets[0]);
1203 ph10 280 else if (file_offsets)
1204 ph10 579 fprintf(stdout, "%d,%d\n",
1205 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1206 ph10 286 offsets[1] - offsets[0]);
1207 ph10 565 else if (only_matching < mrc)
1208 ph10 377 {
1209 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1210     if (plen > 0)
1211 ph10 579 {
1212 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1213     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1214     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1215     fprintf(stdout, "\n");
1216 ph10 579 }
1217 ph10 392 }
1218 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1219 ph10 279 matchptr += offsets[1];
1220     length -= offsets[1];
1221 ph10 286 match = FALSE;
1222 ph10 564 if (line_buffered) fflush(stdout);
1223     rc = 0; /* Had some success */
1224 ph10 286 goto ONLY_MATCHING_RESTART;
1225     }
1226 nigel 87 }
1227    
1228     /* This is the default case when none of the above options is set. We print
1229     the matching lines(s), possibly preceded and/or followed by other lines of
1230     context. */
1231    
1232 nigel 49 else
1233     {
1234 nigel 77 /* See if there is a requirement to print some "after" lines from a
1235     previous match. We never print any overlaps. */
1236    
1237     if (after_context > 0 && lastmatchnumber > 0)
1238     {
1239 nigel 93 int ellength;
1240 nigel 77 int linecount = 0;
1241     char *p = lastmatchrestart;
1242    
1243     while (p < ptr && linecount < after_context)
1244     {
1245 nigel 93 p = end_of_line(p, ptr, &ellength);
1246 nigel 77 linecount++;
1247     }
1248    
1249     /* It is important to advance lastmatchrestart during this printing so
1250 nigel 87 that it interacts correctly with any "before" printing below. Print
1251     each line's data using fwrite() in case there are binary zeroes. */
1252 nigel 77
1253     while (lastmatchrestart < p)
1254     {
1255     char *pp = lastmatchrestart;
1256     if (printname != NULL) fprintf(stdout, "%s-", printname);
1257     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1258 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1259 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1260 nigel 93 lastmatchrestart = pp;
1261 nigel 77 }
1262     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1263     }
1264    
1265     /* If there were non-contiguous lines printed above, insert hyphens. */
1266    
1267     if (hyphenpending)
1268     {
1269     fprintf(stdout, "--\n");
1270     hyphenpending = FALSE;
1271     hyphenprinted = TRUE;
1272     }
1273    
1274     /* See if there is a requirement to print some "before" lines for this
1275     match. Again, don't print overlaps. */
1276    
1277     if (before_context > 0)
1278     {
1279     int linecount = 0;
1280     char *p = ptr;
1281    
1282     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1283 nigel 87 linecount < before_context)
1284 nigel 77 {
1285 nigel 87 linecount++;
1286 nigel 93 p = previous_line(p, buffer);
1287 nigel 77 }
1288    
1289     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1290     fprintf(stdout, "--\n");
1291    
1292     while (p < ptr)
1293     {
1294 nigel 93 int ellength;
1295 nigel 77 char *pp = p;
1296     if (printname != NULL) fprintf(stdout, "%s-", printname);
1297     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1298 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1299 ph10 515 FWRITE(p, 1, pp - p, stdout);
1300 nigel 93 p = pp;
1301 nigel 77 }
1302     }
1303    
1304     /* Now print the matching line(s); ensure we set hyphenpending at the end
1305 nigel 85 of the file if any context lines are being output. */
1306 nigel 77
1307 nigel 85 if (after_context > 0 || before_context > 0)
1308     endhyphenpending = TRUE;
1309    
1310 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1311 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1312 nigel 77
1313     /* In multiline mode, we want to print to the end of the line in which
1314     the end of the matched string is found, so we adjust linelength and the
1315 ph10 222 line number appropriately, but only when there actually was a match
1316     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1317     the match will always be before the first newline sequence. */
1318 nigel 77
1319     if (multiline)
1320     {
1321 nigel 93 int ellength;
1322 ph10 222 char *endmatch = ptr;
1323     if (!invert)
1324 nigel 93 {
1325 ph10 222 endmatch += offsets[1];
1326     t = ptr;
1327     while (t < endmatch)
1328     {
1329     t = end_of_line(t, endptr, &ellength);
1330     if (t <= endmatch) linenumber++; else break;
1331     }
1332 nigel 93 }
1333     endmatch = end_of_line(endmatch, endptr, &ellength);
1334     linelength = endmatch - ptr - ellength;
1335 nigel 77 }
1336    
1337 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1338     zeroes are treated as just another data character. */
1339    
1340     /* This extra option, for Jeffrey Friedl's debugging requirements,
1341     replaces the matched string, or a specific captured string if it exists,
1342     with X. When this happens, colouring is ignored. */
1343    
1344     #ifdef JFRIEDL_DEBUG
1345     if (S_arg >= 0 && S_arg < mrc)
1346     {
1347     int first = S_arg * 2;
1348     int last = first + 1;
1349 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1350 nigel 87 fprintf(stdout, "X");
1351 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1352 nigel 87 }
1353     else
1354     #endif
1355    
1356 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1357 ph10 585 matches, but not of course if the line is a non-match. */
1358 nigel 87
1359 ph10 585 if (do_colour && !invert)
1360 nigel 87 {
1361 ph10 392 int last_offset = 0;
1362 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1363 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1364 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1365 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1366 ph10 378 for (;;)
1367     {
1368 ph10 392 last_offset += offsets[1];
1369 ph10 378 matchptr += offsets[1];
1370     length -= offsets[1];
1371     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1372 ph10 515 FWRITE(matchptr, 1, offsets[0], stdout);
1373 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1374 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1375 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1376     }
1377 ph10 535 FWRITE(ptr + last_offset, 1,
1378 ph10 515 (linelength + endlinelength) - last_offset, stdout);
1379 nigel 87 }
1380 ph10 392
1381 ph10 378 /* Not colouring; no need to search for further matches */
1382 ph10 392
1383 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1384 nigel 49 }
1385    
1386 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1387     given, flush the output. */
1388 nigel 87
1389 ph10 519 if (line_buffered) fflush(stdout);
1390 nigel 77 rc = 0; /* Had some success */
1391    
1392     /* Remember where the last match happened for after_context. We remember
1393     where we are about to restart, and that line's number. */
1394    
1395 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1396 nigel 77 lastmatchnumber = linenumber + 1;
1397 nigel 49 }
1398 nigel 77
1399 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1400     anything to be printed), we have to move on to the end of the match before
1401     proceeding. */
1402    
1403     if (multiline && invert && match)
1404     {
1405     int ellength;
1406     char *endmatch = ptr + offsets[1];
1407     t = ptr;
1408     while (t < endmatch)
1409     {
1410     t = end_of_line(t, endptr, &ellength);
1411     if (t <= endmatch) linenumber++; else break;
1412     }
1413     endmatch = end_of_line(endmatch, endptr, &ellength);
1414     linelength = endmatch - ptr - ellength;
1415     }
1416    
1417 ph10 286 /* Advance to after the newline and increment the line number. The file
1418 ph10 280 offset to the current line is maintained in filepos. */
1419 nigel 77
1420 nigel 93 ptr += linelength + endlinelength;
1421 ph10 530 filepos += (int)(linelength + endlinelength);
1422 nigel 77 linenumber++;
1423 ph10 535
1424     /* If input is line buffered, and the buffer is not yet full, read another
1425 ph10 519 line and add it into the buffer. */
1426 ph10 535
1427 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1428     {
1429     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1430     bufflength += add;
1431 ph10 535 endptr += add;
1432     }
1433 nigel 77
1434     /* If we haven't yet reached the end of the file (the buffer is full), and
1435     the current point is in the top 1/3 of the buffer, slide the buffer down by
1436     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1437     about to be lost, print them. */
1438    
1439     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1440     {
1441     if (after_context > 0 &&
1442     lastmatchnumber > 0 &&
1443     lastmatchrestart < buffer + MBUFTHIRD)
1444     {
1445     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1446     lastmatchnumber = 0;
1447     }
1448    
1449     /* Now do the shuffle */
1450    
1451     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1452     ptr -= MBUFTHIRD;
1453 ph10 286
1454     #ifdef SUPPORT_LIBZ
1455     if (frtype == FR_LIBZ)
1456     bufflength = 2*MBUFTHIRD +
1457     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1458     else
1459     #endif
1460    
1461     #ifdef SUPPORT_LIBBZ2
1462     if (frtype == FR_LIBBZ2)
1463     bufflength = 2*MBUFTHIRD +
1464     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1465     else
1466     #endif
1467    
1468 ph10 535 bufflength = 2*MBUFTHIRD +
1469     (input_line_buffered?
1470     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1471 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1472 nigel 77 endptr = buffer + bufflength;
1473    
1474     /* Adjust any last match point */
1475    
1476     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1477     }
1478     } /* Loop through the whole file */
1479    
1480     /* End of file; print final "after" lines if wanted; do_after_lines sets
1481     hyphenpending if it prints something. */
1482    
1483 ph10 565 if (only_matching < 0 && !count_only)
1484 nigel 87 {
1485     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1486     hyphenpending |= endhyphenpending;
1487     }
1488 nigel 77
1489     /* Print the file name if we are looking for those without matches and there
1490     were none. If we found a match, we won't have got this far. */
1491    
1492 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1493 nigel 77 {
1494     fprintf(stdout, "%s\n", printname);
1495     return 0;
1496 nigel 49 }
1497    
1498 nigel 77 /* Print the match count if wanted */
1499    
1500 nigel 49 if (count_only)
1501     {
1502 ph10 420 if (count > 0 || !omit_zero_count)
1503 ph10 461 {
1504     if (printname != NULL && filenames != FN_NONE)
1505 ph10 420 fprintf(stdout, "%s:", printname);
1506     fprintf(stdout, "%d\n", count);
1507 ph10 461 }
1508 nigel 49 }
1509    
1510     return rc;
1511     }
1512    
1513    
1514    
1515     /*************************************************
1516 nigel 53 * Grep a file or recurse into a directory *
1517     *************************************************/
1518    
1519 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1520     recursing; if it's a file, grep it.
1521    
1522     Arguments:
1523     pathname the path to investigate
1524 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1525 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1526    
1527     Returns: 0 if there was at least one match
1528     1 if there were no matches
1529     2 there was some kind of error
1530    
1531     However, file opening failures are suppressed if "silent" is set.
1532     */
1533    
1534 nigel 53 static int
1535 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1536 nigel 53 {
1537     int rc = 1;
1538     int sep;
1539 ph10 286 int frtype;
1540     int pathlen;
1541     void *handle;
1542     FILE *in = NULL; /* Ensure initialized */
1543 nigel 53
1544 ph10 286 #ifdef SUPPORT_LIBZ
1545     gzFile ingz = NULL;
1546     #endif
1547    
1548     #ifdef SUPPORT_LIBBZ2
1549     BZFILE *inbz2 = NULL;
1550     #endif
1551    
1552 nigel 77 /* If the file name is "-" we scan stdin */
1553 nigel 53
1554 nigel 77 if (strcmp(pathname, "-") == 0)
1555 nigel 53 {
1556 ph10 286 return pcregrep(stdin, FR_PLAIN,
1557 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1558 nigel 77 stdin_name : NULL);
1559     }
1560    
1561 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1562 ph10 325 each file and directory within it, subject to any include or exclude patterns
1563     that were set. The scanning code is localized so it can be made
1564     system-specific. */
1565 nigel 87
1566     if ((sep = isdirectory(pathname)) != 0)
1567 nigel 77 {
1568 nigel 87 if (dee_action == dee_SKIP) return 1;
1569     if (dee_action == dee_RECURSE)
1570 nigel 53 {
1571 nigel 87 char buffer[1024];
1572     char *nextfile;
1573     directory_type *dir = opendirectory(pathname);
1574 nigel 53
1575 nigel 87 if (dir == NULL)
1576     {
1577     if (!silent)
1578     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1579     strerror(errno));
1580     return 2;
1581     }
1582 nigel 77
1583 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1584     {
1585 ph10 324 int frc, nflen;
1586 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1587 ph10 530 nflen = (int)(strlen(nextfile));
1588 ph10 345
1589 ph10 325 if (isdirectory(buffer))
1590     {
1591     if (exclude_dir_compiled != NULL &&
1592     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1593     continue;
1594 ph10 345
1595 ph10 325 if (include_dir_compiled != NULL &&
1596     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1597     continue;
1598     }
1599 ph10 345 else
1600     {
1601 ph10 324 if (exclude_compiled != NULL &&
1602     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1603     continue;
1604 ph10 345
1605 ph10 324 if (include_compiled != NULL &&
1606     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1607     continue;
1608 ph10 345 }
1609 nigel 77
1610 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1611     if (frc > 1) rc = frc;
1612     else if (frc == 0 && rc == 1) rc = 0;
1613     }
1614    
1615     closedirectory(dir);
1616     return rc;
1617 nigel 53 }
1618     }
1619    
1620 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1621     been requested. */
1622 nigel 53
1623 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1624    
1625     /* Control reaches here if we have a regular file, or if we have a directory
1626     and recursion or skipping was not requested, or if we have anything else and
1627     skipping was not requested. The scan proceeds. If this is the first and only
1628     argument at top level, we don't show the file name, unless we are only showing
1629     the file name, or the filename was forced (-H). */
1630    
1631 ph10 530 pathlen = (int)(strlen(pathname));
1632 ph10 286
1633     /* Open using zlib if it is supported and the file name ends with .gz. */
1634    
1635     #ifdef SUPPORT_LIBZ
1636     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1637 nigel 53 {
1638 ph10 286 ingz = gzopen(pathname, "rb");
1639     if (ingz == NULL)
1640     {
1641     if (!silent)
1642     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1643     strerror(errno));
1644     return 2;
1645     }
1646     handle = (void *)ingz;
1647     frtype = FR_LIBZ;
1648     }
1649     else
1650     #endif
1651    
1652     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1653    
1654     #ifdef SUPPORT_LIBBZ2
1655     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1656     {
1657     inbz2 = BZ2_bzopen(pathname, "rb");
1658     handle = (void *)inbz2;
1659     frtype = FR_LIBBZ2;
1660     }
1661     else
1662     #endif
1663    
1664     /* Otherwise use plain fopen(). The label is so that we can come back here if
1665     an attempt to read a .bz2 file indicates that it really is a plain file. */
1666    
1667     #ifdef SUPPORT_LIBBZ2
1668     PLAIN_FILE:
1669     #endif
1670     {
1671 ph10 419 in = fopen(pathname, "rb");
1672 ph10 286 handle = (void *)in;
1673     frtype = FR_PLAIN;
1674     }
1675    
1676     /* All the opening methods return errno when they fail. */
1677    
1678     if (handle == NULL)
1679     {
1680 nigel 77 if (!silent)
1681     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1682     strerror(errno));
1683 nigel 53 return 2;
1684     }
1685    
1686 ph10 286 /* Now grep the file */
1687    
1688     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1689 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1690 nigel 77
1691 ph10 286 /* Close in an appropriate manner. */
1692    
1693     #ifdef SUPPORT_LIBZ
1694     if (frtype == FR_LIBZ)
1695     gzclose(ingz);
1696     else
1697     #endif
1698    
1699     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1700     read failed. If the error indicates that the file isn't in fact bzipped, try
1701     again as a normal file. */
1702    
1703     #ifdef SUPPORT_LIBBZ2
1704     if (frtype == FR_LIBBZ2)
1705     {
1706     if (rc == 2)
1707     {
1708     int errnum;
1709     const char *err = BZ2_bzerror(inbz2, &errnum);
1710     if (errnum == BZ_DATA_ERROR_MAGIC)
1711     {
1712     BZ2_bzclose(inbz2);
1713     goto PLAIN_FILE;
1714     }
1715     else if (!silent)
1716     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1717     pathname, err);
1718     }
1719     BZ2_bzclose(inbz2);
1720     }
1721     else
1722     #endif
1723    
1724     /* Normal file close */
1725    
1726 nigel 53 fclose(in);
1727 ph10 286
1728     /* Pass back the yield from pcregrep(). */
1729    
1730 nigel 53 return rc;
1731     }
1732    
1733    
1734    
1735    
1736     /*************************************************
1737 nigel 49 * Usage function *
1738     *************************************************/
1739    
1740     static int
1741     usage(int rc)
1742     {
1743 nigel 87 option_item *op;
1744     fprintf(stderr, "Usage: pcregrep [-");
1745     for (op = optionlist; op->one_char != 0; op++)
1746     {
1747     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1748     }
1749     fprintf(stderr, "] [long options] [pattern] [files]\n");
1750 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1751     "options.\n");
1752 nigel 49 return rc;
1753     }
1754    
1755    
1756    
1757    
1758     /*************************************************
1759 nigel 53 * Help function *
1760     *************************************************/
1761    
1762     static void
1763     help(void)
1764     {
1765     option_item *op;
1766    
1767 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1768 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1769 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1770 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1771    
1772     #ifdef SUPPORT_LIBZ
1773     printf("Files whose names end in .gz are read using zlib.\n");
1774     #endif
1775    
1776     #ifdef SUPPORT_LIBBZ2
1777     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1778     #endif
1779    
1780     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1781     printf("Other files and the standard input are read as plain files.\n\n");
1782     #else
1783     printf("All files are read as plain files, without any interpretation.\n\n");
1784     #endif
1785    
1786 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1787     printf("Options:\n");
1788    
1789     for (op = optionlist; op->one_char != 0; op++)
1790     {
1791     int n;
1792     char s[4];
1793 ph10 579
1794 ph10 571 /* Two options were accidentally implemented and documented with underscores
1795     instead of hyphens in their names, something that was not noticed for quite a
1796 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1797     in case people were using them. However, we don't want to display them in the
1798     help data. There are no other options that contain underscores, and we do not
1799     expect ever to implement such options. Therefore, just omit any option that
1800 ph10 571 contains an underscore. */
1801 ph10 579
1802     if (strchr(op->long_name, '_') != NULL) continue;
1803    
1804 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1805 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1806 nigel 53 if (n < 1) n = 1;
1807 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1808 nigel 53 }
1809    
1810 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1811     printf("trailing white space is removed and blank lines are ignored.\n");
1812     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1813 nigel 53
1814 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1815 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1816     }
1817    
1818    
1819    
1820    
1821     /*************************************************
1822 nigel 77 * Handle a single-letter, no data option *
1823 nigel 53 *************************************************/
1824    
1825     static int
1826     handle_option(int letter, int options)
1827     {
1828     switch(letter)
1829     {
1830 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1831 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1832 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1833 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1834 nigel 53 case 'c': count_only = TRUE; break;
1835 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1836     case 'H': filenames = FN_FORCE; break;
1837     case 'h': filenames = FN_NONE; break;
1838 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1839 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1840 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1841 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1842 nigel 53 case 'n': number = TRUE; break;
1843 ph10 565 case 'o': only_matching = 0; break;
1844 nigel 77 case 'q': quiet = TRUE; break;
1845 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1846 nigel 53 case 's': silent = TRUE; break;
1847 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1848 nigel 53 case 'v': invert = TRUE; break;
1849 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1850     case 'x': process_options |= PO_LINE_MATCH; break;
1851 nigel 53
1852     case 'V':
1853 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1854 ph10 561 pcregrep_exit(0);
1855 nigel 53 break;
1856    
1857     default:
1858     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1859 ph10 561 pcregrep_exit(usage(2));
1860 nigel 53 }
1861    
1862     return options;
1863     }
1864    
1865    
1866    
1867    
1868     /*************************************************
1869 nigel 87 * Construct printed ordinal *
1870     *************************************************/
1871    
/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n   the number
Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[32];      /* Ample for any int plus a two-letter ending */
const char *ending = "th";
int last_two = n % 100;

/* The ending is normally chosen by the final digit, but numbers that end in
11, 12 or 13 are exceptions and always take "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1890    
1891    
1892    
1893     /*************************************************
1894     * Compile a single pattern *
1895     *************************************************/
1896    
1897     /* When the -F option has been used, this is called for each substring.
1898     Otherwise it's called for each supplied pattern.
1899    
1900     Arguments:
1901     pattern the pattern string
1902     options the PCRE options
1903     filename the file name, or NULL for a command-line pattern
1904     count 0 if this is the only command line pattern, or
1905     number of the command line pattern, or
1906     linenumber for a pattern from a file
1907    
1908     Returns: TRUE on success, FALSE after an error
1909     */
1910    
1911     static BOOL
1912     compile_single_pattern(char *pattern, int options, char *filename, int count)
1913     {
1914     char buffer[MBUFTHIRD + 16];
1915     const char *error;
1916     int errptr;
1917    
1918     if (pattern_count >= MAX_PATTERN_COUNT)
1919     {
1920     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1921     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1922     return FALSE;
1923     }
1924    
1925     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1926     suffix[process_options]);
1927     pattern_list[pattern_count] =
1928     pcre_compile(buffer, options, &error, &errptr, pcretables);
1929 ph10 142 if (pattern_list[pattern_count] != NULL)
1930 ph10 141 {
1931 ph10 142 pattern_count++;
1932 ph10 141 return TRUE;
1933 ph10 142 }
1934 nigel 87
1935     /* Handle compile errors */
1936    
1937     errptr -= (int)strlen(prefix[process_options]);
1938     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1939    
1940     if (filename == NULL)
1941     {
1942     if (count == 0)
1943     fprintf(stderr, "pcregrep: Error in command-line regex "
1944     "at offset %d: %s\n", errptr, error);
1945     else
1946     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1947     "at offset %d: %s\n", ordin(count), errptr, error);
1948     }
1949     else
1950     {
1951     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1952     "at offset %d: %s\n", count, filename, errptr, error);
1953     }
1954    
1955     return FALSE;
1956     }
1957    
1958    
1959    
1960     /*************************************************
1961     * Compile one supplied pattern *
1962     *************************************************/
1963    
1964     /* When the -F option has been used, each string may be a list of strings,
1965 nigel 91 separated by line breaks. They will be matched literally.
1966 nigel 87
1967     Arguments:
1968     pattern the pattern string
1969     options the PCRE options
1970     filename the file name, or NULL for a command-line pattern
1971     count 0 if this is the only command line pattern, or
1972     number of the command line pattern, or
1973     linenumber for a pattern from a file
1974    
1975     Returns: TRUE on success, FALSE after an error
1976     */
1977    
1978     static BOOL
1979     compile_pattern(char *pattern, int options, char *filename, int count)
1980     {
1981     if ((process_options & PO_FIXED_STRINGS) != 0)
1982     {
1983 nigel 93 char *eop = pattern + strlen(pattern);
1984 nigel 87 char buffer[MBUFTHIRD];
1985     for(;;)
1986     {
1987 nigel 93 int ellength;
1988     char *p = end_of_line(pattern, eop, &ellength);
1989     if (ellength == 0)
1990 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1991 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1992 nigel 93 pattern = p;
1993 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1994     return FALSE;
1995     }
1996     }
1997     else return compile_single_pattern(pattern, options, filename, count);
1998     }
1999    
2000    
2001    
2002     /*************************************************
2003 nigel 49 * Main program *
2004     *************************************************/
2005    
2006 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2007    
2008 nigel 49 int
2009     main(int argc, char **argv)
2010     {
2011 nigel 53 int i, j;
2012 nigel 49 int rc = 1;
2013 nigel 87 int pcre_options = 0;
2014     int cmd_pattern_count = 0;
2015 ph10 141 int hint_count = 0;
2016 nigel 49 int errptr;
2017 nigel 87 BOOL only_one_at_top;
2018     char *patterns[MAX_PATTERN_COUNT];
2019     const char *locale_from = "--locale";
2020 nigel 49 const char *error;
2021    
2022 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2023     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2024 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2025 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2026 ph10 391 rather than escapes such as as '\r'. */
2027 nigel 91
2028     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2029     switch(i)
2030     {
2031 ph10 391 default: newline = (char *)"lf"; break;
2032     case 13: newline = (char *)"cr"; break;
2033     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2034     case -1: newline = (char *)"any"; break;
2035     case -2: newline = (char *)"anycrlf"; break;
2036 nigel 91 }
2037    
2038 nigel 49 /* Process the options */
2039    
2040     for (i = 1; i < argc; i++)
2041     {
2042 nigel 77 option_item *op = NULL;
2043     char *option_data = (char *)""; /* default to keep compiler happy */
2044     BOOL longop;
2045     BOOL longopwasequals = FALSE;
2046    
2047 nigel 49 if (argv[i][0] != '-') break;
2048 nigel 53
2049 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2050 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2051 nigel 63
2052 nigel 77 if (argv[i][1] == 0)
2053     {
2054 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2055 ph10 561 else pcregrep_exit(usage(2));
2056 nigel 77 }
2057 nigel 63
2058 nigel 77 /* Handle a long name option, or -- to terminate the options */
2059 nigel 53
2060     if (argv[i][1] == '-')
2061 nigel 49 {
2062 nigel 77 char *arg = argv[i] + 2;
2063     char *argequals = strchr(arg, '=');
2064 nigel 53
2065 nigel 77 if (*arg == 0) /* -- terminates options */
2066 nigel 49 {
2067 nigel 77 i++;
2068     break; /* out of the options-handling loop */
2069 nigel 53 }
2070 nigel 49
2071 nigel 77 longop = TRUE;
2072    
2073     /* Some long options have data that follows after =, for example file=name.
2074     Some options have variations in the long name spelling: specifically, we
2075     allow "regexp" because GNU grep allows it, though I personally go along
2076 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2077 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2078     both these categories. */
2079 nigel 77
2080 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2081     {
2082 nigel 77 char *opbra = strchr(op->long_name, '(');
2083     char *equals = strchr(op->long_name, '=');
2084 ph10 461
2085 ph10 422 /* Handle options with only one spelling of the name */
2086 ph10 461
2087 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2088 nigel 53 {
2089 nigel 77 if (equals == NULL) /* Not thing=data case */
2090     {
2091     if (strcmp(arg, op->long_name) == 0) break;
2092     }
2093     else /* Special case xxx=data */
2094     {
2095 ph10 530 int oplen = (int)(equals - op->long_name);
2096 ph10 535 int arglen = (argequals == NULL)?
2097 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2098 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2099     {
2100     option_data = arg + arglen;
2101     if (*option_data == '=')
2102     {
2103     option_data++;
2104     longopwasequals = TRUE;
2105     }
2106     break;
2107     }
2108     }
2109 nigel 53 }
2110 ph10 461
2111 ph10 422 /* Handle options with an alternate spelling of the name */
2112 ph10 461
2113     else
2114 nigel 77 {
2115     char buff1[24];
2116     char buff2[24];
2117 ph10 461
2118 ph10 530 int baselen = (int)(opbra - op->long_name);
2119     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2120 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2121 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2122 ph10 461
2123 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2124 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2125 ph10 461
2126     if (strncmp(arg, buff1, arglen) == 0 ||
2127 ph10 422 strncmp(arg, buff2, arglen) == 0)
2128     {
2129     if (equals != NULL && argequals != NULL)
2130     {
2131 ph10 461 option_data = argequals;
2132 ph10 422 if (*option_data == '=')
2133     {
2134 ph10 461 option_data++;
2135 ph10 422 longopwasequals = TRUE;
2136 ph10 461 }
2137     }
2138 nigel 77 break;
2139 ph10 461 }
2140 nigel 77 }
2141 nigel 53 }
2142 nigel 77
2143 nigel 53 if (op->one_char == 0)
2144     {
2145     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2146 ph10 561 pcregrep_exit(usage(2));
2147 nigel 53 }
2148     }
2149 nigel 49
2150 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2151     are not in the right form for putting in the option table because they use
2152     only one hyphen, yet are more than one character long. By putting them
2153     separately here, they will not get displayed as part of the help() output,
2154     but I don't think Jeffrey will care about that. */
2155    
2156     #ifdef JFRIEDL_DEBUG
2157     else if (strcmp(argv[i], "-pre") == 0) {
2158     jfriedl_prefix = argv[++i];
2159     continue;
2160     } else if (strcmp(argv[i], "-post") == 0) {
2161     jfriedl_postfix = argv[++i];
2162     continue;
2163     } else if (strcmp(argv[i], "-XT") == 0) {
2164     sscanf(argv[++i], "%d", &jfriedl_XT);
2165     continue;
2166     } else if (strcmp(argv[i], "-XR") == 0) {
2167     sscanf(argv[++i], "%d", &jfriedl_XR);
2168     continue;
2169     }
2170     #endif
2171    
2172    
2173 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2174     continue till we hit the last one or one that needs data. */
2175 nigel 53
2176     else
2177     {
2178     char *s = argv[i] + 1;
2179 nigel 77 longop = FALSE;
2180 nigel 53 while (*s != 0)
2181     {
2182 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2183 ph10 579 {
2184     if (*s == op->one_char) break;
2185 ph10 565 }
2186 nigel 77 if (op->one_char == 0)
2187 nigel 53 {
2188 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2189     *s, argv[i]);
2190 ph10 561 pcregrep_exit(usage(2));
2191 nigel 77 }
2192 ph10 579
2193 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2194 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2195 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2196     with other single-character options in the same string. */
2197 ph10 579
2198 ph10 565 option_data = s+1;
2199     if (op->type == OP_OP_NUMBER)
2200 ph10 579 {
2201     if (isdigit((unsigned char)s[1])) break;
2202 nigel 53 }
2203 ph10 565 else /* Check for end or a dataless option */
2204 ph10 579 {
2205 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2206 ph10 579 }
2207    
2208     /* Handle a single-character option with no data, then loop for the
2209 ph10 565 next character in the string. */
2210    
2211 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2212 nigel 49 }
2213     }
2214 nigel 77
2215 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2216     is NO_DATA, it means that there is no data, and the option might set
2217     something in the PCRE options. */
2218 nigel 77
2219     if (op->type == OP_NODATA)
2220     {
2221 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2222     continue;
2223     }
2224    
2225     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2226     either has a value or defaults to something. It cannot have data in a
2227 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2228 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2229 nigel 87
2230     if (*option_data == 0 &&
2231     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2232     {
2233     switch (op->one_char)
2234 nigel 77 {
2235 nigel 87 case N_COLOUR:
2236     colour_option = (char *)"auto";
2237     break;
2238 ph10 579
2239 ph10 565 case 'o':
2240     only_matching = 0;
2241 ph10 579 break;
2242    
2243 nigel 87 #ifdef JFRIEDL_DEBUG
2244     case 'S':
2245     S_arg = 0;
2246     break;
2247     #endif
2248 nigel 77 }
2249 nigel 87 continue;
2250     }
2251 nigel 77
2252 nigel 87 /* Otherwise, find the data string for the option. */
2253    
2254     if (*option_data == 0)
2255     {
2256     if (i >= argc - 1 || longopwasequals)
2257 nigel 77 {
2258 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2259 ph10 561 pcregrep_exit(usage(2));
2260 nigel 87 }
2261     option_data = argv[++i];
2262     }
2263    
2264     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2265     multiple times to create a list of patterns. */
2266    
2267     if (op->type == OP_PATLIST)
2268     {
2269     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2270     {
2271     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2272     MAX_PATTERN_COUNT);
2273     return 2;
2274     }
2275     patterns[cmd_pattern_count++] = option_data;
2276     }
2277    
2278     /* Otherwise, deal with single string or numeric data values. */
2279    
2280 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2281     op->type != OP_OP_NUMBER)
2282 nigel 87 {
2283     *((char **)op->dataptr) = option_data;
2284     }
2285 ph10 558
2286     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2287     only for unpicking arguments, so just keep it simple. */
2288    
2289 nigel 87 else
2290     {
2291 ph10 561 unsigned long int n = 0;
2292 ph10 558 char *endptr = option_data;
2293     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2294     while (isdigit((unsigned char)(*endptr)))
2295     n = n * 10 + (int)(*endptr++ - '0');
2296 nigel 87 if (*endptr != 0)
2297     {
2298     if (longop)
2299 nigel 77 {
2300 nigel 87 char *equals = strchr(op->long_name, '=');
2301     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2302 ph10 530 (int)(equals - op->long_name);
2303 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2304     option_data, nlen, op->long_name);
2305 nigel 77 }
2306 nigel 87 else
2307     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2308     option_data, op->one_char);
2309 ph10 561 pcregrep_exit(usage(2));
2310 nigel 77 }
2311 ph10 584 if (op->type == OP_LONGNUMBER)
2312     *((unsigned long int *)op->dataptr) = n;
2313     else
2314     *((int *)op->dataptr) = n;
2315 nigel 77 }
2316 nigel 49 }
2317    
2318 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2319     for -A and -B. */
2320    
2321     if (both_context > 0)
2322     {
2323     if (after_context == 0) after_context = both_context;
2324     if (before_context == 0) before_context = both_context;
2325     }
2326 ph10 286
2327     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2328 ph10 565 However, the latter two set only_matching. */
2329 nigel 77
2330 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2331 ph10 286 (file_offsets && line_offsets))
2332 ph10 280 {
2333     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2334     "and/or --line-offsets\n");
2335 ph10 561 pcregrep_exit(usage(2));
2336 ph10 280 }
2337    
2338 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2339 ph10 286
2340 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2341     LC_ALL environment variable is set, and if so, use it. */
2342 nigel 49
2343 nigel 87 if (locale == NULL)
2344 nigel 53 {
2345 nigel 87 locale = getenv("LC_ALL");
2346     locale_from = "LCC_ALL";
2347 nigel 53 }
2348 nigel 49
2349 nigel 87 if (locale == NULL)
2350     {
2351     locale = getenv("LC_CTYPE");
2352     locale_from = "LC_CTYPE";
2353     }
2354 nigel 49
2355 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2356     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2357    
2358     if (locale != NULL)
2359 nigel 49 {
2360 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2361 nigel 53 {
2362 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2363     locale, locale_from);
2364 nigel 53 return 2;
2365     }
2366 nigel 87 pcretables = pcre_maketables();
2367     }
2368 nigel 77
2369 nigel 87 /* Sort out colouring */
2370    
2371     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2372     {
2373     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2374     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2375     else
2376 nigel 53 {
2377 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2378     colour_option);
2379     return 2;
2380 nigel 77 }
2381 nigel 87 if (do_colour)
2382 nigel 77 {
2383 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2384     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2385     if (cs != NULL) colour_string = cs;
2386 nigel 77 }
2387 nigel 87 }
2388 ph10 535
2389 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2390    
2391     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2392     {
2393     pcre_options |= PCRE_NEWLINE_CR;
2394 nigel 93 endlinetype = EL_CR;
2395 nigel 91 }
2396     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2397     {
2398     pcre_options |= PCRE_NEWLINE_LF;
2399 nigel 93 endlinetype = EL_LF;
2400 nigel 91 }
2401     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2402     {
2403     pcre_options |= PCRE_NEWLINE_CRLF;
2404 nigel 93 endlinetype = EL_CRLF;
2405 nigel 91 }
2406 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2407     {
2408     pcre_options |= PCRE_NEWLINE_ANY;
2409     endlinetype = EL_ANY;
2410     }
2411 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2412     {
2413     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2414     endlinetype = EL_ANYCRLF;
2415     }
2416 nigel 91 else
2417     {
2418     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2419     return 2;
2420     }
2421    
2422 nigel 87 /* Interpret the text values for -d and -D */
2423    
2424     if (dee_option != NULL)
2425     {
2426     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2427     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2428     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2429     else
2430 nigel 77 {
2431 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2432     return 2;
2433 nigel 53 }
2434 nigel 49 }
2435    
2436 nigel 87 if (DEE_option != NULL)
2437     {
2438     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2439     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2440     else
2441     {
2442     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2443     return 2;
2444     }
2445     }
2446 nigel 49
2447 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2448 nigel 87
2449     #ifdef JFRIEDL_DEBUG
2450     if (S_arg > 9)
2451 nigel 49 {
2452 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2453     return 2;
2454     }
2455 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2456     {
2457     if (jfriedl_XT == 0) jfriedl_XT = 1;
2458     if (jfriedl_XR == 0) jfriedl_XR = 1;
2459     }
2460 nigel 87 #endif
2461 nigel 77
2462 nigel 87 /* Get memory to store the pattern and hints lists. */
2463    
2464     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2465     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2466    
2467     if (pattern_list == NULL || hints_list == NULL)
2468     {
2469     fprintf(stderr, "pcregrep: malloc failed\n");
2470 ph10 123 goto EXIT2;
2471 nigel 87 }
2472    
2473     /* If no patterns were provided by -e, and there is no file provided by -f,
2474     the first argument is the one and only pattern, and it must exist. */
2475    
2476     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2477     {
2478 nigel 63 if (i >= argc) return usage(2);
2479 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2480     }
2481 nigel 77
2482 nigel 87 /* Compile the patterns that were provided on the command line, either by
2483     multiple uses of -e or as a single unkeyed pattern. */
2484    
2485     for (j = 0; j < cmd_pattern_count; j++)
2486     {
2487     if (!compile_pattern(patterns[j], pcre_options, NULL,
2488     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2489 ph10 123 goto EXIT2;
2490 nigel 87 }
2491    
2492     /* Compile the regular expressions that are provided in a file. */
2493    
2494     if (pattern_filename != NULL)
2495     {
2496     int linenumber = 0;
2497     FILE *f;
2498     char *filename;
2499     char buffer[MBUFTHIRD];
2500    
2501     if (strcmp(pattern_filename, "-") == 0)
2502 nigel 77 {
2503 nigel 87 f = stdin;
2504     filename = stdin_name;
2505 nigel 77 }
2506 nigel 87 else
2507 nigel 77 {
2508 nigel 87 f = fopen(pattern_filename, "r");
2509     if (f == NULL)
2510     {
2511     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2512     strerror(errno));
2513 ph10 123 goto EXIT2;
2514 nigel 87 }
2515     filename = pattern_filename;
2516 nigel 77 }
2517    
2518 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2519 nigel 53 {
2520 nigel 87 char *s = buffer + (int)strlen(buffer);
2521     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2522     *s = 0;
2523     linenumber++;
2524     if (buffer[0] == 0) continue; /* Skip blank lines */
2525     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2526 ph10 121 goto EXIT2;
2527 nigel 53 }
2528 nigel 87
2529     if (f != stdin) fclose(f);
2530 nigel 49 }
2531    
2532 nigel 77 /* Study the regular expressions, as we will be running them many times */
2533 nigel 53
2534     for (j = 0; j < pattern_count; j++)
2535     {
2536     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2537     if (error != NULL)
2538     {
2539     char s[16];
2540     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2541     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2542 ph10 121 goto EXIT2;
2543 nigel 53 }
2544 ph10 142 hint_count++;
2545 nigel 53 }
2546 ph10 579
2547 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2548     pcre_extra block for each pattern. */
2549 nigel 53
2550 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2551     {
2552     for (j = 0; j < pattern_count; j++)
2553     {
2554     if (hints_list[j] == NULL)
2555     {
2556     hints_list[j] = malloc(sizeof(pcre_extra));
2557 ph10 579 if (hints_list[j] == NULL)
2558 ph10 561 {
2559     fprintf(stderr, "pcregrep: malloc failed\n");
2560     pcregrep_exit(2);
2561     }
2562     }
2563     if (match_limit > 0)
2564 ph10 579 {
2565 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2566     hints_list[j]->match_limit = match_limit;
2567 ph10 579 }
2568 ph10 561 if (match_limit_recursion > 0)
2569 ph10 579 {
2570 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2571     hints_list[j]->match_limit_recursion = match_limit_recursion;
2572 ph10 579 }
2573 ph10 561 }
2574 ph10 579 }
2575 ph10 561
2576 nigel 77 /* If there are include or exclude patterns, compile them. */
2577    
2578     if (exclude_pattern != NULL)
2579     {
2580 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2581     pcretables);
2582 nigel 77 if (exclude_compiled == NULL)
2583     {
2584     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2585     errptr, error);
2586 ph10 121 goto EXIT2;
2587 nigel 77 }
2588     }
2589    
2590     if (include_pattern != NULL)
2591     {
2592 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2593     pcretables);
2594 nigel 77 if (include_compiled == NULL)
2595     {
2596     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2597     errptr, error);
2598 ph10 121 goto EXIT2;
2599 nigel 77 }
2600     }
2601    
2602 ph10 325 if (exclude_dir_pattern != NULL)
2603     {
2604     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2605     pcretables);
2606     if (exclude_dir_compiled == NULL)
2607     {
2608     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2609     errptr, error);
2610     goto EXIT2;
2611     }
2612     }
2613    
2614     if (include_dir_pattern != NULL)
2615     {
2616     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2617     pcretables);
2618     if (include_dir_compiled == NULL)
2619     {
2620     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2621     errptr, error);
2622     goto EXIT2;
2623     }
2624     }
2625    
2626 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2627 nigel 49
2628 nigel 87 if (i >= argc)
2629 ph10 121 {
2630 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2631 ph10 121 goto EXIT;
2632 ph10 123 }
2633 nigel 49
2634 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2635     Pass in the fact that there is only one argument at top level - this suppresses
2636 nigel 87 the file name if the argument is not a directory and filenames are not
2637     otherwise forced. */
2638 nigel 49
2639 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2640 nigel 49
2641     for (; i < argc; i++)
2642     {
2643 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2644     only_one_at_top);
2645 nigel 77 if (frc > 1) rc = frc;
2646     else if (frc == 0 && rc == 1) rc = 0;
2647 nigel 49 }
2648    
2649 ph10 121 EXIT:
2650     if (pattern_list != NULL)
2651     {
2652 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2653 ph10 121 free(pattern_list);
2654 ph10 123 }
2655 ph10 121 if (hints_list != NULL)
2656     {
2657 ph10 579 for (i = 0; i < hint_count; i++)
2658 ph10 561 {
2659     if (hints_list[i] != NULL) free(hints_list[i]);
2660 ph10 579 }
2661 ph10 121 free(hints_list);
2662 ph10 123 }
2663 ph10 561 pcregrep_exit(rc);
2664 ph10 121
2665     EXIT2:
2666     rc = 2;
2667     goto EXIT;
2668 nigel 49 }
2669    
2670 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12