/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 636 - (hide annotations) (download)
Sun Jul 24 17:43:51 2011 UTC (21 months, 3 weeks ago) by ph10
File MIME type: text/plain
File size: 77222 byte(s)
Code tidy

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77     #define MBUFTHIRD BUFSIZ
78     #else
79     #define MBUFTHIRD 8192
80     #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
107 ph10 535 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108     environments), a warning is issued if the value of fwrite() is ignored.
109     Unfortunately, casting to (void) does not suppress the warning. To get round
110     this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 ph10 515 apply to fprintf(). */
112 nigel 93
113 ph10 515 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166 ph10 565 static int only_matching = -1;
167 nigel 87 static int process_options = 0;
168 nigel 77
169 ph10 561 static unsigned long int match_limit = 0;
170     static unsigned long int match_limit_recursion = 0;
171    
172 nigel 49 static BOOL count_only = FALSE;
173 nigel 87 static BOOL do_colour = FALSE;
174 ph10 280 static BOOL file_offsets = FALSE;
175 nigel 77 static BOOL hyphenpending = FALSE;
176 nigel 49 static BOOL invert = FALSE;
177 ph10 519 static BOOL line_buffered = FALSE;
178 ph10 280 static BOOL line_offsets = FALSE;
179 nigel 77 static BOOL multiline = FALSE;
180 nigel 49 static BOOL number = FALSE;
181 ph10 420 static BOOL omit_zero_count = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
187 nigel 53 /* Structure for options and list of them */
188 nigel 49
189 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
190     OP_OP_NUMBER, OP_PATLIST };
191 nigel 77
192 nigel 53 typedef struct option_item {
193 nigel 77 int type;
194 nigel 53 int one_char;
195 nigel 77 void *dataptr;
196 nigel 67 const char *long_name;
197     const char *help_text;
198 nigel 53 } option_item;
199 nigel 49
200 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
201     used to identify them. */
202    
203 ph10 325 #define N_COLOUR (-1)
204     #define N_EXCLUDE (-2)
205     #define N_EXCLUDE_DIR (-3)
206     #define N_HELP (-4)
207     #define N_INCLUDE (-5)
208     #define N_INCLUDE_DIR (-6)
209     #define N_LABEL (-7)
210     #define N_LOCALE (-8)
211     #define N_NULL (-9)
212     #define N_LOFFSETS (-10)
213     #define N_FOFFSETS (-11)
214 ph10 519 #define N_LBUFFER (-12)
215 ph10 561 #define N_M_LIMIT (-13)
216     #define N_M_LIMIT_REC (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254 ph10 571
255     /* These two were accidentally implemented with underscores instead of
256     hyphens in the option names. As this was not discovered for several releases,
257     the incorrect versions are left in the table for compatibility. However, the
258     --help function misses out any option that has an underscore in its name. */
259 ph10 579
260 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 ph10 571
263 nigel 87 #ifdef JFRIEDL_DEBUG
264     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265     #endif
266     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272     { OP_NODATA, 0, NULL, NULL, NULL }
273 nigel 53 };
274    
275 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
276     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
277     that the combination of -w and -x has the same effect as -x on its own, so we
278     can treat them as the same. */
279 nigel 53
280 nigel 87 static const char *prefix[] = {
281     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
282    
283     static const char *suffix[] = {
284     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
285    
286 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
287 nigel 87
288 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
289 nigel 87
290 nigel 93 const char utf8_table4[] = {
291     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
292     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
293     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
294     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298 nigel 53 /*************************************************
299 ph10 586 * Exit from the program *
300     *************************************************/
301    
302     /* If there has been a resource error, give a suitable message.
303    
304     Argument: the return code
305     Returns: does not return
306     */
307    
308     static void
309     pcregrep_exit(int rc)
310     {
311     if (resource_error)
312     {
313     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316     }
317    
318     exit(rc);
319     }
320    
321    
322     /*************************************************
323 nigel 87 * OS-specific functions *
324 nigel 53 *************************************************/
325    
326     /* These functions are defined so that they can be made system specific,
327 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
328 nigel 53
329    
330     /************* Directory scanning in Unix ***********/
331    
332 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 nigel 53 #include <sys/types.h>
334     #include <sys/stat.h>
335     #include <dirent.h>
336    
337     typedef DIR directory_type;
338    
339 nigel 67 static int
340 nigel 53 isdirectory(char *filename)
341     {
342     struct stat statbuf;
343     if (stat(filename, &statbuf) < 0)
344     return 0; /* In the expectation that opening as a file will fail */
345     return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
346     }
347    
348 nigel 67 static directory_type *
349 nigel 53 opendirectory(char *filename)
350     {
351     return opendir(filename);
352     }
353    
354 nigel 67 static char *
355 nigel 53 readdirectory(directory_type *dir)
356     {
357     for (;;)
358     {
359     struct dirent *dent = readdir(dir);
360     if (dent == NULL) return NULL;
361     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362     return dent->d_name;
363     }
364 ph10 151 /* Control never reaches here */
365 nigel 53 }
366    
367 nigel 67 static void
368 nigel 53 closedirectory(directory_type *dir)
369     {
370     closedir(dir);
371     }
372    
373    
374 nigel 87 /************* Test for regular file in Unix **********/
375    
376     static int
377     isregfile(char *filename)
378     {
379     struct stat statbuf;
380     if (stat(filename, &statbuf) < 0)
381     return 1; /* In the expectation that opening as a file will fail */
382     return (statbuf.st_mode & S_IFMT) == S_IFREG;
383     }
384    
385    
386 ph10 519 /************* Test for a terminal in Unix **********/
387 nigel 87
388     static BOOL
389     is_stdout_tty(void)
390     {
391     return isatty(fileno(stdout));
392     }
393    
394 ph10 519 static BOOL
395     is_file_tty(FILE *f)
396     {
397     return isatty(fileno(f));
398     }
399 nigel 87
400 ph10 519
401 nigel 63 /************* Directory scanning in Win32 ***********/
402 nigel 53
403 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
406     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408     undefined when it is indeed undefined. */
409 nigel 53
410 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411 nigel 63
412     #ifndef STRICT
413     # define STRICT
414     #endif
415     #ifndef WIN32_LEAN_AND_MEAN
416     # define WIN32_LEAN_AND_MEAN
417     #endif
418 ph10 283
419     #include <windows.h>
420    
421 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
422     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423     #endif
424    
425 nigel 63 typedef struct directory_type
426     {
427     HANDLE handle;
428     BOOL first;
429     WIN32_FIND_DATA data;
430     } directory_type;
431    
432     int
433     isdirectory(char *filename)
434     {
435     DWORD attr = GetFileAttributes(filename);
436     if (attr == INVALID_FILE_ATTRIBUTES)
437     return 0;
438     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439     }
440    
441     directory_type *
442     opendirectory(char *filename)
443     {
444     size_t len;
445     char *pattern;
446     directory_type *dir;
447     DWORD err;
448     len = strlen(filename);
449     pattern = (char *) malloc(len + 3);
450     dir = (directory_type *) malloc(sizeof(*dir));
451     if ((pattern == NULL) || (dir == NULL))
452     {
453     fprintf(stderr, "pcregrep: malloc failed\n");
454 ph10 561 pcregrep_exit(2);
455 nigel 63 }
456     memcpy(pattern, filename, len);
457     memcpy(&(pattern[len]), "\\*", 3);
458     dir->handle = FindFirstFile(pattern, &(dir->data));
459     if (dir->handle != INVALID_HANDLE_VALUE)
460     {
461     free(pattern);
462     dir->first = TRUE;
463     return dir;
464     }
465     err = GetLastError();
466     free(pattern);
467     free(dir);
468     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469     return NULL;
470     }
471    
472     char *
473     readdirectory(directory_type *dir)
474     {
475     for (;;)
476     {
477     if (!dir->first)
478     {
479     if (!FindNextFile(dir->handle, &(dir->data)))
480     return NULL;
481     }
482     else
483     {
484     dir->first = FALSE;
485     }
486     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487     return dir->data.cFileName;
488     }
489     #ifndef _MSC_VER
490     return NULL; /* Keep compiler happy; never executed */
491     #endif
492     }
493    
494     void
495     closedirectory(directory_type *dir)
496     {
497     FindClose(dir->handle);
498     free(dir);
499     }
500    
501    
502 nigel 87 /************* Test for regular file in Win32 **********/
503    
504     /* I don't know how to do this, or if it can be done; assume all paths are
505     regular if they are not directories. */
506    
/* Win32: anything that isdirectory() does not report as a directory is taken
to be a regular file. Yields 1 or 0. */

int
isregfile(char *filename)
{
  if (isdirectory(filename)) return 0;
  return 1;
}
511    
512    
513 ph10 519 /************* Test for a terminal in Win32 **********/
514 nigel 87
515     /* I don't know how to do this; assume never */
516    
517     static BOOL
518     is_stdout_tty(void)
519     {
520 ph10 283 return FALSE;
521 nigel 87 }
522    
523 ph10 519 static BOOL
524     is_file_tty(FILE *f)
525     {
526     return FALSE;
527     }
528 nigel 87
529 ph10 519
530 nigel 53 /************* Directory scanning when we can't do it ***********/
531    
532     /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534 nigel 63 #else
535    
/* Fallback when no directory support is available: directory_type is just
void, and none of the functions examines its argument. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */
int
isdirectory(char *filename)
{
  return 0;
}

/* Opening always fails: a null pointer is returned. */
directory_type *
opendirectory(char *filename)
{
  return (directory_type *)NULL;
}

/* There are never any entries: a null pointer is returned. */
char *
readdirectory(directory_type *dir)
{
  return (char *)NULL;
}

/* Nothing to release. */
void
closedirectory(directory_type *dir)
{
}
542    
543 nigel 87
544     /************* Test for regular when we can't do it **********/
545    
546     /* Assume all files are regular. */
547    
/* Fallback: every path is taken to be a regular file; the argument is not
examined. */
int
isregfile(char *filename)
{
  return 1;
}
549    
550    
551 ph10 519 /************* Test for a terminal when we can't do it **********/
552 nigel 87
553     static BOOL
554     is_stdout_tty(void)
555     {
556     return FALSE;
557     }
558    
559 ph10 519 static BOOL
560     is_file_tty(FILE *f)
561     {
562     return FALSE;
563     }
564 nigel 87
565 nigel 53 #endif
566    
567    
568    
569 ph10 137 #ifndef HAVE_STRERROR
570 nigel 49 /*************************************************
571     * Provide strerror() for non-ANSI libraries *
572     *************************************************/
573    
574     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
575     in their libraries, but can provide the same facility by this simple
576     alternative function. */
577    
/* Table of system error messages and its size, expected to be supplied by the
C library on systems that need this replacement. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the number up in sys_errlist, giving a fixed
text for any value outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
587     #endif /* HAVE_STRERROR */
588    
589    
590    
591     /*************************************************
592 ph10 519 * Read one line of input *
593     *************************************************/
594    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Bytes are copied one at a time with fgetc() until a '\n' has been stored, the
buffer is full, or end of file (or a read error) is reported. No terminating
zero is added. The space limit is tested before each byte is fetched, so a
length of zero or less stores nothing, consumes nothing from the file, and
yields zero.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
  int yield = 0;

  while (yield < length)
  {
    int c = fgetc(f);
    if (c == EOF) break;
    buffer[yield++] = (char)c;
    if (c == '\n') break;          /* The newline is kept in the buffer */
  }

  return yield;
}
622    
623    
624    
625     /*************************************************
626 nigel 93 * Find end of line *
627     *************************************************/
628    
629     /* The length of the endline sequence that is found is set via lenptr. This may
630     be zero at the very end of the file if there is no line-ending sequence there.
631    
632     Arguments:
633     p current position in line
634     endptr end of available data
635     lenptr where to put the length of the eol sequence
636    
637 ph10 587 Returns: pointer to the last byte of the line, including the newline byte(s)
638 nigel 93 */
639    
640     static char *
641     end_of_line(char *p, char *endptr, int *lenptr)
642     {
643     switch(endlinetype)
644     {
645     default: /* Just in case */
646     case EL_LF:
647     while (p < endptr && *p != '\n') p++;
648     if (p < endptr)
649     {
650     *lenptr = 1;
651     return p + 1;
652     }
653     *lenptr = 0;
654     return endptr;
655    
656     case EL_CR:
657     while (p < endptr && *p != '\r') p++;
658     if (p < endptr)
659     {
660     *lenptr = 1;
661     return p + 1;
662     }
663     *lenptr = 0;
664     return endptr;
665    
666     case EL_CRLF:
667     for (;;)
668     {
669     while (p < endptr && *p != '\r') p++;
670     if (++p >= endptr)
671     {
672     *lenptr = 0;
673     return endptr;
674     }
675     if (*p == '\n')
676     {
677     *lenptr = 2;
678     return p + 1;
679     }
680     }
681     break;
682    
683 ph10 149 case EL_ANYCRLF:
684     while (p < endptr)
685     {
686     int extra = 0;
687     register int c = *((unsigned char *)p);
688    
689     if (utf8 && c >= 0xc0)
690     {
691     int gcii, gcss;
692     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693     gcss = 6*extra;
694     c = (c & utf8_table3[extra]) << gcss;
695     for (gcii = 1; gcii <= extra; gcii++)
696     {
697     gcss -= 6;
698     c |= (p[gcii] & 0x3f) << gcss;
699     }
700     }
701    
702     p += 1 + extra;
703    
704     switch (c)
705     {
706     case 0x0a: /* LF */
707     *lenptr = 1;
708     return p;
709    
710     case 0x0d: /* CR */
711     if (p < endptr && *p == 0x0a)
712     {
713     *lenptr = 2;
714     p++;
715     }
716     else *lenptr = 1;
717     return p;
718 ph10 150
719 ph10 149 default:
720     break;
721     }
722     } /* End of loop for ANYCRLF case */
723 ph10 150
724 ph10 149 *lenptr = 0; /* Must have hit the end */
725     return endptr;
726    
727 nigel 93 case EL_ANY:
728     while (p < endptr)
729     {
730     int extra = 0;
731     register int c = *((unsigned char *)p);
732    
733     if (utf8 && c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (p[gcii] & 0x3f) << gcss;
743     }
744     }
745    
746     p += 1 + extra;
747    
748     switch (c)
749     {
750     case 0x0a: /* LF */
751     case 0x0b: /* VT */
752     case 0x0c: /* FF */
753     *lenptr = 1;
754     return p;
755    
756     case 0x0d: /* CR */
757     if (p < endptr && *p == 0x0a)
758     {
759     *lenptr = 2;
760     p++;
761     }
762     else *lenptr = 1;
763     return p;
764    
765     case 0x85: /* NEL */
766     *lenptr = utf8? 2 : 1;
767     return p;
768    
769     case 0x2028: /* LS */
770     case 0x2029: /* PS */
771     *lenptr = 3;
772     return p;
773    
774     default:
775     break;
776     }
777     } /* End of loop for ANY case */
778    
779     *lenptr = 0; /* Must have hit the end */
780     return endptr;
781     } /* End of overall switch */
782     }
783    
784    
785    
786     /*************************************************
787     * Find start of previous line *
788     *************************************************/
789    
790     /* This is called when looking back for before lines to print.
791    
792     Arguments:
793     p start of the subsequent line
794     startptr start of available data
795    
796     Returns: pointer to the start of the previous line
797     */
798    
799     static char *
800     previous_line(char *p, char *startptr)
801     {
802     switch(endlinetype)
803     {
804     default: /* Just in case */
805     case EL_LF:
806     p--;
807     while (p > startptr && p[-1] != '\n') p--;
808     return p;
809    
810     case EL_CR:
811     p--;
812     while (p > startptr && p[-1] != '\n') p--;
813     return p;
814    
815     case EL_CRLF:
816     for (;;)
817     {
818     p -= 2;
819     while (p > startptr && p[-1] != '\n') p--;
820     if (p <= startptr + 1 || p[-2] == '\r') return p;
821     }
822     return p; /* But control should never get here */
823    
824     case EL_ANY:
825 ph10 150 case EL_ANYCRLF:
826 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827     if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829     while (p > startptr)
830     {
831     register int c;
832     char *pp = p - 1;
833    
834     if (utf8)
835     {
836     int extra = 0;
837     while ((*pp & 0xc0) == 0x80) pp--;
838     c = *((unsigned char *)pp);
839     if (c >= 0xc0)
840     {
841     int gcii, gcss;
842     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843     gcss = 6*extra;
844     c = (c & utf8_table3[extra]) << gcss;
845     for (gcii = 1; gcii <= extra; gcii++)
846     {
847     gcss -= 6;
848     c |= (pp[gcii] & 0x3f) << gcss;
849     }
850     }
851     }
852     else c = *((unsigned char *)pp);
853    
854 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
855 nigel 93 {
856     case 0x0a: /* LF */
857 ph10 149 case 0x0d: /* CR */
858     return p;
859 ph10 150
860 ph10 149 default:
861     break;
862 ph10 150 }
863 ph10 149
864     else switch (c)
865     {
866     case 0x0a: /* LF */
867 nigel 93 case 0x0b: /* VT */
868     case 0x0c: /* FF */
869     case 0x0d: /* CR */
870     case 0x85: /* NEL */
871     case 0x2028: /* LS */
872     case 0x2029: /* PS */
873     return p;
874    
875     default:
876     break;
877     }
878    
879     p = pp; /* Back one character */
880     } /* End of loop for ANY case */
881    
882     return startptr; /* Hit start of data */
883     } /* End of overall switch */
884     }
885    
886    
887    
888    
889    
890     /*************************************************
891 nigel 77 * Print the previous "after" lines *
892 nigel 49 *************************************************/
893    
894 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
895 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
896     that a binary zero does not terminate it.
897 nigel 77
898     Arguments:
899     lastmatchnumber the number of the last matching line, plus one
900     lastmatchrestart where we restarted after the last match
901     endptr end of available data
902     printname filename for printing
903    
904     Returns: nothing
905     */
906    
907     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908     char *endptr, char *printname)
909     {
910     if (after_context > 0 && lastmatchnumber > 0)
911     {
912     int count = 0;
913     while (lastmatchrestart < endptr && count++ < after_context)
914     {
915 nigel 93 int ellength;
916 nigel 77 char *pp = lastmatchrestart;
917     if (printname != NULL) fprintf(stdout, "%s-", printname);
918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 nigel 93 pp = end_of_line(pp, endptr, &ellength);
920 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 nigel 93 lastmatchrestart = pp;
922 nigel 77 }
923     hyphenpending = TRUE;
924     }
925     }
926    
927    
928    
929     /*************************************************
930 ph10 378 * Apply patterns to subject till one matches *
931     *************************************************/
932    
933 ph10 392 /* This function is called to run through all patterns, looking for a match. It
934     is used multiple times for the same subject when colouring is enabled, in order
935 ph10 378 to find all possible matches.
936    
937     Arguments:
938 ph10 632 matchptr the start of the subject
939     length the length of the subject to match
940     startoffset where to start matching
941     offsets the offets vector to fill in
942     mrc address of where to put the result of pcre_exec()
943 ph10 392
944     Returns: TRUE if there was a match
945 ph10 378 FALSE if there was no match
946     invert if there was a non-fatal error
947 ph10 392 */
948 ph10 378
949     static BOOL
950 ph10 632 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
951     int *mrc)
952 ph10 378 {
953     int i;
954 ph10 561 size_t slen = length;
955     const char *msg = "this text:\n\n";
956     if (slen > 200)
957     {
958     slen = 200;
959     msg = "text that starts:\n\n";
960 ph10 579 }
961 ph10 378 for (i = 0; i < pattern_count; i++)
962     {
963 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
964     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
965 ph10 378 if (*mrc >= 0) return TRUE;
966     if (*mrc == PCRE_ERROR_NOMATCH) continue;
967 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
968 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
969 ph10 561 fprintf(stderr, "%s", msg);
970     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
971     fprintf(stderr, "\n\n");
972     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
973     resource_error = TRUE;
974 ph10 378 if (error_count++ > 20)
975     {
976 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
977     pcregrep_exit(2);
978 ph10 378 }
979     return invert; /* No more matching; don't show the line again */
980     }
981    
982     return FALSE; /* No match, no errors */
983     }
984    
985    
986    
987     /*************************************************
988 nigel 77 * Grep an individual file *
989     *************************************************/
990    
991     /* This is called from grep_or_recurse() below. It uses a buffer that is three
992     times the value of MBUFTHIRD. The matching point is never allowed to stray into
993     the top third of the buffer, thus keeping more of the file available for
994     context printing or for multiline scanning. For large files, the pointer will
995     be in the middle third most of the time, so the bottom third is available for
996     "before" context printing.
997    
998     Arguments:
999 ph10 286 handle the fopened FILE stream for a normal file
1000     the gzFile pointer when reading is via libz
1001     the BZFILE pointer when reading is via libbz2
1002     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1003 nigel 77 printname the file name if it is to be printed for each match
1004     or NULL if the file name is not to be printed
1005     it cannot be NULL if filenames[_nomatch]_only is set
1006    
1007     Returns: 0 if there was at least one match
1008     1 otherwise (no matches)
1009 ph10 286 2 if there is a read error on a .bz2 file
1010 nigel 77 */
1011    
1012 nigel 49 static int
1013 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1014 nigel 49 {
1015     int rc = 1;
1016 nigel 77 int linenumber = 1;
1017     int lastmatchnumber = 0;
1018 nigel 49 int count = 0;
1019 ph10 280 int filepos = 0;
1020 ph10 378 int offsets[OFFSET_SIZE];
1021 nigel 77 char *lastmatchrestart = NULL;
1022     char buffer[3*MBUFTHIRD];
1023     char *ptr = buffer;
1024     char *endptr;
1025     size_t bufflength;
1026     BOOL endhyphenpending = FALSE;
1027 ph10 519 BOOL input_line_buffered = line_buffered;
1028 ph10 286 FILE *in = NULL; /* Ensure initialized */
1029 nigel 49
1030 ph10 286 #ifdef SUPPORT_LIBZ
1031     gzFile ingz = NULL;
1032     #endif
1033 nigel 77
1034 ph10 286 #ifdef SUPPORT_LIBBZ2
1035     BZFILE *inbz2 = NULL;
1036     #endif
1037    
1038    
1039     /* Do the first read into the start of the buffer and set up the pointer to end
1040     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1041     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1042     fail. */
1043    
1044     #ifdef SUPPORT_LIBZ
1045     if (frtype == FR_LIBZ)
1046     {
1047     ingz = (gzFile)handle;
1048     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1049     }
1050     else
1051     #endif
1052    
1053     #ifdef SUPPORT_LIBBZ2
1054     if (frtype == FR_LIBBZ2)
1055     {
1056     inbz2 = (BZFILE *)handle;
1057     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1058     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1059     } /* without the cast it is unsigned. */
1060     else
1061     #endif
1062    
1063     {
1064     in = (FILE *)handle;
1065 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1066 ph10 535 bufflength = input_line_buffered?
1067 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1068     fread(buffer, 1, 3*MBUFTHIRD, in);
1069 ph10 286 }
1070 ph10 535
1071 nigel 77 endptr = buffer + bufflength;
1072    
1073     /* Loop while the current pointer is not at the end of the file. For large
1074     files, endptr will be at the end of the buffer when we are in the middle of the
1075     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1076     way, the buffer is shifted left and re-filled. */
1077    
1078     while (ptr < endptr)
1079 nigel 49 {
1080 ph10 378 int endlinelength;
1081 nigel 87 int mrc = 0;
1082 ph10 632 int startoffset = 0;
1083 ph10 378 BOOL match;
1084 ph10 286 char *matchptr = ptr;
1085 nigel 77 char *t = ptr;
1086     size_t length, linelength;
1087 nigel 49
1088 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1089     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1090     length remainder of the data in the buffer. Otherwise, it is the length of
1091 ph10 378 the next line, excluding the terminating newline. After matching, we always
1092     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1093     option is used for compiling, so that any match is constrained to be in the
1094     first line. */
1095 nigel 77
1096 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1097     linelength = t - ptr - endlinelength;
1098 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1099 nigel 77
1100 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1101    
1102     #ifdef JFRIEDL_DEBUG
1103     if (jfriedl_XT || jfriedl_XR)
1104     {
1105     #include <sys/time.h>
1106     #include <time.h>
1107     struct timeval start_time, end_time;
1108     struct timezone dummy;
1109 ph10 392 int i;
1110 nigel 89
1111     if (jfriedl_XT)
1112     {
1113     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1114     const char *orig = ptr;
1115     ptr = malloc(newlen + 1);
1116     if (!ptr) {
1117     printf("out of memory");
1118 ph10 561 pcregrep_exit(2);
1119 nigel 89 }
1120     endptr = ptr;
1121     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1122     for (i = 0; i < jfriedl_XT; i++) {
1123     strncpy(endptr, orig, length);
1124     endptr += length;
1125     }
1126     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1127     length = newlen;
1128     }
1129    
1130     if (gettimeofday(&start_time, &dummy) != 0)
1131     perror("bad gettimeofday");
1132    
1133    
1134     for (i = 0; i < jfriedl_XR; i++)
1135 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1136 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1137 nigel 89
1138     if (gettimeofday(&end_time, &dummy) != 0)
1139     perror("bad gettimeofday");
1140    
1141     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1142     -
1143     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1144    
1145     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1146     return 0;
1147     }
1148     #endif
1149    
1150 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1151 ph10 279 in order to find any further matches in the same line. */
1152 nigel 89
1153 ph10 286 ONLY_MATCHING_RESTART:
1154    
1155 ph10 392 /* Run through all the patterns until one matches or there is an error other
1156 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1157     finding subsequent matches when colouring matched lines. */
1158 ph10 392
1159 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1160 nigel 77
1161 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1162 nigel 77
1163 nigel 49 if (match != invert)
1164     {
1165 nigel 77 BOOL hyphenprinted = FALSE;
1166    
1167 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1168 nigel 77
1169 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1170    
1171     /* Just count if just counting is wanted. */
1172    
1173 nigel 49 if (count_only) count++;
1174    
1175 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1176     in the file. */
1177    
1178 ph10 420 else if (filenames == FN_MATCH_ONLY)
1179 nigel 49 {
1180 nigel 77 fprintf(stdout, "%s\n", printname);
1181 nigel 49 return 0;
1182     }
1183    
1184 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1185    
1186 nigel 77 else if (quiet) return 0;
1187 nigel 49
1188 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1189 ph10 565 captured portion of it, as long as this string is not empty, and the
1190     --file-offsets and --line-offsets options output offsets for the matching
1191     substring (they both force --only-matching = 0). None of these options
1192 ph10 636 prints any context. Afterwards, adjust the start and then jump back to look
1193     for further matches in the same line. If we are in invert mode, however,
1194     nothing is printed and we do not restart - this could still be useful
1195     because the return code is set. */
1196 nigel 87
1197 ph10 565 else if (only_matching >= 0)
1198 nigel 87 {
1199 ph10 279 if (!invert)
1200 ph10 286 {
1201 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1202     if (number) fprintf(stdout, "%d:", linenumber);
1203 ph10 280 if (line_offsets)
1204 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1205 ph10 286 offsets[1] - offsets[0]);
1206 ph10 280 else if (file_offsets)
1207 ph10 579 fprintf(stdout, "%d,%d\n",
1208 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1209 ph10 286 offsets[1] - offsets[0]);
1210 ph10 565 else if (only_matching < mrc)
1211 ph10 377 {
1212 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1213     if (plen > 0)
1214 ph10 579 {
1215 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1216     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1217     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1218     fprintf(stdout, "\n");
1219 ph10 579 }
1220 ph10 392 }
1221 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1222 ph10 286 match = FALSE;
1223 ph10 564 if (line_buffered) fflush(stdout);
1224 ph10 636 rc = 0; /* Had some success */
1225     startoffset = offsets[1]; /* Restart after the match */
1226 ph10 286 goto ONLY_MATCHING_RESTART;
1227     }
1228 nigel 87 }
1229    
1230     /* This is the default case when none of the above options is set. We print
1231     the matching lines(s), possibly preceded and/or followed by other lines of
1232     context. */
1233    
1234 nigel 49 else
1235     {
1236 nigel 77 /* See if there is a requirement to print some "after" lines from a
1237     previous match. We never print any overlaps. */
1238    
1239     if (after_context > 0 && lastmatchnumber > 0)
1240     {
1241 nigel 93 int ellength;
1242 nigel 77 int linecount = 0;
1243     char *p = lastmatchrestart;
1244    
1245     while (p < ptr && linecount < after_context)
1246     {
1247 nigel 93 p = end_of_line(p, ptr, &ellength);
1248 nigel 77 linecount++;
1249     }
1250    
1251     /* It is important to advance lastmatchrestart during this printing so
1252 nigel 87 that it interacts correctly with any "before" printing below. Print
1253     each line's data using fwrite() in case there are binary zeroes. */
1254 nigel 77
1255     while (lastmatchrestart < p)
1256     {
1257     char *pp = lastmatchrestart;
1258     if (printname != NULL) fprintf(stdout, "%s-", printname);
1259     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1260 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1261 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1262 nigel 93 lastmatchrestart = pp;
1263 nigel 77 }
1264     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1265     }
1266    
1267     /* If there were non-contiguous lines printed above, insert hyphens. */
1268    
1269     if (hyphenpending)
1270     {
1271     fprintf(stdout, "--\n");
1272     hyphenpending = FALSE;
1273     hyphenprinted = TRUE;
1274     }
1275    
1276     /* See if there is a requirement to print some "before" lines for this
1277     match. Again, don't print overlaps. */
1278    
1279     if (before_context > 0)
1280     {
1281     int linecount = 0;
1282     char *p = ptr;
1283    
1284     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1285 nigel 87 linecount < before_context)
1286 nigel 77 {
1287 nigel 87 linecount++;
1288 nigel 93 p = previous_line(p, buffer);
1289 nigel 77 }
1290    
1291     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1292     fprintf(stdout, "--\n");
1293    
1294     while (p < ptr)
1295     {
1296 nigel 93 int ellength;
1297 nigel 77 char *pp = p;
1298     if (printname != NULL) fprintf(stdout, "%s-", printname);
1299     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1300 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1301 ph10 515 FWRITE(p, 1, pp - p, stdout);
1302 nigel 93 p = pp;
1303 nigel 77 }
1304     }
1305    
1306     /* Now print the matching line(s); ensure we set hyphenpending at the end
1307 nigel 85 of the file if any context lines are being output. */
1308 nigel 77
1309 nigel 85 if (after_context > 0 || before_context > 0)
1310     endhyphenpending = TRUE;
1311    
1312 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1313 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1314 nigel 77
1315     /* In multiline mode, we want to print to the end of the line in which
1316     the end of the matched string is found, so we adjust linelength and the
1317 ph10 222 line number appropriately, but only when there actually was a match
1318     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1319     the match will always be before the first newline sequence. */
1320 nigel 77
1321 ph10 587 if (multiline & !invert)
1322 nigel 77 {
1323 ph10 587 char *endmatch = ptr + offsets[1];
1324     t = ptr;
1325     while (t < endmatch)
1326 nigel 93 {
1327 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1328     if (t < endmatch) linenumber++; else break;
1329 nigel 93 }
1330 ph10 587 linelength = t - ptr - endlinelength;
1331 nigel 77 }
1332    
1333 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1334     zeroes are treated as just another data character. */
1335    
1336     /* This extra option, for Jeffrey Friedl's debugging requirements,
1337     replaces the matched string, or a specific captured string if it exists,
1338     with X. When this happens, colouring is ignored. */
1339    
1340     #ifdef JFRIEDL_DEBUG
1341     if (S_arg >= 0 && S_arg < mrc)
1342     {
1343     int first = S_arg * 2;
1344     int last = first + 1;
1345 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1346 nigel 87 fprintf(stdout, "X");
1347 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1348 nigel 87 }
1349     else
1350     #endif
1351    
1352 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1353 ph10 585 matches, but not of course if the line is a non-match. */
1354 ph10 589
1355 ph10 585 if (do_colour && !invert)
1356 nigel 87 {
1357 ph10 589 int plength;
1358 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1359 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1360 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1361 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1362 ph10 378 for (;;)
1363     {
1364 ph10 632 startoffset = offsets[1];
1365 ph10 636 if (startoffset >= linelength + endlinelength ||
1366 ph10 632 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1367     break;
1368     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1369 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1370 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1371 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1372     }
1373 ph10 587
1374     /* In multiline mode, we may have already printed the complete line
1375 ph10 589 and its line-ending characters (if they matched the pattern), so there
1376 ph10 587 may be no more to print. */
1377 ph10 589
1378 ph10 636 plength = (linelength + endlinelength) - startoffset;
1379     if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1380 nigel 87 }
1381 ph10 392
1382 ph10 378 /* Not colouring; no need to search for further matches */
1383 ph10 392
1384 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1385 nigel 49 }
1386    
1387 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1388     given, flush the output. */
1389 nigel 87
1390 ph10 519 if (line_buffered) fflush(stdout);
1391 nigel 77 rc = 0; /* Had some success */
1392    
1393     /* Remember where the last match happened for after_context. We remember
1394     where we are about to restart, and that line's number. */
1395    
1396 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1397 nigel 77 lastmatchnumber = linenumber + 1;
1398 nigel 49 }
1399 nigel 77
1400 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1401     anything to be printed), we have to move on to the end of the match before
1402     proceeding. */
1403    
1404     if (multiline && invert && match)
1405     {
1406     int ellength;
1407     char *endmatch = ptr + offsets[1];
1408     t = ptr;
1409     while (t < endmatch)
1410     {
1411     t = end_of_line(t, endptr, &ellength);
1412     if (t <= endmatch) linenumber++; else break;
1413     }
1414     endmatch = end_of_line(endmatch, endptr, &ellength);
1415     linelength = endmatch - ptr - ellength;
1416     }
1417    
1418 ph10 286 /* Advance to after the newline and increment the line number. The file
1419 ph10 280 offset to the current line is maintained in filepos. */
1420 nigel 77
1421 nigel 93 ptr += linelength + endlinelength;
1422 ph10 530 filepos += (int)(linelength + endlinelength);
1423 nigel 77 linenumber++;
1424 ph10 535
1425     /* If input is line buffered, and the buffer is not yet full, read another
1426 ph10 519 line and add it into the buffer. */
1427 ph10 535
1428 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1429     {
1430     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1431     bufflength += add;
1432 ph10 535 endptr += add;
1433     }
1434 nigel 77
1435     /* If we haven't yet reached the end of the file (the buffer is full), and
1436     the current point is in the top 1/3 of the buffer, slide the buffer down by
1437     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1438     about to be lost, print them. */
1439    
1440     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1441     {
1442     if (after_context > 0 &&
1443     lastmatchnumber > 0 &&
1444     lastmatchrestart < buffer + MBUFTHIRD)
1445     {
1446     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1447     lastmatchnumber = 0;
1448     }
1449    
1450     /* Now do the shuffle */
1451    
1452     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1453     ptr -= MBUFTHIRD;
1454 ph10 286
1455     #ifdef SUPPORT_LIBZ
1456     if (frtype == FR_LIBZ)
1457     bufflength = 2*MBUFTHIRD +
1458     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1459     else
1460     #endif
1461    
1462     #ifdef SUPPORT_LIBBZ2
1463     if (frtype == FR_LIBBZ2)
1464     bufflength = 2*MBUFTHIRD +
1465     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1466     else
1467     #endif
1468    
1469 ph10 535 bufflength = 2*MBUFTHIRD +
1470     (input_line_buffered?
1471     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1472 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1473 nigel 77 endptr = buffer + bufflength;
1474    
1475     /* Adjust any last match point */
1476    
1477     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1478     }
1479     } /* Loop through the whole file */
1480    
1481     /* End of file; print final "after" lines if wanted; do_after_lines sets
1482     hyphenpending if it prints something. */
1483    
1484 ph10 565 if (only_matching < 0 && !count_only)
1485 nigel 87 {
1486     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1487     hyphenpending |= endhyphenpending;
1488     }
1489 nigel 77
1490     /* Print the file name if we are looking for those without matches and there
1491     were none. If we found a match, we won't have got this far. */
1492    
1493 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1494 nigel 77 {
1495     fprintf(stdout, "%s\n", printname);
1496     return 0;
1497 nigel 49 }
1498    
1499 nigel 77 /* Print the match count if wanted */
1500    
1501 nigel 49 if (count_only)
1502     {
1503 ph10 420 if (count > 0 || !omit_zero_count)
1504 ph10 461 {
1505     if (printname != NULL && filenames != FN_NONE)
1506 ph10 420 fprintf(stdout, "%s:", printname);
1507     fprintf(stdout, "%d\n", count);
1508 ph10 461 }
1509 nigel 49 }
1510    
1511     return rc;
1512     }
1513    
1514    
1515    
1516     /*************************************************
1517 nigel 53 * Grep a file or recurse into a directory *
1518     *************************************************/
1519    
1520 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1521     recursing; if it's a file, grep it.
1522    
1523     Arguments:
1524     pathname the path to investigate
1525 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1526 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1527    
1528     Returns: 0 if there was at least one match
1529     1 if there were no matches
1530     2 there was some kind of error
1531    
1532     However, file opening failures are suppressed if "silent" is set.
1533     */
1534    
1535 nigel 53 static int
1536 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1537 nigel 53 {
1538     int rc = 1;
1539     int sep;
1540 ph10 286 int frtype;
1541     int pathlen;
1542     void *handle;
1543     FILE *in = NULL; /* Ensure initialized */
1544 nigel 53
1545 ph10 286 #ifdef SUPPORT_LIBZ
1546     gzFile ingz = NULL;
1547     #endif
1548    
1549     #ifdef SUPPORT_LIBBZ2
1550     BZFILE *inbz2 = NULL;
1551     #endif
1552    
1553 nigel 77 /* If the file name is "-" we scan stdin */
1554 nigel 53
1555 nigel 77 if (strcmp(pathname, "-") == 0)
1556 nigel 53 {
1557 ph10 286 return pcregrep(stdin, FR_PLAIN,
1558 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1559 nigel 77 stdin_name : NULL);
1560     }
1561    
1562 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1563 ph10 325 each file and directory within it, subject to any include or exclude patterns
1564     that were set. The scanning code is localized so it can be made
1565     system-specific. */
1566 nigel 87
1567     if ((sep = isdirectory(pathname)) != 0)
1568 nigel 77 {
1569 nigel 87 if (dee_action == dee_SKIP) return 1;
1570     if (dee_action == dee_RECURSE)
1571 nigel 53 {
1572 nigel 87 char buffer[1024];
1573     char *nextfile;
1574     directory_type *dir = opendirectory(pathname);
1575 nigel 53
1576 nigel 87 if (dir == NULL)
1577     {
1578     if (!silent)
1579     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1580     strerror(errno));
1581     return 2;
1582     }
1583 nigel 77
1584 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1585     {
1586 ph10 324 int frc, nflen;
1587 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1588 ph10 530 nflen = (int)(strlen(nextfile));
1589 ph10 345
1590 ph10 325 if (isdirectory(buffer))
1591     {
1592     if (exclude_dir_compiled != NULL &&
1593     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1594     continue;
1595 ph10 345
1596 ph10 325 if (include_dir_compiled != NULL &&
1597     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1598     continue;
1599     }
1600 ph10 345 else
1601     {
1602 ph10 324 if (exclude_compiled != NULL &&
1603     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1604     continue;
1605 ph10 345
1606 ph10 324 if (include_compiled != NULL &&
1607     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1608     continue;
1609 ph10 345 }
1610 nigel 77
1611 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1612     if (frc > 1) rc = frc;
1613     else if (frc == 0 && rc == 1) rc = 0;
1614     }
1615    
1616     closedirectory(dir);
1617     return rc;
1618 nigel 53 }
1619     }
1620    
1621 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1622     been requested. */
1623 nigel 53
1624 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1625    
1626     /* Control reaches here if we have a regular file, or if we have a directory
1627     and recursion or skipping was not requested, or if we have anything else and
1628     skipping was not requested. The scan proceeds. If this is the first and only
1629     argument at top level, we don't show the file name, unless we are only showing
1630     the file name, or the filename was forced (-H). */
1631    
1632 ph10 530 pathlen = (int)(strlen(pathname));
1633 ph10 286
1634     /* Open using zlib if it is supported and the file name ends with .gz. */
1635    
1636     #ifdef SUPPORT_LIBZ
1637     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1638 nigel 53 {
1639 ph10 286 ingz = gzopen(pathname, "rb");
1640     if (ingz == NULL)
1641     {
1642     if (!silent)
1643     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1644     strerror(errno));
1645     return 2;
1646     }
1647     handle = (void *)ingz;
1648     frtype = FR_LIBZ;
1649     }
1650     else
1651     #endif
1652    
1653     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1654    
1655     #ifdef SUPPORT_LIBBZ2
1656     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1657     {
1658     inbz2 = BZ2_bzopen(pathname, "rb");
1659     handle = (void *)inbz2;
1660     frtype = FR_LIBBZ2;
1661     }
1662     else
1663     #endif
1664    
1665     /* Otherwise use plain fopen(). The label is so that we can come back here if
1666     an attempt to read a .bz2 file indicates that it really is a plain file. */
1667    
1668     #ifdef SUPPORT_LIBBZ2
1669     PLAIN_FILE:
1670     #endif
1671     {
1672 ph10 419 in = fopen(pathname, "rb");
1673 ph10 286 handle = (void *)in;
1674     frtype = FR_PLAIN;
1675     }
1676    
1677     /* All the opening methods return errno when they fail. */
1678    
1679     if (handle == NULL)
1680     {
1681 nigel 77 if (!silent)
1682     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1683     strerror(errno));
1684 nigel 53 return 2;
1685     }
1686    
1687 ph10 286 /* Now grep the file */
1688    
1689     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1690 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1691 nigel 77
1692 ph10 286 /* Close in an appropriate manner. */
1693    
1694     #ifdef SUPPORT_LIBZ
1695     if (frtype == FR_LIBZ)
1696     gzclose(ingz);
1697     else
1698     #endif
1699    
1700     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1701     read failed. If the error indicates that the file isn't in fact bzipped, try
1702     again as a normal file. */
1703    
1704     #ifdef SUPPORT_LIBBZ2
1705     if (frtype == FR_LIBBZ2)
1706     {
1707     if (rc == 2)
1708     {
1709     int errnum;
1710     const char *err = BZ2_bzerror(inbz2, &errnum);
1711     if (errnum == BZ_DATA_ERROR_MAGIC)
1712     {
1713     BZ2_bzclose(inbz2);
1714     goto PLAIN_FILE;
1715     }
1716     else if (!silent)
1717     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1718     pathname, err);
1719     }
1720     BZ2_bzclose(inbz2);
1721     }
1722     else
1723     #endif
1724    
1725     /* Normal file close */
1726    
1727 nigel 53 fclose(in);
1728 ph10 286
1729     /* Pass back the yield from pcregrep(). */
1730    
1731 nigel 53 return rc;
1732     }
1733    
1734    
1735    
1736    
1737     /*************************************************
1738 nigel 49 * Usage function *
1739     *************************************************/
1740    
1741     static int
1742     usage(int rc)
1743     {
1744 nigel 87 option_item *op;
1745     fprintf(stderr, "Usage: pcregrep [-");
1746     for (op = optionlist; op->one_char != 0; op++)
1747     {
1748     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1749     }
1750     fprintf(stderr, "] [long options] [pattern] [files]\n");
1751 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1752     "options.\n");
1753 nigel 49 return rc;
1754     }
1755    
1756    
1757    
1758    
1759     /*************************************************
1760 nigel 53 * Help function *
1761     *************************************************/
1762    
1763     static void
1764     help(void)
1765     {
1766     option_item *op;
1767    
1768 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1769 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1770 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1771 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1772    
1773     #ifdef SUPPORT_LIBZ
1774     printf("Files whose names end in .gz are read using zlib.\n");
1775     #endif
1776    
1777     #ifdef SUPPORT_LIBBZ2
1778     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1779     #endif
1780    
1781     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1782     printf("Other files and the standard input are read as plain files.\n\n");
1783     #else
1784     printf("All files are read as plain files, without any interpretation.\n\n");
1785     #endif
1786    
1787 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1788     printf("Options:\n");
1789    
1790     for (op = optionlist; op->one_char != 0; op++)
1791     {
1792     int n;
1793     char s[4];
1794 ph10 579
1795 ph10 571 /* Two options were accidentally implemented and documented with underscores
1796     instead of hyphens in their names, something that was not noticed for quite a
1797 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1798     in case people were using them. However, we don't want to display them in the
1799     help data. There are no other options that contain underscores, and we do not
1800     expect ever to implement such options. Therefore, just omit any option that
1801 ph10 571 contains an underscore. */
1802 ph10 579
1803     if (strchr(op->long_name, '_') != NULL) continue;
1804    
1805 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1806 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1807 nigel 53 if (n < 1) n = 1;
1808 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1809 nigel 53 }
1810    
1811 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1812     printf("trailing white space is removed and blank lines are ignored.\n");
1813     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1814 nigel 53
1815 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1816 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1817     }
1818    
1819    
1820    
1821    
1822     /*************************************************
1823 nigel 77 * Handle a single-letter, no data option *
1824 nigel 53 *************************************************/
1825    
1826     static int
1827     handle_option(int letter, int options)
1828     {
1829     switch(letter)
1830     {
1831 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1832 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1833 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1834 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1835 nigel 53 case 'c': count_only = TRUE; break;
1836 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1837     case 'H': filenames = FN_FORCE; break;
1838     case 'h': filenames = FN_NONE; break;
1839 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1840 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1841 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1842 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1843 nigel 53 case 'n': number = TRUE; break;
1844 ph10 565 case 'o': only_matching = 0; break;
1845 nigel 77 case 'q': quiet = TRUE; break;
1846 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1847 nigel 53 case 's': silent = TRUE; break;
1848 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1849 nigel 53 case 'v': invert = TRUE; break;
1850 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1851     case 'x': process_options |= PO_LINE_MATCH; break;
1852 nigel 53
1853     case 'V':
1854 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1855 ph10 561 pcregrep_exit(0);
1856 nigel 53 break;
1857    
1858     default:
1859     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1860 ph10 561 pcregrep_exit(usage(2));
1861 nigel 53 }
1862    
1863     return options;
1864     }
1865    
1866    
1867    
1868    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", "11th", etc. Numbers whose last two
digits are 11, 12, or 13 take "th" rather than "st", "nd", or "rd".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is an upper bound on the decimal digits of an int;
add room for a minus sign, the two-letter suffix, and the terminating zero. */

static char buffer[sizeof(int) * 3 + 5];
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n % 10)
  {
  case 1:  strcpy(p, "st"); break;
  case 2:  strcpy(p, "nd"); break;
  case 3:  strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1891    
1892    
1893    
1894     /*************************************************
1895     * Compile a single pattern *
1896     *************************************************/
1897    
1898     /* When the -F option has been used, this is called for each substring.
1899     Otherwise it's called for each supplied pattern.
1900    
1901     Arguments:
1902     pattern the pattern string
1903     options the PCRE options
1904     filename the file name, or NULL for a command-line pattern
1905     count 0 if this is the only command line pattern, or
1906     number of the command line pattern, or
1907     linenumber for a pattern from a file
1908    
1909     Returns: TRUE on success, FALSE after an error
1910     */
1911    
1912     static BOOL
1913     compile_single_pattern(char *pattern, int options, char *filename, int count)
1914     {
1915     char buffer[MBUFTHIRD + 16];
1916     const char *error;
1917     int errptr;
1918    
1919     if (pattern_count >= MAX_PATTERN_COUNT)
1920     {
1921     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1922     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1923     return FALSE;
1924     }
1925    
1926     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1927     suffix[process_options]);
1928     pattern_list[pattern_count] =
1929     pcre_compile(buffer, options, &error, &errptr, pcretables);
1930 ph10 142 if (pattern_list[pattern_count] != NULL)
1931 ph10 141 {
1932 ph10 142 pattern_count++;
1933 ph10 141 return TRUE;
1934 ph10 142 }
1935 nigel 87
1936     /* Handle compile errors */
1937    
1938     errptr -= (int)strlen(prefix[process_options]);
1939     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1940    
1941     if (filename == NULL)
1942     {
1943     if (count == 0)
1944     fprintf(stderr, "pcregrep: Error in command-line regex "
1945     "at offset %d: %s\n", errptr, error);
1946     else
1947     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1948     "at offset %d: %s\n", ordin(count), errptr, error);
1949     }
1950     else
1951     {
1952     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1953     "at offset %d: %s\n", count, filename, errptr, error);
1954     }
1955    
1956     return FALSE;
1957     }
1958    
1959    
1960    
1961     /*************************************************
1962     * Compile one supplied pattern *
1963     *************************************************/
1964    
1965     /* When the -F option has been used, each string may be a list of strings,
1966 nigel 91 separated by line breaks. They will be matched literally.
1967 nigel 87
1968     Arguments:
1969     pattern the pattern string
1970     options the PCRE options
1971     filename the file name, or NULL for a command-line pattern
1972     count 0 if this is the only command line pattern, or
1973     number of the command line pattern, or
1974     linenumber for a pattern from a file
1975    
1976     Returns: TRUE on success, FALSE after an error
1977     */
1978    
1979     static BOOL
1980     compile_pattern(char *pattern, int options, char *filename, int count)
1981     {
1982     if ((process_options & PO_FIXED_STRINGS) != 0)
1983     {
1984 nigel 93 char *eop = pattern + strlen(pattern);
1985 nigel 87 char buffer[MBUFTHIRD];
1986     for(;;)
1987     {
1988 nigel 93 int ellength;
1989     char *p = end_of_line(pattern, eop, &ellength);
1990     if (ellength == 0)
1991 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1992 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1993 nigel 93 pattern = p;
1994 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1995     return FALSE;
1996     }
1997     }
1998     else return compile_single_pattern(pattern, options, filename, count);
1999     }
2000    
2001    
2002    
2003     /*************************************************
2004 nigel 49 * Main program *
2005     *************************************************/
2006    
2007 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2008    
2009 nigel 49 int
2010     main(int argc, char **argv)
2011     {
2012 nigel 53 int i, j;
2013 nigel 49 int rc = 1;
2014 nigel 87 int pcre_options = 0;
2015     int cmd_pattern_count = 0;
2016 ph10 141 int hint_count = 0;
2017 nigel 49 int errptr;
2018 nigel 87 BOOL only_one_at_top;
2019     char *patterns[MAX_PATTERN_COUNT];
2020     const char *locale_from = "--locale";
2021 nigel 49 const char *error;
2022    
2023 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2024     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2025 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2026 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2027 ph10 391 rather than escapes such as as '\r'. */
2028 nigel 91
2029     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2030     switch(i)
2031     {
2032 ph10 391 default: newline = (char *)"lf"; break;
2033     case 13: newline = (char *)"cr"; break;
2034     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2035     case -1: newline = (char *)"any"; break;
2036     case -2: newline = (char *)"anycrlf"; break;
2037 nigel 91 }
2038    
2039 nigel 49 /* Process the options */
2040    
2041     for (i = 1; i < argc; i++)
2042     {
2043 nigel 77 option_item *op = NULL;
2044     char *option_data = (char *)""; /* default to keep compiler happy */
2045     BOOL longop;
2046     BOOL longopwasequals = FALSE;
2047    
2048 nigel 49 if (argv[i][0] != '-') break;
2049 nigel 53
2050 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2051 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2052 nigel 63
2053 nigel 77 if (argv[i][1] == 0)
2054     {
2055 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2056 ph10 561 else pcregrep_exit(usage(2));
2057 nigel 77 }
2058 nigel 63
2059 nigel 77 /* Handle a long name option, or -- to terminate the options */
2060 nigel 53
2061     if (argv[i][1] == '-')
2062 nigel 49 {
2063 nigel 77 char *arg = argv[i] + 2;
2064     char *argequals = strchr(arg, '=');
2065 nigel 53
2066 nigel 77 if (*arg == 0) /* -- terminates options */
2067 nigel 49 {
2068 nigel 77 i++;
2069     break; /* out of the options-handling loop */
2070 nigel 53 }
2071 nigel 49
2072 nigel 77 longop = TRUE;
2073    
2074     /* Some long options have data that follows after =, for example file=name.
2075     Some options have variations in the long name spelling: specifically, we
2076     allow "regexp" because GNU grep allows it, though I personally go along
2077 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2078 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2079     both these categories. */
2080 nigel 77
2081 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2082     {
2083 nigel 77 char *opbra = strchr(op->long_name, '(');
2084     char *equals = strchr(op->long_name, '=');
2085 ph10 461
2086 ph10 422 /* Handle options with only one spelling of the name */
2087 ph10 461
2088 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2089 nigel 53 {
2090 nigel 77 if (equals == NULL) /* Not thing=data case */
2091     {
2092     if (strcmp(arg, op->long_name) == 0) break;
2093     }
2094     else /* Special case xxx=data */
2095     {
2096 ph10 530 int oplen = (int)(equals - op->long_name);
2097 ph10 535 int arglen = (argequals == NULL)?
2098 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2099 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2100     {
2101     option_data = arg + arglen;
2102     if (*option_data == '=')
2103     {
2104     option_data++;
2105     longopwasequals = TRUE;
2106     }
2107     break;
2108     }
2109     }
2110 nigel 53 }
2111 ph10 461
2112 ph10 422 /* Handle options with an alternate spelling of the name */
2113 ph10 461
2114     else
2115 nigel 77 {
2116     char buff1[24];
2117     char buff2[24];
2118 ph10 461
2119 ph10 530 int baselen = (int)(opbra - op->long_name);
2120     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2121 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2122 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2123 ph10 461
2124 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2125 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2126 ph10 461
2127     if (strncmp(arg, buff1, arglen) == 0 ||
2128 ph10 422 strncmp(arg, buff2, arglen) == 0)
2129     {
2130     if (equals != NULL && argequals != NULL)
2131     {
2132 ph10 461 option_data = argequals;
2133 ph10 422 if (*option_data == '=')
2134     {
2135 ph10 461 option_data++;
2136 ph10 422 longopwasequals = TRUE;
2137 ph10 461 }
2138     }
2139 nigel 77 break;
2140 ph10 461 }
2141 nigel 77 }
2142 nigel 53 }
2143 nigel 77
2144 nigel 53 if (op->one_char == 0)
2145     {
2146     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2147 ph10 561 pcregrep_exit(usage(2));
2148 nigel 53 }
2149     }
2150 nigel 49
2151 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2152     are not in the right form for putting in the option table because they use
2153     only one hyphen, yet are more than one character long. By putting them
2154     separately here, they will not get displayed as part of the help() output,
2155     but I don't think Jeffrey will care about that. */
2156    
2157     #ifdef JFRIEDL_DEBUG
2158     else if (strcmp(argv[i], "-pre") == 0) {
2159     jfriedl_prefix = argv[++i];
2160     continue;
2161     } else if (strcmp(argv[i], "-post") == 0) {
2162     jfriedl_postfix = argv[++i];
2163     continue;
2164     } else if (strcmp(argv[i], "-XT") == 0) {
2165     sscanf(argv[++i], "%d", &jfriedl_XT);
2166     continue;
2167     } else if (strcmp(argv[i], "-XR") == 0) {
2168     sscanf(argv[++i], "%d", &jfriedl_XR);
2169     continue;
2170     }
2171     #endif
2172    
2173    
2174 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2175     continue till we hit the last one or one that needs data. */
2176 nigel 53
2177     else
2178     {
2179     char *s = argv[i] + 1;
2180 nigel 77 longop = FALSE;
2181 nigel 53 while (*s != 0)
2182     {
2183 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2184 ph10 579 {
2185     if (*s == op->one_char) break;
2186 ph10 565 }
2187 nigel 77 if (op->one_char == 0)
2188 nigel 53 {
2189 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2190     *s, argv[i]);
2191 ph10 561 pcregrep_exit(usage(2));
2192 nigel 77 }
2193 ph10 579
2194 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2195 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2196 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2197     with other single-character options in the same string. */
2198 ph10 579
2199 ph10 565 option_data = s+1;
2200     if (op->type == OP_OP_NUMBER)
2201 ph10 579 {
2202     if (isdigit((unsigned char)s[1])) break;
2203 nigel 53 }
2204 ph10 565 else /* Check for end or a dataless option */
2205 ph10 579 {
2206 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2207 ph10 579 }
2208    
2209     /* Handle a single-character option with no data, then loop for the
2210 ph10 565 next character in the string. */
2211    
2212 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2213 nigel 49 }
2214     }
2215 nigel 77
2216 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2217     is NO_DATA, it means that there is no data, and the option might set
2218     something in the PCRE options. */
2219 nigel 77
2220     if (op->type == OP_NODATA)
2221     {
2222 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2223     continue;
2224     }
2225    
2226     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2227     either has a value or defaults to something. It cannot have data in a
2228 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2229 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2230 nigel 87
2231     if (*option_data == 0 &&
2232     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2233     {
2234     switch (op->one_char)
2235 nigel 77 {
2236 nigel 87 case N_COLOUR:
2237     colour_option = (char *)"auto";
2238     break;
2239 ph10 579
2240 ph10 565 case 'o':
2241     only_matching = 0;
2242 ph10 579 break;
2243    
2244 nigel 87 #ifdef JFRIEDL_DEBUG
2245     case 'S':
2246     S_arg = 0;
2247     break;
2248     #endif
2249 nigel 77 }
2250 nigel 87 continue;
2251     }
2252 nigel 77
2253 nigel 87 /* Otherwise, find the data string for the option. */
2254    
2255     if (*option_data == 0)
2256     {
2257     if (i >= argc - 1 || longopwasequals)
2258 nigel 77 {
2259 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2260 ph10 561 pcregrep_exit(usage(2));
2261 nigel 87 }
2262     option_data = argv[++i];
2263     }
2264    
2265     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2266     multiple times to create a list of patterns. */
2267    
2268     if (op->type == OP_PATLIST)
2269     {
2270     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2271     {
2272     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2273     MAX_PATTERN_COUNT);
2274     return 2;
2275     }
2276     patterns[cmd_pattern_count++] = option_data;
2277     }
2278    
2279     /* Otherwise, deal with single string or numeric data values. */
2280    
2281 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2282     op->type != OP_OP_NUMBER)
2283 nigel 87 {
2284     *((char **)op->dataptr) = option_data;
2285     }
2286 ph10 558
2287     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2288     only for unpicking arguments, so just keep it simple. */
2289    
2290 nigel 87 else
2291     {
2292 ph10 561 unsigned long int n = 0;
2293 ph10 558 char *endptr = option_data;
2294     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2295     while (isdigit((unsigned char)(*endptr)))
2296     n = n * 10 + (int)(*endptr++ - '0');
2297 nigel 87 if (*endptr != 0)
2298     {
2299     if (longop)
2300 nigel 77 {
2301 nigel 87 char *equals = strchr(op->long_name, '=');
2302     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2303 ph10 530 (int)(equals - op->long_name);
2304 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2305     option_data, nlen, op->long_name);
2306 nigel 77 }
2307 nigel 87 else
2308     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2309     option_data, op->one_char);
2310 ph10 561 pcregrep_exit(usage(2));
2311 nigel 77 }
2312 ph10 584 if (op->type == OP_LONGNUMBER)
2313     *((unsigned long int *)op->dataptr) = n;
2314     else
2315     *((int *)op->dataptr) = n;
2316 nigel 77 }
2317 nigel 49 }
2318    
2319 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2320     for -A and -B. */
2321    
2322     if (both_context > 0)
2323     {
2324     if (after_context == 0) after_context = both_context;
2325     if (before_context == 0) before_context = both_context;
2326     }
2327 ph10 286
2328     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2329 ph10 565 However, the latter two set only_matching. */
2330 nigel 77
2331 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2332 ph10 286 (file_offsets && line_offsets))
2333 ph10 280 {
2334     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2335     "and/or --line-offsets\n");
2336 ph10 561 pcregrep_exit(usage(2));
2337 ph10 280 }
2338    
2339 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2340 ph10 286
2341 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2342     LC_ALL environment variable is set, and if so, use it. */
2343 nigel 49
2344 nigel 87 if (locale == NULL)
2345 nigel 53 {
2346 nigel 87 locale = getenv("LC_ALL");
2347     locale_from = "LCC_ALL";
2348 nigel 53 }
2349 nigel 49
2350 nigel 87 if (locale == NULL)
2351     {
2352     locale = getenv("LC_CTYPE");
2353     locale_from = "LC_CTYPE";
2354     }
2355 nigel 49
2356 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2357     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2358    
2359     if (locale != NULL)
2360 nigel 49 {
2361 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2362 nigel 53 {
2363 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2364     locale, locale_from);
2365 nigel 53 return 2;
2366     }
2367 nigel 87 pcretables = pcre_maketables();
2368     }
2369 nigel 77
2370 nigel 87 /* Sort out colouring */
2371    
2372     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2373     {
2374     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2375     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2376     else
2377 nigel 53 {
2378 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2379     colour_option);
2380     return 2;
2381 nigel 77 }
2382 nigel 87 if (do_colour)
2383 nigel 77 {
2384 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2385     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2386     if (cs != NULL) colour_string = cs;
2387 nigel 77 }
2388 nigel 87 }
2389 ph10 535
2390 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2391    
2392     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2393     {
2394     pcre_options |= PCRE_NEWLINE_CR;
2395 nigel 93 endlinetype = EL_CR;
2396 nigel 91 }
2397     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2398     {
2399     pcre_options |= PCRE_NEWLINE_LF;
2400 nigel 93 endlinetype = EL_LF;
2401 nigel 91 }
2402     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2403     {
2404     pcre_options |= PCRE_NEWLINE_CRLF;
2405 nigel 93 endlinetype = EL_CRLF;
2406 nigel 91 }
2407 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2408     {
2409     pcre_options |= PCRE_NEWLINE_ANY;
2410     endlinetype = EL_ANY;
2411     }
2412 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2413     {
2414     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2415     endlinetype = EL_ANYCRLF;
2416     }
2417 nigel 91 else
2418     {
2419     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2420     return 2;
2421     }
2422    
2423 nigel 87 /* Interpret the text values for -d and -D */
2424    
2425     if (dee_option != NULL)
2426     {
2427     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2428     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2429     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2430     else
2431 nigel 77 {
2432 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2433     return 2;
2434 nigel 53 }
2435 nigel 49 }
2436    
2437 nigel 87 if (DEE_option != NULL)
2438     {
2439     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2440     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2441     else
2442     {
2443     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2444     return 2;
2445     }
2446     }
2447 nigel 49
2448 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2449 nigel 87
2450     #ifdef JFRIEDL_DEBUG
2451     if (S_arg > 9)
2452 nigel 49 {
2453 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2454     return 2;
2455     }
2456 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2457     {
2458     if (jfriedl_XT == 0) jfriedl_XT = 1;
2459     if (jfriedl_XR == 0) jfriedl_XR = 1;
2460     }
2461 nigel 87 #endif
2462 nigel 77
2463 nigel 87 /* Get memory to store the pattern and hints lists. */
2464    
2465     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2466     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2467    
2468     if (pattern_list == NULL || hints_list == NULL)
2469     {
2470     fprintf(stderr, "pcregrep: malloc failed\n");
2471 ph10 123 goto EXIT2;
2472 nigel 87 }
2473    
2474     /* If no patterns were provided by -e, and there is no file provided by -f,
2475     the first argument is the one and only pattern, and it must exist. */
2476    
2477     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2478     {
2479 nigel 63 if (i >= argc) return usage(2);
2480 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2481     }
2482 nigel 77
2483 nigel 87 /* Compile the patterns that were provided on the command line, either by
2484     multiple uses of -e or as a single unkeyed pattern. */
2485    
2486     for (j = 0; j < cmd_pattern_count; j++)
2487     {
2488     if (!compile_pattern(patterns[j], pcre_options, NULL,
2489     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2490 ph10 123 goto EXIT2;
2491 nigel 87 }
2492    
2493     /* Compile the regular expressions that are provided in a file. */
2494    
2495     if (pattern_filename != NULL)
2496     {
2497     int linenumber = 0;
2498     FILE *f;
2499     char *filename;
2500     char buffer[MBUFTHIRD];
2501    
2502     if (strcmp(pattern_filename, "-") == 0)
2503 nigel 77 {
2504 nigel 87 f = stdin;
2505     filename = stdin_name;
2506 nigel 77 }
2507 nigel 87 else
2508 nigel 77 {
2509 nigel 87 f = fopen(pattern_filename, "r");
2510     if (f == NULL)
2511     {
2512     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2513     strerror(errno));
2514 ph10 123 goto EXIT2;
2515 nigel 87 }
2516     filename = pattern_filename;
2517 nigel 77 }
2518    
2519 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2520 nigel 53 {
2521 nigel 87 char *s = buffer + (int)strlen(buffer);
2522     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2523     *s = 0;
2524     linenumber++;
2525     if (buffer[0] == 0) continue; /* Skip blank lines */
2526     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2527 ph10 121 goto EXIT2;
2528 nigel 53 }
2529 nigel 87
2530     if (f != stdin) fclose(f);
2531 nigel 49 }
2532    
2533 nigel 77 /* Study the regular expressions, as we will be running them many times */
2534 nigel 53
2535     for (j = 0; j < pattern_count; j++)
2536     {
2537     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2538     if (error != NULL)
2539     {
2540     char s[16];
2541     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2542     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2543 ph10 121 goto EXIT2;
2544 nigel 53 }
2545 ph10 142 hint_count++;
2546 nigel 53 }
2547 ph10 579
2548 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2549     pcre_extra block for each pattern. */
2550 nigel 53
2551 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2552     {
2553     for (j = 0; j < pattern_count; j++)
2554     {
2555     if (hints_list[j] == NULL)
2556     {
2557     hints_list[j] = malloc(sizeof(pcre_extra));
2558 ph10 579 if (hints_list[j] == NULL)
2559 ph10 561 {
2560     fprintf(stderr, "pcregrep: malloc failed\n");
2561     pcregrep_exit(2);
2562     }
2563     }
2564     if (match_limit > 0)
2565 ph10 579 {
2566 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2567     hints_list[j]->match_limit = match_limit;
2568 ph10 579 }
2569 ph10 561 if (match_limit_recursion > 0)
2570 ph10 579 {
2571 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2572     hints_list[j]->match_limit_recursion = match_limit_recursion;
2573 ph10 579 }
2574 ph10 561 }
2575 ph10 579 }
2576 ph10 561
2577 nigel 77 /* If there are include or exclude patterns, compile them. */
2578    
2579     if (exclude_pattern != NULL)
2580     {
2581 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2582     pcretables);
2583 nigel 77 if (exclude_compiled == NULL)
2584     {
2585     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2586     errptr, error);
2587 ph10 121 goto EXIT2;
2588 nigel 77 }
2589     }
2590    
2591     if (include_pattern != NULL)
2592     {
2593 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2594     pcretables);
2595 nigel 77 if (include_compiled == NULL)
2596     {
2597     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2598     errptr, error);
2599 ph10 121 goto EXIT2;
2600 nigel 77 }
2601     }
2602    
2603 ph10 325 if (exclude_dir_pattern != NULL)
2604     {
2605     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2606     pcretables);
2607     if (exclude_dir_compiled == NULL)
2608     {
2609     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2610     errptr, error);
2611     goto EXIT2;
2612     }
2613     }
2614    
2615     if (include_dir_pattern != NULL)
2616     {
2617     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2618     pcretables);
2619     if (include_dir_compiled == NULL)
2620     {
2621     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2622     errptr, error);
2623     goto EXIT2;
2624     }
2625     }
2626    
2627 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2628 nigel 49
2629 nigel 87 if (i >= argc)
2630 ph10 121 {
2631 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2632 ph10 121 goto EXIT;
2633 ph10 123 }
2634 nigel 49
2635 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2636     Pass in the fact that there is only one argument at top level - this suppresses
2637 nigel 87 the file name if the argument is not a directory and filenames are not
2638     otherwise forced. */
2639 nigel 49
2640 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2641 nigel 49
2642     for (; i < argc; i++)
2643     {
2644 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2645     only_one_at_top);
2646 nigel 77 if (frc > 1) rc = frc;
2647     else if (frc == 0 && rc == 1) rc = 0;
2648 nigel 49 }
2649    
2650 ph10 121 EXIT:
2651     if (pattern_list != NULL)
2652     {
2653 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2654 ph10 121 free(pattern_list);
2655 ph10 123 }
2656 ph10 121 if (hints_list != NULL)
2657     {
2658 ph10 579 for (i = 0; i < hint_count; i++)
2659 ph10 561 {
2660     if (hints_list[i] != NULL) free(hints_list[i]);
2661 ph10 579 }
2662 ph10 121 free(hints_list);
2663 ph10 123 }
2664 ph10 561 pcregrep_exit(rc);
2665 ph10 121
2666     EXIT2:
2667     rc = 2;
2668     goto EXIT;
2669 nigel 49 }
2670    
2671 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12