/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 632 - (hide annotations) (download)
Fri Jul 22 17:47:49 2011 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 77500 byte(s)
Fix pcregrep repeated match in same line bug.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* Boolean values and type used throughout this file. */

#define FALSE 0
#define TRUE 1

typedef int BOOL;

/* Fixed limits. */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* MBUFTHIRD is BUFSIZ when that is larger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif

/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE = 0,
  FN_DEFAULT = 1,
  FN_MATCH_ONLY = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE = 4
};

/* File reading styles */

enum { FR_PLAIN = 0, FR_LIBZ = 1, FR_LIBBZ2 = 2 };

/* Actions for the -d and -D options */

enum { dee_READ = 0, dee_SKIP = 1, dee_RECURSE = 2 };
enum { DEE_READ = 0, DEE_SKIP = 1 };

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum { EL_LF = 0, EL_CR = 1, EL_CRLF = 2, EL_ANY = 3, EL_ANYCRLF = 4 };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { ... } while (0) so that FWRITE(...); behaves as a
single statement everywhere. A bare "if (...) {}" expansion would either fail
to compile or silently capture the "else" when the macro is used as the body
of an if/else. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166 ph10 565 static int only_matching = -1;
167 nigel 87 static int process_options = 0;
168 nigel 77
169 ph10 561 static unsigned long int match_limit = 0;
170     static unsigned long int match_limit_recursion = 0;
171    
172 nigel 49 static BOOL count_only = FALSE;
173 nigel 87 static BOOL do_colour = FALSE;
174 ph10 280 static BOOL file_offsets = FALSE;
175 nigel 77 static BOOL hyphenpending = FALSE;
176 nigel 49 static BOOL invert = FALSE;
177 ph10 519 static BOOL line_buffered = FALSE;
178 ph10 280 static BOOL line_offsets = FALSE;
179 nigel 77 static BOOL multiline = FALSE;
180 nigel 49 static BOOL number = FALSE;
181 ph10 420 static BOOL omit_zero_count = FALSE;
182 ph10 561 static BOOL resource_error = FALSE;
183 nigel 77 static BOOL quiet = FALSE;
184 nigel 49 static BOOL silent = FALSE;
185 nigel 93 static BOOL utf8 = FALSE;
186 nigel 49
/* Structure for options and list of them. The "type" field of an option_item
holds one of the OP_xxx values. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx code */
  void *dataptr;           /* address of the variable for the option, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* descriptive text for the option */
} option_item;

/* Options without a single-letter equivalent get a negative value. This can be
used to identify them. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
217 nigel 87
218 nigel 53 static option_item optionlist[] = {
219 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
220     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
221     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
222     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
223     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
224     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
225     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
226     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
227     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
228     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
229     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
230     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
231     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
232     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
233     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
234     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
235     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
236     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
237     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
238     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
239     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
240     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
241     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
242     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
243     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
244     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
245     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
246     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
247     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
248     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
249     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
250     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
251     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
252     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
253     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
254 ph10 571
255     /* These two were accidentally implemented with underscores instead of
256     hyphens in the option names. As this was not discovered for several releases,
257     the incorrect versions are left in the table for compatibility. However, the
258     --help function misses out any option that has an underscore in its name. */
259 ph10 579
260 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
261     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
262 ph10 571
263 nigel 87 #ifdef JFRIEDL_DEBUG
264     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
265     #endif
266     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
267     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
268     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
269     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
270     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
271     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
272     { OP_NODATA, 0, NULL, NULL, NULL }
273 nigel 53 };
274    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
that the combination of -w and -x has the same effect as -x on its own, so we
can treat them as the same; that is why entries 2 and 3, and entries 6 and 7,
are identical. */

static const char *prefix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  "^(?:",        /* 2 */
  "^(?:",        /* 3 */
  "\\Q",         /* 4 */
  "\\b\\Q",      /* 5 */
  "^(?:\\Q",     /* 6 */
  "^(?:\\Q"      /* 7 */
};

static const char *suffix[] = {
  "",            /* 0 */
  "\\b",         /* 1 */
  ")$",          /* 2 */
  ")$",          /* 3 */
  "\\E",         /* 4 */
  "\\E\\b",      /* 5 */
  "\\E)$",       /* 6 */
  "\\E)$"        /* 7 */
};
285    
/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf".
They are private to this file, so they are given internal linkage like the
other tables here.

utf8_table3 is indexed by the number of additional bytes in a character and
gives the mask for the data bits in the first byte. utf8_table4 is indexed by
the bottom 6 bits of a first byte that is >= 0xc0 and gives the number of
additional bytes that follow it. */

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

static const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
295    
296    
297    
298 nigel 53 /*************************************************
299 ph10 586 * Exit from the program *
300     *************************************************/
301    
302     /* If there has been a resource error, give a suitable message.
303    
304     Argument: the return code
305     Returns: does not return
306     */
307    
308     static void
309     pcregrep_exit(int rc)
310     {
311     if (resource_error)
312     {
313     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
314     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
315     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
316     }
317    
318     exit(rc);
319     }
320    
321    
322     /*************************************************
323 nigel 87 * OS-specific functions *
324 nigel 53 *************************************************/
325    
326     /* These functions are defined so that they can be made system specific,
327 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
328 nigel 53
329    
330     /************* Directory scanning in Unix ***********/
331    
332 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
333 nigel 53 #include <sys/types.h>
334     #include <sys/stat.h>
335     #include <dirent.h>
336    
337     typedef DIR directory_type;
338    
/* Test whether a path names a directory. Yields '/' when stat() succeeds and
reports a directory, and 0 otherwise. A stat() failure also yields 0, in the
expectation that opening the path as a file will then fail. */

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
347    
348 nigel 67 static directory_type *
349 nigel 53 opendirectory(char *filename)
350     {
351     return opendir(filename);
352     }
353    
354 nigel 67 static char *
355 nigel 53 readdirectory(directory_type *dir)
356     {
357     for (;;)
358     {
359     struct dirent *dent = readdir(dir);
360     if (dent == NULL) return NULL;
361     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
362     return dent->d_name;
363     }
364 ph10 151 /* Control never reaches here */
365 nigel 53 }
366    
367 nigel 67 static void
368 nigel 53 closedirectory(directory_type *dir)
369     {
370     closedir(dir);
371     }
372    
373    
374 nigel 87 /************* Test for regular file in Unix **********/
375    
/* Test whether a path names a regular file. Yields 1 for a regular file and 0
for anything else that stat() can describe. A stat() failure yields 1, in the
expectation that opening the path as a file will then fail. */

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
384    
385    
386 ph10 519 /************* Test for a terminal in Unix **********/
387 nigel 87
388     static BOOL
389     is_stdout_tty(void)
390     {
391     return isatty(fileno(stdout));
392     }
393    
394 ph10 519 static BOOL
395     is_file_tty(FILE *f)
396     {
397     return isatty(fileno(f));
398     }
399 nigel 87
400 ph10 519
401 nigel 63 /************* Directory scanning in Win32 ***********/
402 nigel 53
403 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
404 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
405 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
406     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
407 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
408     undefined when it is indeed undefined. */
409 nigel 53
410 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
411 nigel 63
412     #ifndef STRICT
413     # define STRICT
414     #endif
415     #ifndef WIN32_LEAN_AND_MEAN
416     # define WIN32_LEAN_AND_MEAN
417     #endif
418 ph10 283
419     #include <windows.h>
420    
421 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
422     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
423     #endif
424    
425 nigel 63 typedef struct directory_type
426     {
427     HANDLE handle;
428     BOOL first;
429     WIN32_FIND_DATA data;
430     } directory_type;
431    
432     int
433     isdirectory(char *filename)
434     {
435     DWORD attr = GetFileAttributes(filename);
436     if (attr == INVALID_FILE_ATTRIBUTES)
437     return 0;
438     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
439     }
440    
441     directory_type *
442     opendirectory(char *filename)
443     {
444     size_t len;
445     char *pattern;
446     directory_type *dir;
447     DWORD err;
448     len = strlen(filename);
449     pattern = (char *) malloc(len + 3);
450     dir = (directory_type *) malloc(sizeof(*dir));
451     if ((pattern == NULL) || (dir == NULL))
452     {
453     fprintf(stderr, "pcregrep: malloc failed\n");
454 ph10 561 pcregrep_exit(2);
455 nigel 63 }
456     memcpy(pattern, filename, len);
457     memcpy(&(pattern[len]), "\\*", 3);
458     dir->handle = FindFirstFile(pattern, &(dir->data));
459     if (dir->handle != INVALID_HANDLE_VALUE)
460     {
461     free(pattern);
462     dir->first = TRUE;
463     return dir;
464     }
465     err = GetLastError();
466     free(pattern);
467     free(dir);
468     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
469     return NULL;
470     }
471    
472     char *
473     readdirectory(directory_type *dir)
474     {
475     for (;;)
476     {
477     if (!dir->first)
478     {
479     if (!FindNextFile(dir->handle, &(dir->data)))
480     return NULL;
481     }
482     else
483     {
484     dir->first = FALSE;
485     }
486     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
487     return dir->data.cFileName;
488     }
489     #ifndef _MSC_VER
490     return NULL; /* Keep compiler happy; never executed */
491     #endif
492     }
493    
494     void
495     closedirectory(directory_type *dir)
496     {
497     FindClose(dir->handle);
498     free(dir);
499     }
500    
501    
502 nigel 87 /************* Test for regular file in Win32 **********/
503    
/* There is no real test for a regular file here: every path that
isdirectory() does not report as a directory is assumed to be regular. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
511    
512    
513 ph10 519 /************* Test for a terminal in Win32 **********/
514 nigel 87
515     /* I don't know how to do this; assume never */
516    
517     static BOOL
518     is_stdout_tty(void)
519     {
520 ph10 283 return FALSE;
521 nigel 87 }
522    
523 ph10 519 static BOOL
524     is_file_tty(FILE *f)
525     {
526     return FALSE;
527     }
528 nigel 87
529 ph10 519
530 nigel 53 /************* Directory scanning when we can't do it ***********/
531    
532     /* The type is void, and apart from isdirectory(), the functions do nothing. */
533    
534 nigel 63 #else
535    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
542    
543 nigel 87
544     /************* Test for regular when we can't do it **********/
545    
/* Assume all files are regular: the result is always 1 and the argument is
not examined. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
549    
550    
551 ph10 519 /************* Test for a terminal when we can't do it **********/
552 nigel 87
553     static BOOL
554     is_stdout_tty(void)
555     {
556     return FALSE;
557     }
558    
559 ph10 519 static BOOL
560     is_file_tty(FILE *f)
561     {
562     return FALSE;
563     }
564 nigel 87
565 nigel 53 #endif
566    
567    
568    
569 ph10 137 #ifndef HAVE_STRERROR
570 nigel 49 /*************************************************
571     * Provide strerror() for non-ANSI libraries *
572     *************************************************/
573    
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function. It looks the number up in sys_errlist, and yields a
fixed text for any number outside the range 0 to sys_nerr - 1. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
587     #endif /* HAVE_STRERROR */
588    
589    
590    
591     /*************************************************
592 ph10 519 * Read one line of input *
593     *************************************************/
594    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. The available space is tested
before each character is read, so nothing is read from the file or written to
the buffer when "length" is zero or negative.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
622    
623    
624    
625     /*************************************************
626 nigel 93 * Find end of line *
627     *************************************************/
628    
629     /* The length of the endline sequence that is found is set via lenptr. This may
630     be zero at the very end of the file if there is no line-ending sequence there.
631    
632     Arguments:
633     p current position in line
634     endptr end of available data
635     lenptr where to put the length of the eol sequence
636    
637 ph10 587 Returns: pointer to the last byte of the line, including the newline byte(s)
638 nigel 93 */
639    
640     static char *
641     end_of_line(char *p, char *endptr, int *lenptr)
642     {
643     switch(endlinetype)
644     {
645     default: /* Just in case */
646     case EL_LF:
647     while (p < endptr && *p != '\n') p++;
648     if (p < endptr)
649     {
650     *lenptr = 1;
651     return p + 1;
652     }
653     *lenptr = 0;
654     return endptr;
655    
656     case EL_CR:
657     while (p < endptr && *p != '\r') p++;
658     if (p < endptr)
659     {
660     *lenptr = 1;
661     return p + 1;
662     }
663     *lenptr = 0;
664     return endptr;
665    
666     case EL_CRLF:
667     for (;;)
668     {
669     while (p < endptr && *p != '\r') p++;
670     if (++p >= endptr)
671     {
672     *lenptr = 0;
673     return endptr;
674     }
675     if (*p == '\n')
676     {
677     *lenptr = 2;
678     return p + 1;
679     }
680     }
681     break;
682    
683 ph10 149 case EL_ANYCRLF:
684     while (p < endptr)
685     {
686     int extra = 0;
687     register int c = *((unsigned char *)p);
688    
689     if (utf8 && c >= 0xc0)
690     {
691     int gcii, gcss;
692     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
693     gcss = 6*extra;
694     c = (c & utf8_table3[extra]) << gcss;
695     for (gcii = 1; gcii <= extra; gcii++)
696     {
697     gcss -= 6;
698     c |= (p[gcii] & 0x3f) << gcss;
699     }
700     }
701    
702     p += 1 + extra;
703    
704     switch (c)
705     {
706     case 0x0a: /* LF */
707     *lenptr = 1;
708     return p;
709    
710     case 0x0d: /* CR */
711     if (p < endptr && *p == 0x0a)
712     {
713     *lenptr = 2;
714     p++;
715     }
716     else *lenptr = 1;
717     return p;
718 ph10 150
719 ph10 149 default:
720     break;
721     }
722     } /* End of loop for ANYCRLF case */
723 ph10 150
724 ph10 149 *lenptr = 0; /* Must have hit the end */
725     return endptr;
726    
727 nigel 93 case EL_ANY:
728     while (p < endptr)
729     {
730     int extra = 0;
731     register int c = *((unsigned char *)p);
732    
733     if (utf8 && c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (p[gcii] & 0x3f) << gcss;
743     }
744     }
745    
746     p += 1 + extra;
747    
748     switch (c)
749     {
750     case 0x0a: /* LF */
751     case 0x0b: /* VT */
752     case 0x0c: /* FF */
753     *lenptr = 1;
754     return p;
755    
756     case 0x0d: /* CR */
757     if (p < endptr && *p == 0x0a)
758     {
759     *lenptr = 2;
760     p++;
761     }
762     else *lenptr = 1;
763     return p;
764    
765     case 0x85: /* NEL */
766     *lenptr = utf8? 2 : 1;
767     return p;
768    
769     case 0x2028: /* LS */
770     case 0x2029: /* PS */
771     *lenptr = 3;
772     return p;
773    
774     default:
775     break;
776     }
777     } /* End of loop for ANY case */
778    
779     *lenptr = 0; /* Must have hit the end */
780     return endptr;
781     } /* End of overall switch */
782     }
783    
784    
785    
786     /*************************************************
787     * Find start of previous line *
788     *************************************************/
789    
790     /* This is called when looking back for before lines to print.
791    
792     Arguments:
793     p start of the subsequent line
794     startptr start of available data
795    
796     Returns: pointer to the start of the previous line
797     */
798    
799     static char *
800     previous_line(char *p, char *startptr)
801     {
802     switch(endlinetype)
803     {
804     default: /* Just in case */
805     case EL_LF:
806     p--;
807     while (p > startptr && p[-1] != '\n') p--;
808     return p;
809    
810     case EL_CR:
811     p--;
812     while (p > startptr && p[-1] != '\n') p--;
813     return p;
814    
815     case EL_CRLF:
816     for (;;)
817     {
818     p -= 2;
819     while (p > startptr && p[-1] != '\n') p--;
820     if (p <= startptr + 1 || p[-2] == '\r') return p;
821     }
822     return p; /* But control should never get here */
823    
824     case EL_ANY:
825 ph10 150 case EL_ANYCRLF:
826 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
827     if (utf8) while ((*p & 0xc0) == 0x80) p--;
828    
829     while (p > startptr)
830     {
831     register int c;
832     char *pp = p - 1;
833    
834     if (utf8)
835     {
836     int extra = 0;
837     while ((*pp & 0xc0) == 0x80) pp--;
838     c = *((unsigned char *)pp);
839     if (c >= 0xc0)
840     {
841     int gcii, gcss;
842     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
843     gcss = 6*extra;
844     c = (c & utf8_table3[extra]) << gcss;
845     for (gcii = 1; gcii <= extra; gcii++)
846     {
847     gcss -= 6;
848     c |= (pp[gcii] & 0x3f) << gcss;
849     }
850     }
851     }
852     else c = *((unsigned char *)pp);
853    
854 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
855 nigel 93 {
856     case 0x0a: /* LF */
857 ph10 149 case 0x0d: /* CR */
858     return p;
859 ph10 150
860 ph10 149 default:
861     break;
862 ph10 150 }
863 ph10 149
864     else switch (c)
865     {
866     case 0x0a: /* LF */
867 nigel 93 case 0x0b: /* VT */
868     case 0x0c: /* FF */
869     case 0x0d: /* CR */
870     case 0x85: /* NEL */
871     case 0x2028: /* LS */
872     case 0x2029: /* PS */
873     return p;
874    
875     default:
876     break;
877     }
878    
879     p = pp; /* Back one character */
880     } /* End of loop for ANY case */
881    
882     return startptr; /* Hit start of data */
883     } /* End of overall switch */
884     }
885    
886    
887    
888    
889    
890     /*************************************************
891 nigel 77 * Print the previous "after" lines *
892 nigel 49 *************************************************/
893    
894 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
895 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
896     that a binary zero does not terminate it.
897 nigel 77
898     Arguments:
899     lastmatchnumber the number of the last matching line, plus one
900     lastmatchrestart where we restarted after the last match
901     endptr end of available data
902     printname filename for printing
903    
904     Returns: nothing
905     */
906    
907     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
908     char *endptr, char *printname)
909     {
910     if (after_context > 0 && lastmatchnumber > 0)
911     {
912     int count = 0;
913     while (lastmatchrestart < endptr && count++ < after_context)
914     {
915 nigel 93 int ellength;
916 nigel 77 char *pp = lastmatchrestart;
917     if (printname != NULL) fprintf(stdout, "%s-", printname);
918     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
919 nigel 93 pp = end_of_line(pp, endptr, &ellength);
920 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
921 nigel 93 lastmatchrestart = pp;
922 nigel 77 }
923     hyphenpending = TRUE;
924     }
925     }
926    
927    
928    
929     /*************************************************
930 ph10 378 * Apply patterns to subject till one matches *
931     *************************************************/
932    
933 ph10 392 /* This function is called to run through all patterns, looking for a match. It
934     is used multiple times for the same subject when colouring is enabled, in order
935 ph10 378 to find all possible matches.
936    
937     Arguments:
938 ph10 632 matchptr the start of the subject
939     length the length of the subject to match
940     startoffset where to start matching
941     offsets the offets vector to fill in
942     mrc address of where to put the result of pcre_exec()
943 ph10 392
944     Returns: TRUE if there was a match
945 ph10 378 FALSE if there was no match
946     invert if there was a non-fatal error
947 ph10 392 */
948 ph10 378
949     static BOOL
950 ph10 632 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
951     int *mrc)
952 ph10 378 {
953     int i;
954 ph10 561 size_t slen = length;
955     const char *msg = "this text:\n\n";
956     if (slen > 200)
957     {
958     slen = 200;
959     msg = "text that starts:\n\n";
960 ph10 579 }
961 ph10 378 for (i = 0; i < pattern_count; i++)
962     {
963 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
964     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
965 ph10 378 if (*mrc >= 0) return TRUE;
966     if (*mrc == PCRE_ERROR_NOMATCH) continue;
967 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
968 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
969 ph10 561 fprintf(stderr, "%s", msg);
970     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
971     fprintf(stderr, "\n\n");
972     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
973     resource_error = TRUE;
974 ph10 378 if (error_count++ > 20)
975     {
976 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
977     pcregrep_exit(2);
978 ph10 378 }
979     return invert; /* No more matching; don't show the line again */
980     }
981    
982     return FALSE; /* No match, no errors */
983     }
984    
985    
986    
987     /*************************************************
988 nigel 77 * Grep an individual file *
989     *************************************************/
990    
991     /* This is called from grep_or_recurse() below. It uses a buffer that is three
992     times the value of MBUFTHIRD. The matching point is never allowed to stray into
993     the top third of the buffer, thus keeping more of the file available for
994     context printing or for multiline scanning. For large files, the pointer will
995     be in the middle third most of the time, so the bottom third is available for
996     "before" context printing.
997    
998     Arguments:
999 ph10 286 handle the fopened FILE stream for a normal file
1000     the gzFile pointer when reading is via libz
1001     the BZFILE pointer when reading is via libbz2
1002     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1003 nigel 77 printname the file name if it is to be printed for each match
1004     or NULL if the file name is not to be printed
1005     it cannot be NULL if filenames[_nomatch]_only is set
1006    
1007     Returns: 0 if there was at least one match
1008     1 otherwise (no matches)
1009 ph10 286 2 if there is a read error on a .bz2 file

When filenames is FN_NOMATCH_ONLY the yield is the other way round: 1 is
returned as soon as one line is selected, and 0 is returned (after printing
printname) if the end of the file is reached without selecting any line.
1010 nigel 77 */
1011    
1012 nigel 49 static int
1013 ph10 286 pcregrep(void *handle, int frtype, char *printname)
1014 nigel 49 {
1015     int rc = 1;
1016 nigel 77 int linenumber = 1;
1017     int lastmatchnumber = 0;
1018 nigel 49 int count = 0;
/* filepos is the offset in the file of the start of the current line; it is
advanced together with ptr at the bottom of the main loop. */
1019 ph10 280 int filepos = 0;
1020 ph10 378 int offsets[OFFSET_SIZE];
1021 nigel 77 char *lastmatchrestart = NULL;
1022     char buffer[3*MBUFTHIRD];
1023     char *ptr = buffer;
1024     char *endptr;
1025     size_t bufflength;
1026     BOOL endhyphenpending = FALSE;
1027 ph10 519 BOOL input_line_buffered = line_buffered;
1028 ph10 286 FILE *in = NULL; /* Ensure initialized */
1029 nigel 49
1030 ph10 286 #ifdef SUPPORT_LIBZ
1031     gzFile ingz = NULL;
1032     #endif
1033 nigel 77
1034 ph10 286 #ifdef SUPPORT_LIBBZ2
1035     BZFILE *inbz2 = NULL;
1036     #endif
1037    
1038    
1039     /* Do the first read into the start of the buffer and set up the pointer to end
1040     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1041     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1042     fail. */
1043    
1044     #ifdef SUPPORT_LIBZ
1045     if (frtype == FR_LIBZ)
1046     {
1047     ingz = (gzFile)handle;
/* NOTE(review): unlike the bz2 branch below, the value returned by gzread() is
stored in the unsigned bufflength without a test for a negative (error) result
- confirm whether a read error should be detected here. */
1048     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1049     }
1050     else
1051     #endif
1052    
1053     #ifdef SUPPORT_LIBBZ2
1054     if (frtype == FR_LIBBZ2)
1055     {
1056     inbz2 = (BZFILE *)handle;
1057     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1058     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1059     } /* without the cast it is unsigned. */
1060     else
1061     #endif
1062    
1063     {
1064     in = (FILE *)handle;
/* Line-at-a-time input is forced when is_file_tty() says so (presumably when
the stream is a terminal - see that function), whatever line_buffered is. */
1065 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1066 ph10 535 bufflength = input_line_buffered?
1067 ph10 519 read_one_line(buffer, 3*MBUFTHIRD, in) :
1068     fread(buffer, 1, 3*MBUFTHIRD, in);
1069 ph10 286 }
1070 ph10 535
1071 nigel 77 endptr = buffer + bufflength;
1072    
1073     /* Loop while the current pointer is not at the end of the file. For large
1074     files, endptr will be at the end of the buffer when we are in the middle of the
1075     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1076     way, the buffer is shifted left and re-filled. */
1077    
1078     while (ptr < endptr)
1079 nigel 49 {
1080 ph10 378 int endlinelength;
1081 nigel 87 int mrc = 0;
1082 ph10 632 int startoffset = 0;
1083 ph10 378 BOOL match;
1084 ph10 286 char *matchptr = ptr;
1085 nigel 77 char *t = ptr;
1086     size_t length, linelength;
1087 nigel 49
1088 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1089     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1090     length remainder of the data in the buffer. Otherwise, it is the length of
1091 ph10 378 the next line, excluding the terminating newline. After matching, we always
1092     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1093     option is used for compiling, so that any match is constrained to be in the
1094     first line. */
1095 nigel 77
1096 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1097     linelength = t - ptr - endlinelength;
1098 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1099 nigel 77
1100 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1101    
1102     #ifdef JFRIEDL_DEBUG
1103     if (jfriedl_XT || jfriedl_XR)
1104     {
1105     #include <sys/time.h>
1106     #include <time.h>
1107     struct timeval start_time, end_time;
1108     struct timezone dummy;
1109 ph10 392 int i;
1110 nigel 89
1111     if (jfriedl_XT)
1112     {
1113     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1114     const char *orig = ptr;
1115     ptr = malloc(newlen + 1);
1116     if (!ptr) {
1117     printf("out of memory");
1118 ph10 561 pcregrep_exit(2);
1119 nigel 89 }
1120     endptr = ptr;
1121     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1122     for (i = 0; i < jfriedl_XT; i++) {
1123     strncpy(endptr, orig, length);
1124     endptr += length;
1125     }
1126     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1127     length = newlen;
1128     }
1129    
1130     if (gettimeofday(&start_time, &dummy) != 0)
1131     perror("bad gettimeofday");
1132    
1133    
1134     for (i = 0; i < jfriedl_XR; i++)
1135 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1136 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1137 nigel 89
1138     if (gettimeofday(&end_time, &dummy) != 0)
1139     perror("bad gettimeofday");
1140    
1141     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1142     -
1143     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1144    
1145     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1146     return 0;
1147     }
1148     #endif
1149    
1150 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1151 ph10 279 in order to find any further matches in the same line. */
1152 nigel 89
1153 ph10 286 ONLY_MATCHING_RESTART:
1154    
1155 ph10 392 /* Run through all the patterns until one matches or there is an error other
1156 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1157     finding subsequent matches when colouring matched lines. */
1158 ph10 392
1159 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1160 nigel 77
1161 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1162 nigel 77
1163 nigel 49 if (match != invert)
1164     {
1165 nigel 77 BOOL hyphenprinted = FALSE;
1166    
1167 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1168 nigel 77
1169 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1170    
1171     /* Just count if just counting is wanted. */
1172    
1173 nigel 49 if (count_only) count++;
1174    
1175 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1176     in the file. */
1177    
1178 ph10 420 else if (filenames == FN_MATCH_ONLY)
1179 nigel 49 {
1180 nigel 77 fprintf(stdout, "%s\n", printname);
1181 nigel 49 return 0;
1182     }
1183    
1184 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1185    
1186 nigel 77 else if (quiet) return 0;
1187 nigel 49
1188 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1189 ph10 565 captured portion of it, as long as this string is not empty, and the
1190     --file-offsets and --line-offsets options output offsets for the matching
1191     substring (they both force --only-matching = 0). None of these options
1192 ph10 280 prints any context. Afterwards, adjust the start and length, and then jump
1193     back to look for further matches in the same line. If we are in invert
1194 ph10 565 mode, however, nothing is printed and we do not restart - this could still
1195     be useful because the return code is set. */
1196 nigel 87
1197 ph10 565 else if (only_matching >= 0)
1198 nigel 87 {
1199 ph10 279 if (!invert)
1200 ph10 286 {
1201 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1202     if (number) fprintf(stdout, "%d:", linenumber);
1203 ph10 280 if (line_offsets)
1204 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1205 ph10 286 offsets[1] - offsets[0]);
1206 ph10 280 else if (file_offsets)
1207 ph10 579 fprintf(stdout, "%d,%d\n",
1208 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1209 ph10 286 offsets[1] - offsets[0]);
1210 ph10 565 else if (only_matching < mrc)
1211 ph10 377 {
1212 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1213     if (plen > 0)
1214 ph10 579 {
1215 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1216     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1217     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1218     fprintf(stdout, "\n");
1219 ph10 579 }
1220 ph10 392 }
1221 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1222 ph10 632 /*
1223 ph10 279 matchptr += offsets[1];
1224     length -= offsets[1];
1225 ph10 632 */
/* The value stored in match here is replaced by the match_patterns() call at
ONLY_MATCHING_RESTART before it is read again. */
1226 ph10 286 match = FALSE;
1227 ph10 564 if (line_buffered) fflush(stdout);
1228 ph10 632 rc = 0; /* Had some success */
/* Search the same subject again from the end of this match; matchptr and
length are left unchanged, only the start offset moves. */
1229     startoffset = offsets[1];
1230 ph10 286 goto ONLY_MATCHING_RESTART;
1231     }
1232 nigel 87 }
1233    
1234     /* This is the default case when none of the above options is set. We print
1235     the matching lines(s), possibly preceded and/or followed by other lines of
1236     context. */
1237    
1238 nigel 49 else
1239     {
1240 nigel 77 /* See if there is a requirement to print some "after" lines from a
1241     previous match. We never print any overlaps. */
1242    
1243     if (after_context > 0 && lastmatchnumber > 0)
1244     {
1245 nigel 93 int ellength;
1246 nigel 77 int linecount = 0;
1247     char *p = lastmatchrestart;
1248    
1249     while (p < ptr && linecount < after_context)
1250     {
1251 nigel 93 p = end_of_line(p, ptr, &ellength);
1252 nigel 77 linecount++;
1253     }
1254    
1255     /* It is important to advance lastmatchrestart during this printing so
1256 nigel 87 that it interacts correctly with any "before" printing below. Print
1257     each line's data using fwrite() in case there are binary zeroes. */
1258 nigel 77
1259     while (lastmatchrestart < p)
1260     {
1261     char *pp = lastmatchrestart;
1262     if (printname != NULL) fprintf(stdout, "%s-", printname);
1263     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1264 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1265 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1266 nigel 93 lastmatchrestart = pp;
1267 nigel 77 }
1268     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1269     }
1270    
1271     /* If there were non-contiguous lines printed above, insert hyphens. */
1272    
1273     if (hyphenpending)
1274     {
1275     fprintf(stdout, "--\n");
1276     hyphenpending = FALSE;
1277     hyphenprinted = TRUE;
1278     }
1279    
1280     /* See if there is a requirement to print some "before" lines for this
1281     match. Again, don't print overlaps. */
1282    
1283     if (before_context > 0)
1284     {
1285     int linecount = 0;
1286     char *p = ptr;
1287    
1288     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1289 nigel 87 linecount < before_context)
1290 nigel 77 {
1291 nigel 87 linecount++;
1292 nigel 93 p = previous_line(p, buffer);
1293 nigel 77 }
1294    
1295     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1296     fprintf(stdout, "--\n");
1297    
1298     while (p < ptr)
1299     {
1300 nigel 93 int ellength;
1301 nigel 77 char *pp = p;
1302     if (printname != NULL) fprintf(stdout, "%s-", printname);
1303     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1304 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1305 ph10 515 FWRITE(p, 1, pp - p, stdout);
1306 nigel 93 p = pp;
1307 nigel 77 }
1308     }
1309    
1310     /* Now print the matching line(s); ensure we set hyphenpending at the end
1311 nigel 85 of the file if any context lines are being output. */
1312 nigel 77
1313 nigel 85 if (after_context > 0 || before_context > 0)
1314     endhyphenpending = TRUE;
1315    
1316 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1317 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1318 nigel 77
1319     /* In multiline mode, we want to print to the end of the line in which
1320     the end of the matched string is found, so we adjust linelength and the
1321 ph10 222 line number appropriately, but only when there actually was a match
1322     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1323     the match will always be before the first newline sequence. */
1324 nigel 77
/* NOTE(review): this is a bitwise '&', not '&&'. The two give the same result
only when multiline holds 0 or 1 - presumably '&&' was intended; confirm. */
1325 ph10 587 if (multiline & !invert)
1326 nigel 77 {
1327 ph10 587 char *endmatch = ptr + offsets[1];
1328     t = ptr;
1329     while (t < endmatch)
1330 nigel 93 {
1331 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1332     if (t < endmatch) linenumber++; else break;
1333 nigel 93 }
1334 ph10 587 linelength = t - ptr - endlinelength;
1335 nigel 77 }
1336    
1337 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1338     zeroes are treated as just another data character. */
1339    
1340     /* This extra option, for Jeffrey Friedl's debugging requirements,
1341     replaces the matched string, or a specific captured string if it exists,
1342     with X. When this happens, colouring is ignored. */
1343    
1344     #ifdef JFRIEDL_DEBUG
1345     if (S_arg >= 0 && S_arg < mrc)
1346     {
1347     int first = S_arg * 2;
1348     int last = first + 1;
1349 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1350 nigel 87 fprintf(stdout, "X");
1351 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1352 nigel 87 }
1353     else
1354     #endif
1355    
1356 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1357 ph10 585 matches, but not of course if the line is a non-match. */
1358 ph10 589
1359 ph10 585 if (do_colour && !invert)
1360 nigel 87 {
1361 ph10 589 int plength;
1362 ph10 392 int last_offset = 0;
1363 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1364 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1365 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1366 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1367 ph10 378 for (;;)
1368     {
1369 ph10 632 /*
1370 ph10 392 last_offset += offsets[1];
1371 ph10 378 matchptr += offsets[1];
1372     length -= offsets[1];
1373 ph10 632 */
1374    
/* Look for another match starting where the previous one ended. The loop ends
when that point is at or beyond the end of the line(s) being printed, or when
match_patterns() does not return TRUE. Otherwise the uncoloured text between
the two matches is written, followed by the new match in colour. */
1375     startoffset = offsets[1];
1376     last_offset = startoffset;
1377 ph10 588 if (last_offset >= linelength + endlinelength ||
1378 ph10 632 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1379     break;
1380     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1381 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1382 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1383 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1384     }
1385 ph10 587
1386     /* In multiline mode, we may have already printed the complete line
1387 ph10 589 and its line-ending characters (if they matched the pattern), so there
1388 ph10 587 may be no more to print. */
1389 ph10 589
1390 ph10 587 plength = (linelength + endlinelength) - last_offset;
1391     if (plength > 0)
1392     FWRITE(ptr + last_offset, 1, plength, stdout);
1393 nigel 87 }
1394 ph10 392
1395 ph10 378 /* Not colouring; no need to search for further matches */
1396 ph10 392
1397 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1398 nigel 49 }
1399    
1400 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1401     given, flush the output. */
1402 nigel 87
1403 ph10 519 if (line_buffered) fflush(stdout);
1404 nigel 77 rc = 0; /* Had some success */
1405    
1406     /* Remember where the last match happened for after_context. We remember
1407     where we are about to restart, and that line's number. */
1408    
1409 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1410 nigel 77 lastmatchnumber = linenumber + 1;
1411 nigel 49 }
1412 nigel 77
1413 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1414     anything to be printed), we have to move on to the end of the match before
1415     proceeding. */
1416    
1417     if (multiline && invert && match)
1418     {
1419     int ellength;
1420     char *endmatch = ptr + offsets[1];
1421     t = ptr;
1422     while (t < endmatch)
1423     {
1424     t = end_of_line(t, endptr, &ellength);
1425     if (t <= endmatch) linenumber++; else break;
1426     }
1427     endmatch = end_of_line(endmatch, endptr, &ellength);
1428     linelength = endmatch - ptr - ellength;
1429     }
1430    
1431 ph10 286 /* Advance to after the newline and increment the line number. The file
1432 ph10 280 offset to the current line is maintained in filepos. */
1433 nigel 77
1434 nigel 93 ptr += linelength + endlinelength;
1435 ph10 530 filepos += (int)(linelength + endlinelength);
1436 nigel 77 linenumber++;
1437 ph10 535
1438     /* If input is line buffered, and the buffer is not yet full, read another
1439 ph10 519 line and add it into the buffer. */
1440 ph10 535
1441 ph10 519 if (input_line_buffered && bufflength < sizeof(buffer))
1442     {
/* The new line is read to the address in ptr, using the space that remains
between ptr and the end of the buffer; bufflength and endptr both grow by the
value that read_one_line() returns. */
1443     int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1444     bufflength += add;
1445 ph10 535 endptr += add;
1446     }
1447 nigel 77
1448     /* If we haven't yet reached the end of the file (the buffer is full), and
1449     the current point is in the top 1/3 of the buffer, slide the buffer down by
1450     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1451     about to be lost, print them. */
1452    
1453     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1454     {
1455     if (after_context > 0 &&
1456     lastmatchnumber > 0 &&
1457     lastmatchrestart < buffer + MBUFTHIRD)
1458     {
1459     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1460     lastmatchnumber = 0;
1461     }
1462    
1463     /* Now do the shuffle */
1464    
1465     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1466     ptr -= MBUFTHIRD;
1467 ph10 286
/* NOTE(review): in the refill below the value returned by gzread() or
BZ2_bzread() is added straight into bufflength, so a negative (error) return
is not detected at this point - confirm whether that is acceptable. */
1468     #ifdef SUPPORT_LIBZ
1469     if (frtype == FR_LIBZ)
1470     bufflength = 2*MBUFTHIRD +
1471     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1472     else
1473     #endif
1474    
1475     #ifdef SUPPORT_LIBBZ2
1476     if (frtype == FR_LIBBZ2)
1477     bufflength = 2*MBUFTHIRD +
1478     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1479     else
1480     #endif
1481    
1482 ph10 535 bufflength = 2*MBUFTHIRD +
1483     (input_line_buffered?
1484     read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1485 ph10 519 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1486 nigel 77 endptr = buffer + bufflength;
1487    
1488     /* Adjust any last match point */
1489    
1490     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1491     }
1492     } /* Loop through the whole file */
1493    
1494     /* End of file; print final "after" lines if wanted; do_after_lines sets
1495     hyphenpending if it prints something. */
1496    
/* only_matching < 0 together with !count_only excludes the two modes handled
by the "count_only" and "only_matching >= 0" branches in the loop above,
neither of which prints context lines. */
1497 ph10 565 if (only_matching < 0 && !count_only)
1498 nigel 87 {
1499     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1500     hyphenpending |= endhyphenpending;
1501     }
1502 nigel 77
1503     /* Print the file name if we are looking for those without matches and there
1504     were none. If we found a match, we won't have got this far. */
1505    
1506 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1507 nigel 77 {
1508     fprintf(stdout, "%s\n", printname);
1509     return 0;
1510 nigel 49 }
1511    
1512 nigel 77 /* Print the match count if wanted */
1513    
1514 nigel 49 if (count_only)
1515     {
1516 ph10 420 if (count > 0 || !omit_zero_count)
1517 ph10 461 {
1518     if (printname != NULL && filenames != FN_NONE)
1519 ph10 420 fprintf(stdout, "%s:", printname);
1520     fprintf(stdout, "%d\n", count);
1521 ph10 461 }
1522 nigel 49 }
1523    
1524     return rc;
1525     }
1526    
1527    
1528    
1529     /*************************************************
1530 nigel 53 * Grep a file or recurse into a directory *
1531     *************************************************/
1532    
1533 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1534     recursing; if it's a file, grep it.
1535    
1536     Arguments:
1537     pathname the path to investigate
1538 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1539 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1540    
1541     Returns: 0 if there was at least one match
1542     1 if there were no matches
1543     2 there was some kind of error
1544    
1545     However, file opening failures are suppressed if "silent" is set.

Notes: "silent" suppresses only the error message; 2 is still returned when a
file or directory cannot be opened. Within this function dir_recurse is only
passed on to the recursive calls; whether a directory is skipped or scanned is
decided by the value of dee_action.
1546     */
1547    
1548 nigel 53 static int
1549 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1550 nigel 53 {
1551     int rc = 1;
1552     int sep;
1553 ph10 286 int frtype;
1554     int pathlen;
1555     void *handle;
1556     FILE *in = NULL; /* Ensure initialized */
1557 nigel 53
1558 ph10 286 #ifdef SUPPORT_LIBZ
1559     gzFile ingz = NULL;
1560     #endif
1561    
1562     #ifdef SUPPORT_LIBBZ2
1563     BZFILE *inbz2 = NULL;
1564     #endif
1565    
1566 nigel 77 /* If the file name is "-" we scan stdin */
1567 nigel 53
1568 nigel 77 if (strcmp(pathname, "-") == 0)
1569 nigel 53 {
1570 ph10 286 return pcregrep(stdin, FR_PLAIN,
1571 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1572 nigel 77 stdin_name : NULL);
1573     }
1574    
1575 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1576 ph10 325 each file and directory within it, subject to any include or exclude patterns
1577     that were set. The scanning code is localized so it can be made
1578     system-specific. */
1579 nigel 87
/* The non-zero value returned by isdirectory() is kept in sep and used below
as the separator character when building the path of each directory entry. */
1580     if ((sep = isdirectory(pathname)) != 0)
1581 nigel 77 {
1582 nigel 87 if (dee_action == dee_SKIP) return 1;
1583     if (dee_action == dee_RECURSE)
1584 nigel 53 {
1585 nigel 87 char buffer[1024];
1586     char *nextfile;
1587     directory_type *dir = opendirectory(pathname);
1588 nigel 53
1589 nigel 87 if (dir == NULL)
1590     {
1591     if (!silent)
1592     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1593     strerror(errno));
1594     return 2;
1595     }
1596 nigel 77
1597 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1598     {
1599 ph10 324 int frc, nflen;
/* The precisions in the format limit the result to 512 + 1 + 128 characters,
which fits in buffer[1024]. A directory path longer than 512 characters or an
entry name longer than 128 is cut short without any diagnostic. */
1600 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1601 ph10 530 nflen = (int)(strlen(nextfile));
1602 ph10 345
/* The include/exclude patterns are applied to the entry name alone (nextfile),
not to the full path; no offsets vector is passed because only the yes/no
result is used. Directories and other entries have separate pattern pairs. */
1603 ph10 325 if (isdirectory(buffer))
1604     {
1605     if (exclude_dir_compiled != NULL &&
1606     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1607     continue;
1608 ph10 345
1609 ph10 325 if (include_dir_compiled != NULL &&
1610     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1611     continue;
1612     }
1613 ph10 345 else
1614     {
1615 ph10 324 if (exclude_compiled != NULL &&
1616     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617     continue;
1618 ph10 345
1619 ph10 324 if (include_compiled != NULL &&
1620     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621     continue;
1622 ph10 345 }
1623 nigel 77
/* Combine the results: a yield greater than 1 (error) always replaces rc; a
yield of 0 (match) replaces rc only while rc is still 1 (no match), so it
never hides an earlier error. */
1624 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1625     if (frc > 1) rc = frc;
1626     else if (frc == 0 && rc == 1) rc = 0;
1627     }
1628    
1629     closedirectory(dir);
1630     return rc;
1631 nigel 53 }
1632     }
1633    
1634 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1635     been requested. */
1636 nigel 53
1637 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1638    
1639     /* Control reaches here if we have a regular file, or if we have a directory
1640     and recursion or skipping was not requested, or if we have anything else and
1641     skipping was not requested. The scan proceeds. If this is the first and only
1642     argument at top level, we don't show the file name, unless we are only showing
1643     the file name, or the filename was forced (-H). */
1644    
1645 ph10 530 pathlen = (int)(strlen(pathname));
1646 ph10 286
1647     /* Open using zlib if it is supported and the file name ends with .gz. */
1648    
1649     #ifdef SUPPORT_LIBZ
1650     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1651 nigel 53 {
1652 ph10 286 ingz = gzopen(pathname, "rb");
1653     if (ingz == NULL)
1654     {
1655     if (!silent)
1656     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1657     strerror(errno));
1658     return 2;
1659     }
1660     handle = (void *)ingz;
1661     frtype = FR_LIBZ;
1662     }
1663     else
1664     #endif
1665    
1666     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1667    
1668     #ifdef SUPPORT_LIBBZ2
1669     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1670     {
/* A failed open is not tested here; it is caught by the common
"handle == NULL" test further down. */
1671     inbz2 = BZ2_bzopen(pathname, "rb");
1672     handle = (void *)inbz2;
1673     frtype = FR_LIBBZ2;
1674     }
1675     else
1676     #endif
1677    
1678     /* Otherwise use plain fopen(). The label is so that we can come back here if
1679     an attempt to read a .bz2 file indicates that it really is a plain file. */
1680    
1681     #ifdef SUPPORT_LIBBZ2
1682     PLAIN_FILE:
1683     #endif
1684     {
1685 ph10 419 in = fopen(pathname, "rb");
1686 ph10 286 handle = (void *)in;
1687     frtype = FR_PLAIN;
1688     }
1689    
1690     /* All the opening methods return errno when they fail. */
1691    
1692     if (handle == NULL)
1693     {
1694 nigel 77 if (!silent)
1695     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1696     strerror(errno));
1697 nigel 53 return 2;
1698     }
1699    
1700 ph10 286 /* Now grep the file */
1701    
/* The file name is passed for printing when filenames is greater than
FN_DEFAULT, or when it equals FN_DEFAULT and this is not the only path at top
level; otherwise NULL is passed. */
1702     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1703 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1704 nigel 77
1705 ph10 286 /* Close in an appropriate manner. */
1706    
1707     #ifdef SUPPORT_LIBZ
1708     if (frtype == FR_LIBZ)
1709     gzclose(ingz);
1710     else
1711     #endif
1712    
1713     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1714     read failed. If the error indicates that the file isn't in fact bzipped, try
1715     again as a normal file. */
1716    
1717     #ifdef SUPPORT_LIBBZ2
1718     if (frtype == FR_LIBBZ2)
1719     {
1720     if (rc == 2)
1721     {
1722     int errnum;
1723     const char *err = BZ2_bzerror(inbz2, &errnum);
1724     if (errnum == BZ_DATA_ERROR_MAGIC)
1725     {
/* The bz2 handle is closed before the jump back, where the same path is
reopened with fopen() and grepped again as a plain file. */
1726     BZ2_bzclose(inbz2);
1727     goto PLAIN_FILE;
1728     }
1729     else if (!silent)
1730     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1731     pathname, err);
1732     }
1733     BZ2_bzclose(inbz2);
1734     }
1735     else
1736     #endif
1737    
1738     /* Normal file close */
1739    
/* This statement is the final "else" of the conditional chain above, so it
runs only when frtype is FR_PLAIN (or when no compression support is built). */
1740 nigel 53 fclose(in);
1741 ph10 286
1742     /* Pass back the yield from pcregrep(). */
1743    
1744 nigel 53 return rc;
1745     }
1746    
1747    
1748    
1749    
1750     /*************************************************
1751 nigel 49 * Usage function *
1752     *************************************************/
1753    
1754     static int
1755     usage(int rc)
1756     {
1757 nigel 87 option_item *op;
1758     fprintf(stderr, "Usage: pcregrep [-");
1759     for (op = optionlist; op->one_char != 0; op++)
1760     {
1761     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1762     }
1763     fprintf(stderr, "] [long options] [pattern] [files]\n");
1764 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1765     "options.\n");
1766 nigel 49 return rc;
1767     }
1768    
1769    
1770    
1771    
1772     /*************************************************
1773 nigel 53 * Help function *
1774     *************************************************/
1775    
/* Print the full help text on stdout: a fixed introduction (which varies with
SUPPORT_LIBZ and SUPPORT_LIBBZ2), then one line for each entry in optionlist
up to the entry whose one_char is zero, then some closing notes. It takes no
arguments and returns nothing. */

1776     static void
1777     help(void)
1778     {
1779     option_item *op;
1780    
1781 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1782 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1783 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1784 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1785    
1786     #ifdef SUPPORT_LIBZ
1787     printf("Files whose names end in .gz are read using zlib.\n");
1788     #endif
1789    
1790     #ifdef SUPPORT_LIBBZ2
1791     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1792     #endif
1793    
1794     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1795     printf("Other files and the standard input are read as plain files.\n\n");
1796     #else
1797     printf("All files are read as plain files, without any interpretation.\n\n");
1798     #endif
1799    
1800 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1801     printf("Options:\n");
1802    
1803     for (op = optionlist; op->one_char != 0; op++)
1804     {
1805     int n;
/* s holds the short-option text "-c," (three characters plus the terminating
zero), or a blank filler when the option has no single-letter form. */
1806     char s[4];
1807 ph10 579
1808 ph10 571 /* Two options were accidentally implemented and documented with underscores
1809     instead of hyphens in their names, something that was not noticed for quite a
1810 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1811     in case people were using them. However, we don't want to display them in the
1812     help data. There are no other options that contain underscores, and we do not
1813     expect ever to implement such options. Therefore, just omit any option that
1814 ph10 571 contains an underscore. */
1815 ph10 579
1816     if (strchr(op->long_name, '_') != NULL) continue;
1817    
/* NOTE(review): the blank string literals in the next four statements look as
if runs of spaces were collapsed to a single space when this listing was
produced; the filler for s and the padding string given to "%.*s" presumably
contain several spaces in the real source - verify against the repository. */
1818 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
/* printf() returns the number of characters it wrote, so n is the amount of
padding still needed to reach column 31; at least one space is always used. */
1819 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1820 nigel 53 if (n < 1) n = 1;
1821 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1822 nigel 53 }
1823    
1824 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1825     printf("trailing white space is removed and blank lines are ignored.\n");
1826     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1827 nigel 53
1828 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1829 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1830     }
1831    
1832    
1833    
1834    
1835     /*************************************************
1836 nigel 77 * Handle a single-letter, no data option *
1837 nigel 53 *************************************************/
1838    
1839     static int
1840     handle_option(int letter, int options)
1841     {
1842     switch(letter)
1843     {
1844 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1845 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1846 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1847 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1848 nigel 53 case 'c': count_only = TRUE; break;
1849 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1850     case 'H': filenames = FN_FORCE; break;
1851     case 'h': filenames = FN_NONE; break;
1852 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1853 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1854 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1855 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1856 nigel 53 case 'n': number = TRUE; break;
1857 ph10 565 case 'o': only_matching = 0; break;
1858 nigel 77 case 'q': quiet = TRUE; break;
1859 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1860 nigel 53 case 's': silent = TRUE; break;
1861 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1862 nigel 53 case 'v': invert = TRUE; break;
1863 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1864     case 'x': process_options |= PO_LINE_MATCH; break;
1865 nigel 53
1866     case 'V':
1867 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1868 ph10 561 pcregrep_exit(0);
1869 nigel 53 break;
1870    
1871     default:
1872     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1873 ph10 561 pcregrep_exit(usage(2));
1874 nigel 53 }
1875    
1876     return options;
1877     }
1878    
1879    
1880    
1881    
1882     /*************************************************
1883 nigel 87 * Construct printed ordinal *
1884     *************************************************/
1885    
/* This turns a number into "1st", "3rd", etc. Numbers whose tens digit is 1
take "th" ("11th", "12th", "13th", "112th"), as do zero and negative numbers.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten
            by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is enough for the decimal digits and a sign; the
extra five cover the two-letter suffix, the terminating zero, and some slack. */

static char buffer[sizeof(int) * 3 + 5];
char *p = buffer;
const char *suffix = "th";

sprintf(p, "%d", n);
while (*p != 0) p++;

if ((n / 10) % 10 != 1) switch (n%10)
  {
  case 1: suffix = "st"; break;
  case 2: suffix = "nd"; break;
  case 3: suffix = "rd"; break;
  default: break;
  }

strcpy(p, suffix);
return buffer;
}
1904    
1905    
1906    
1907     /*************************************************
1908     * Compile a single pattern *
1909     *************************************************/
1910    
1911     /* When the -F option has been used, this is called for each substring.
1912     Otherwise it's called for each supplied pattern.
1913    
1914     Arguments:
1915     pattern the pattern string
1916     options the PCRE options
1917     filename the file name, or NULL for a command-line pattern
1918     count 0 if this is the only command line pattern, or
1919     number of the command line pattern, or
1920     linenumber for a pattern from a file
1921    
1922     Returns: TRUE on success, FALSE after an error
1923     */
1924    
1925     static BOOL
1926     compile_single_pattern(char *pattern, int options, char *filename, int count)
1927     {
1928     char buffer[MBUFTHIRD + 16];
1929     const char *error;
1930     int errptr;
1931    
1932     if (pattern_count >= MAX_PATTERN_COUNT)
1933     {
1934     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1935     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1936     return FALSE;
1937     }
1938    
1939     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1940     suffix[process_options]);
1941     pattern_list[pattern_count] =
1942     pcre_compile(buffer, options, &error, &errptr, pcretables);
1943 ph10 142 if (pattern_list[pattern_count] != NULL)
1944 ph10 141 {
1945 ph10 142 pattern_count++;
1946 ph10 141 return TRUE;
1947 ph10 142 }
1948 nigel 87
1949     /* Handle compile errors */
1950    
1951     errptr -= (int)strlen(prefix[process_options]);
1952     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1953    
1954     if (filename == NULL)
1955     {
1956     if (count == 0)
1957     fprintf(stderr, "pcregrep: Error in command-line regex "
1958     "at offset %d: %s\n", errptr, error);
1959     else
1960     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1961     "at offset %d: %s\n", ordin(count), errptr, error);
1962     }
1963     else
1964     {
1965     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1966     "at offset %d: %s\n", count, filename, errptr, error);
1967     }
1968    
1969     return FALSE;
1970     }
1971    
1972    
1973    
1974     /*************************************************
1975     * Compile one supplied pattern *
1976     *************************************************/
1977    
1978     /* When the -F option has been used, each string may be a list of strings,
1979 nigel 91 separated by line breaks. They will be matched literally.
1980 nigel 87
1981     Arguments:
1982     pattern the pattern string
1983     options the PCRE options
1984     filename the file name, or NULL for a command-line pattern
1985     count 0 if this is the only command line pattern, or
1986     number of the command line pattern, or
1987     linenumber for a pattern from a file
1988    
1989     Returns: TRUE on success, FALSE after an error
1990     */
1991    
1992     static BOOL
1993     compile_pattern(char *pattern, int options, char *filename, int count)
1994     {
1995     if ((process_options & PO_FIXED_STRINGS) != 0)
1996     {
1997 nigel 93 char *eop = pattern + strlen(pattern);
1998 nigel 87 char buffer[MBUFTHIRD];
1999     for(;;)
2000     {
2001 nigel 93 int ellength;
2002     char *p = end_of_line(pattern, eop, &ellength);
2003     if (ellength == 0)
2004 nigel 87 return compile_single_pattern(pattern, options, filename, count);
2005 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2006 nigel 93 pattern = p;
2007 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
2008     return FALSE;
2009     }
2010     }
2011     else return compile_single_pattern(pattern, options, filename, count);
2012     }
2013    
2014    
2015    
2016     /*************************************************
2017 nigel 49 * Main program *
2018     *************************************************/
2019    
2020 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2021    
2022 nigel 49 int
2023     main(int argc, char **argv)
2024     {
2025 nigel 53 int i, j;
2026 nigel 49 int rc = 1;
2027 nigel 87 int pcre_options = 0;
2028     int cmd_pattern_count = 0;
2029 ph10 141 int hint_count = 0;
2030 nigel 49 int errptr;
2031 nigel 87 BOOL only_one_at_top;
2032     char *patterns[MAX_PATTERN_COUNT];
2033     const char *locale_from = "--locale";
2034 nigel 49 const char *error;
2035    
2036 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2037     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2038 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2039 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2040 ph10 391 rather than escapes such as as '\r'. */
2041 nigel 91
2042     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2043     switch(i)
2044     {
2045 ph10 391 default: newline = (char *)"lf"; break;
2046     case 13: newline = (char *)"cr"; break;
2047     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2048     case -1: newline = (char *)"any"; break;
2049     case -2: newline = (char *)"anycrlf"; break;
2050 nigel 91 }
2051    
2052 nigel 49 /* Process the options */
2053    
2054     for (i = 1; i < argc; i++)
2055     {
2056 nigel 77 option_item *op = NULL;
2057     char *option_data = (char *)""; /* default to keep compiler happy */
2058     BOOL longop;
2059     BOOL longopwasequals = FALSE;
2060    
2061 nigel 49 if (argv[i][0] != '-') break;
2062 nigel 53
2063 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2064 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2065 nigel 63
2066 nigel 77 if (argv[i][1] == 0)
2067     {
2068 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2069 ph10 561 else pcregrep_exit(usage(2));
2070 nigel 77 }
2071 nigel 63
2072 nigel 77 /* Handle a long name option, or -- to terminate the options */
2073 nigel 53
2074     if (argv[i][1] == '-')
2075 nigel 49 {
2076 nigel 77 char *arg = argv[i] + 2;
2077     char *argequals = strchr(arg, '=');
2078 nigel 53
2079 nigel 77 if (*arg == 0) /* -- terminates options */
2080 nigel 49 {
2081 nigel 77 i++;
2082     break; /* out of the options-handling loop */
2083 nigel 53 }
2084 nigel 49
2085 nigel 77 longop = TRUE;
2086    
2087     /* Some long options have data that follows after =, for example file=name.
2088     Some options have variations in the long name spelling: specifically, we
2089     allow "regexp" because GNU grep allows it, though I personally go along
2090 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2091 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2092     both these categories. */
2093 nigel 77
2094 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2095     {
2096 nigel 77 char *opbra = strchr(op->long_name, '(');
2097     char *equals = strchr(op->long_name, '=');
2098 ph10 461
2099 ph10 422 /* Handle options with only one spelling of the name */
2100 ph10 461
2101 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2102 nigel 53 {
2103 nigel 77 if (equals == NULL) /* Not thing=data case */
2104     {
2105     if (strcmp(arg, op->long_name) == 0) break;
2106     }
2107     else /* Special case xxx=data */
2108     {
2109 ph10 530 int oplen = (int)(equals - op->long_name);
2110 ph10 535 int arglen = (argequals == NULL)?
2111 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2112 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2113     {
2114     option_data = arg + arglen;
2115     if (*option_data == '=')
2116     {
2117     option_data++;
2118     longopwasequals = TRUE;
2119     }
2120     break;
2121     }
2122     }
2123 nigel 53 }
2124 ph10 461
2125 ph10 422 /* Handle options with an alternate spelling of the name */
2126 ph10 461
2127     else
2128 nigel 77 {
2129     char buff1[24];
2130     char buff2[24];
2131 ph10 461
2132 ph10 530 int baselen = (int)(opbra - op->long_name);
2133     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2134 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2135 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2136 ph10 461
2137 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2138 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2139 ph10 461
2140     if (strncmp(arg, buff1, arglen) == 0 ||
2141 ph10 422 strncmp(arg, buff2, arglen) == 0)
2142     {
2143     if (equals != NULL && argequals != NULL)
2144     {
2145 ph10 461 option_data = argequals;
2146 ph10 422 if (*option_data == '=')
2147     {
2148 ph10 461 option_data++;
2149 ph10 422 longopwasequals = TRUE;
2150 ph10 461 }
2151     }
2152 nigel 77 break;
2153 ph10 461 }
2154 nigel 77 }
2155 nigel 53 }
2156 nigel 77
2157 nigel 53 if (op->one_char == 0)
2158     {
2159     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2160 ph10 561 pcregrep_exit(usage(2));
2161 nigel 53 }
2162     }
2163 nigel 49
2164 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2165     are not in the right form for putting in the option table because they use
2166     only one hyphen, yet are more than one character long. By putting them
2167     separately here, they will not get displayed as part of the help() output,
2168     but I don't think Jeffrey will care about that. */
2169    
2170     #ifdef JFRIEDL_DEBUG
2171     else if (strcmp(argv[i], "-pre") == 0) {
2172     jfriedl_prefix = argv[++i];
2173     continue;
2174     } else if (strcmp(argv[i], "-post") == 0) {
2175     jfriedl_postfix = argv[++i];
2176     continue;
2177     } else if (strcmp(argv[i], "-XT") == 0) {
2178     sscanf(argv[++i], "%d", &jfriedl_XT);
2179     continue;
2180     } else if (strcmp(argv[i], "-XR") == 0) {
2181     sscanf(argv[++i], "%d", &jfriedl_XR);
2182     continue;
2183     }
2184     #endif
2185    
2186    
2187 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2188     continue till we hit the last one or one that needs data. */
2189 nigel 53
2190     else
2191     {
2192     char *s = argv[i] + 1;
2193 nigel 77 longop = FALSE;
2194 nigel 53 while (*s != 0)
2195     {
2196 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2197 ph10 579 {
2198     if (*s == op->one_char) break;
2199 ph10 565 }
2200 nigel 77 if (op->one_char == 0)
2201 nigel 53 {
2202 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2203     *s, argv[i]);
2204 ph10 561 pcregrep_exit(usage(2));
2205 nigel 77 }
2206 ph10 579
2207 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2208 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2209 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2210     with other single-character options in the same string. */
2211 ph10 579
2212 ph10 565 option_data = s+1;
2213     if (op->type == OP_OP_NUMBER)
2214 ph10 579 {
2215     if (isdigit((unsigned char)s[1])) break;
2216 nigel 53 }
2217 ph10 565 else /* Check for end or a dataless option */
2218 ph10 579 {
2219 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2220 ph10 579 }
2221    
2222     /* Handle a single-character option with no data, then loop for the
2223 ph10 565 next character in the string. */
2224    
2225 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2226 nigel 49 }
2227     }
2228 nigel 77
2229 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2230     is NO_DATA, it means that there is no data, and the option might set
2231     something in the PCRE options. */
2232 nigel 77
2233     if (op->type == OP_NODATA)
2234     {
2235 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2236     continue;
2237     }
2238    
2239     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2240     either has a value or defaults to something. It cannot have data in a
2241 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2242 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2243 nigel 87
2244     if (*option_data == 0 &&
2245     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2246     {
2247     switch (op->one_char)
2248 nigel 77 {
2249 nigel 87 case N_COLOUR:
2250     colour_option = (char *)"auto";
2251     break;
2252 ph10 579
2253 ph10 565 case 'o':
2254     only_matching = 0;
2255 ph10 579 break;
2256    
2257 nigel 87 #ifdef JFRIEDL_DEBUG
2258     case 'S':
2259     S_arg = 0;
2260     break;
2261     #endif
2262 nigel 77 }
2263 nigel 87 continue;
2264     }
2265 nigel 77
2266 nigel 87 /* Otherwise, find the data string for the option. */
2267    
2268     if (*option_data == 0)
2269     {
2270     if (i >= argc - 1 || longopwasequals)
2271 nigel 77 {
2272 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2273 ph10 561 pcregrep_exit(usage(2));
2274 nigel 87 }
2275     option_data = argv[++i];
2276     }
2277    
2278     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2279     multiple times to create a list of patterns. */
2280    
2281     if (op->type == OP_PATLIST)
2282     {
2283     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2284     {
2285     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2286     MAX_PATTERN_COUNT);
2287     return 2;
2288     }
2289     patterns[cmd_pattern_count++] = option_data;
2290     }
2291    
2292     /* Otherwise, deal with single string or numeric data values. */
2293    
2294 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2295     op->type != OP_OP_NUMBER)
2296 nigel 87 {
2297     *((char **)op->dataptr) = option_data;
2298     }
2299 ph10 558
2300     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2301     only for unpicking arguments, so just keep it simple. */
2302    
2303 nigel 87 else
2304     {
2305 ph10 561 unsigned long int n = 0;
2306 ph10 558 char *endptr = option_data;
2307     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2308     while (isdigit((unsigned char)(*endptr)))
2309     n = n * 10 + (int)(*endptr++ - '0');
2310 nigel 87 if (*endptr != 0)
2311     {
2312     if (longop)
2313 nigel 77 {
2314 nigel 87 char *equals = strchr(op->long_name, '=');
2315     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2316 ph10 530 (int)(equals - op->long_name);
2317 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2318     option_data, nlen, op->long_name);
2319 nigel 77 }
2320 nigel 87 else
2321     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2322     option_data, op->one_char);
2323 ph10 561 pcregrep_exit(usage(2));
2324 nigel 77 }
2325 ph10 584 if (op->type == OP_LONGNUMBER)
2326     *((unsigned long int *)op->dataptr) = n;
2327     else
2328     *((int *)op->dataptr) = n;
2329 nigel 77 }
2330 nigel 49 }
2331    
2332 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2333     for -A and -B. */
2334    
2335     if (both_context > 0)
2336     {
2337     if (after_context == 0) after_context = both_context;
2338     if (before_context == 0) before_context = both_context;
2339     }
2340 ph10 286
2341     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2342 ph10 565 However, the latter two set only_matching. */
2343 nigel 77
2344 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2345 ph10 286 (file_offsets && line_offsets))
2346 ph10 280 {
2347     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2348     "and/or --line-offsets\n");
2349 ph10 561 pcregrep_exit(usage(2));
2350 ph10 280 }
2351    
2352 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2353 ph10 286
2354 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2355     LC_ALL environment variable is set, and if so, use it. */
2356 nigel 49
2357 nigel 87 if (locale == NULL)
2358 nigel 53 {
2359 nigel 87 locale = getenv("LC_ALL");
2360     locale_from = "LCC_ALL";
2361 nigel 53 }
2362 nigel 49
2363 nigel 87 if (locale == NULL)
2364     {
2365     locale = getenv("LC_CTYPE");
2366     locale_from = "LC_CTYPE";
2367     }
2368 nigel 49
2369 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2370     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2371    
2372     if (locale != NULL)
2373 nigel 49 {
2374 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2375 nigel 53 {
2376 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2377     locale, locale_from);
2378 nigel 53 return 2;
2379     }
2380 nigel 87 pcretables = pcre_maketables();
2381     }
2382 nigel 77
2383 nigel 87 /* Sort out colouring */
2384    
2385     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2386     {
2387     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2388     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2389     else
2390 nigel 53 {
2391 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2392     colour_option);
2393     return 2;
2394 nigel 77 }
2395 nigel 87 if (do_colour)
2396 nigel 77 {
2397 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2398     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2399     if (cs != NULL) colour_string = cs;
2400 nigel 77 }
2401 nigel 87 }
2402 ph10 535
2403 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2404    
2405     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2406     {
2407     pcre_options |= PCRE_NEWLINE_CR;
2408 nigel 93 endlinetype = EL_CR;
2409 nigel 91 }
2410     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2411     {
2412     pcre_options |= PCRE_NEWLINE_LF;
2413 nigel 93 endlinetype = EL_LF;
2414 nigel 91 }
2415     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2416     {
2417     pcre_options |= PCRE_NEWLINE_CRLF;
2418 nigel 93 endlinetype = EL_CRLF;
2419 nigel 91 }
2420 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2421     {
2422     pcre_options |= PCRE_NEWLINE_ANY;
2423     endlinetype = EL_ANY;
2424     }
2425 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2426     {
2427     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2428     endlinetype = EL_ANYCRLF;
2429     }
2430 nigel 91 else
2431     {
2432     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2433     return 2;
2434     }
2435    
2436 nigel 87 /* Interpret the text values for -d and -D */
2437    
2438     if (dee_option != NULL)
2439     {
2440     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2441     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2442     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2443     else
2444 nigel 77 {
2445 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2446     return 2;
2447 nigel 53 }
2448 nigel 49 }
2449    
2450 nigel 87 if (DEE_option != NULL)
2451     {
2452     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2453     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2454     else
2455     {
2456     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2457     return 2;
2458     }
2459     }
2460 nigel 49
2461 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2462 nigel 87
2463     #ifdef JFRIEDL_DEBUG
2464     if (S_arg > 9)
2465 nigel 49 {
2466 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2467     return 2;
2468     }
2469 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2470     {
2471     if (jfriedl_XT == 0) jfriedl_XT = 1;
2472     if (jfriedl_XR == 0) jfriedl_XR = 1;
2473     }
2474 nigel 87 #endif
2475 nigel 77
2476 nigel 87 /* Get memory to store the pattern and hints lists. */
2477    
2478     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2479     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2480    
2481     if (pattern_list == NULL || hints_list == NULL)
2482     {
2483     fprintf(stderr, "pcregrep: malloc failed\n");
2484 ph10 123 goto EXIT2;
2485 nigel 87 }
2486    
2487     /* If no patterns were provided by -e, and there is no file provided by -f,
2488     the first argument is the one and only pattern, and it must exist. */
2489    
2490     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2491     {
2492 nigel 63 if (i >= argc) return usage(2);
2493 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2494     }
2495 nigel 77
2496 nigel 87 /* Compile the patterns that were provided on the command line, either by
2497     multiple uses of -e or as a single unkeyed pattern. */
2498    
2499     for (j = 0; j < cmd_pattern_count; j++)
2500     {
2501     if (!compile_pattern(patterns[j], pcre_options, NULL,
2502     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2503 ph10 123 goto EXIT2;
2504 nigel 87 }
2505    
2506     /* Compile the regular expressions that are provided in a file. */
2507    
2508     if (pattern_filename != NULL)
2509     {
2510     int linenumber = 0;
2511     FILE *f;
2512     char *filename;
2513     char buffer[MBUFTHIRD];
2514    
2515     if (strcmp(pattern_filename, "-") == 0)
2516 nigel 77 {
2517 nigel 87 f = stdin;
2518     filename = stdin_name;
2519 nigel 77 }
2520 nigel 87 else
2521 nigel 77 {
2522 nigel 87 f = fopen(pattern_filename, "r");
2523     if (f == NULL)
2524     {
2525     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2526     strerror(errno));
2527 ph10 123 goto EXIT2;
2528 nigel 87 }
2529     filename = pattern_filename;
2530 nigel 77 }
2531    
2532 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2533 nigel 53 {
2534 nigel 87 char *s = buffer + (int)strlen(buffer);
2535     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2536     *s = 0;
2537     linenumber++;
2538     if (buffer[0] == 0) continue; /* Skip blank lines */
2539     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2540 ph10 121 goto EXIT2;
2541 nigel 53 }
2542 nigel 87
2543     if (f != stdin) fclose(f);
2544 nigel 49 }
2545    
2546 nigel 77 /* Study the regular expressions, as we will be running them many times */
2547 nigel 53
2548     for (j = 0; j < pattern_count; j++)
2549     {
2550     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2551     if (error != NULL)
2552     {
2553     char s[16];
2554     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2555     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2556 ph10 121 goto EXIT2;
2557 nigel 53 }
2558 ph10 142 hint_count++;
2559 nigel 53 }
2560 ph10 579
2561 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2562     pcre_extra block for each pattern. */
2563 nigel 53
2564 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2565     {
2566     for (j = 0; j < pattern_count; j++)
2567     {
2568     if (hints_list[j] == NULL)
2569     {
2570     hints_list[j] = malloc(sizeof(pcre_extra));
2571 ph10 579 if (hints_list[j] == NULL)
2572 ph10 561 {
2573     fprintf(stderr, "pcregrep: malloc failed\n");
2574     pcregrep_exit(2);
2575     }
2576     }
2577     if (match_limit > 0)
2578 ph10 579 {
2579 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2580     hints_list[j]->match_limit = match_limit;
2581 ph10 579 }
2582 ph10 561 if (match_limit_recursion > 0)
2583 ph10 579 {
2584 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2585     hints_list[j]->match_limit_recursion = match_limit_recursion;
2586 ph10 579 }
2587 ph10 561 }
2588 ph10 579 }
2589 ph10 561
2590 nigel 77 /* If there are include or exclude patterns, compile them. */
2591    
2592     if (exclude_pattern != NULL)
2593     {
2594 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2595     pcretables);
2596 nigel 77 if (exclude_compiled == NULL)
2597     {
2598     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2599     errptr, error);
2600 ph10 121 goto EXIT2;
2601 nigel 77 }
2602     }
2603    
2604     if (include_pattern != NULL)
2605     {
2606 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2607     pcretables);
2608 nigel 77 if (include_compiled == NULL)
2609     {
2610     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2611     errptr, error);
2612 ph10 121 goto EXIT2;
2613 nigel 77 }
2614     }
2615    
2616 ph10 325 if (exclude_dir_pattern != NULL)
2617     {
2618     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2619     pcretables);
2620     if (exclude_dir_compiled == NULL)
2621     {
2622     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2623     errptr, error);
2624     goto EXIT2;
2625     }
2626     }
2627    
2628     if (include_dir_pattern != NULL)
2629     {
2630     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2631     pcretables);
2632     if (include_dir_compiled == NULL)
2633     {
2634     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2635     errptr, error);
2636     goto EXIT2;
2637     }
2638     }
2639    
2640 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2641 nigel 49
2642 nigel 87 if (i >= argc)
2643 ph10 121 {
2644 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2645 ph10 121 goto EXIT;
2646 ph10 123 }
2647 nigel 49
2648 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2649     Pass in the fact that there is only one argument at top level - this suppresses
2650 nigel 87 the file name if the argument is not a directory and filenames are not
2651     otherwise forced. */
2652 nigel 49
2653 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2654 nigel 49
2655     for (; i < argc; i++)
2656     {
2657 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2658     only_one_at_top);
2659 nigel 77 if (frc > 1) rc = frc;
2660     else if (frc == 0 && rc == 1) rc = 0;
2661 nigel 49 }
2662    
2663 ph10 121 EXIT:
2664     if (pattern_list != NULL)
2665     {
2666 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2667 ph10 121 free(pattern_list);
2668 ph10 123 }
2669 ph10 121 if (hints_list != NULL)
2670     {
2671 ph10 579 for (i = 0; i < hint_count; i++)
2672 ph10 561 {
2673     if (hints_list[i] != NULL) free(hints_list[i]);
2674 ph10 579 }
2675 ph10 121 free(hints_list);
2676 ph10 123 }
2677 ph10 561 pcregrep_exit(rc);
2678 ph10 121
2679     EXIT2:
2680     rc = 2;
2681     goto EXIT;
2682 nigel 49 }
2683    
2684 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12