/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 654 - (hide annotations) (download)
Tue Aug 2 11:00:40 2011 UTC (21 months, 3 weeks ago) by ph10
File MIME type: text/plain
File size: 78780 byte(s)
Documentation and general text tidies in preparation for test release.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
#define FALSE 0
#define TRUE 1

typedef int BOOL;

#define MAX_PATTERN_COUNT 100

/* OFFSET_SIZE is the vector length that is handed to pcre_exec(). */

#define OFFSET_SIZE 99

/* PATBUFSIZE is the larger of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define PATBUFSIZE 8192
#else
#define PATBUFSIZE BUFSIZ
#endif

/* Settings of the "filenames" variable, which controls file name output. The
ordering matters: every value that is greater than FN_DEFAULT is taken to mean
that a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* Styles of file reading */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions selected by the -d option (dee_xxx) and the -D option (DEE_xxx) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits that request special pattern processing */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Types of line ending */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, the result is consumed by a dummy "if". Oddly, this does not also seem to
apply to fprintf().

The "if" is wrapped in do { } while (0) so that the expansion followed by a
semicolon is exactly one statement. Without the wrapper, writing

  if (cond) FWRITE(a,b,c,d); else ...

does not compile, and omitting the semicolon silently attaches the "else" to
the hidden "if" inside the macro. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 ph10 644 static char *main_buffer = NULL;
139 nigel 91 static char *newline = NULL;
140 nigel 53 static char *pattern_filename = NULL;
141 nigel 77 static char *stdin_name = (char *)"(standard input)";
142 nigel 87 static char *locale = NULL;
143    
144     static const unsigned char *pcretables = NULL;
145    
146 nigel 53 static int pattern_count = 0;
147 ph10 121 static pcre **pattern_list = NULL;
148     static pcre_extra **hints_list = NULL;
149 nigel 49
150 nigel 77 static char *include_pattern = NULL;
151     static char *exclude_pattern = NULL;
152 ph10 325 static char *include_dir_pattern = NULL;
153     static char *exclude_dir_pattern = NULL;
154 nigel 77
155     static pcre *include_compiled = NULL;
156     static pcre *exclude_compiled = NULL;
157 ph10 325 static pcre *include_dir_compiled = NULL;
158     static pcre *exclude_dir_compiled = NULL;
159 nigel 77
160     static int after_context = 0;
161     static int before_context = 0;
162     static int both_context = 0;
163 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
164     static int bufsize = 3*PCREGREP_BUFSIZE;
165 nigel 87 static int dee_action = dee_READ;
166     static int DEE_action = DEE_READ;
167     static int error_count = 0;
168     static int filenames = FN_DEFAULT;
169 ph10 565 static int only_matching = -1;
170 nigel 87 static int process_options = 0;
171 nigel 77
172 ph10 561 static unsigned long int match_limit = 0;
173     static unsigned long int match_limit_recursion = 0;
174    
175 nigel 49 static BOOL count_only = FALSE;
176 nigel 87 static BOOL do_colour = FALSE;
177 ph10 280 static BOOL file_offsets = FALSE;
178 nigel 77 static BOOL hyphenpending = FALSE;
179 nigel 49 static BOOL invert = FALSE;
180 ph10 519 static BOOL line_buffered = FALSE;
181 ph10 280 static BOOL line_offsets = FALSE;
182 nigel 77 static BOOL multiline = FALSE;
183 nigel 49 static BOOL number = FALSE;
184 ph10 420 static BOOL omit_zero_count = FALSE;
185 ph10 561 static BOOL resource_error = FALSE;
186 nigel 77 static BOOL quiet = FALSE;
187 nigel 49 static BOOL silent = FALSE;
188 nigel 93 static BOOL utf8 = FALSE;
189 nigel 49
/* The kinds of option, followed by the structure that describes one option.
optionlist[] is a vector of these structures. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* option letter, or a negative N_xxx code */
  void *dataptr;           /* where the option's value lives, or NULL */
  const char *long_name;   /* long option name */
  const char *help_text;   /* descriptive text for the option */
} option_item;
202 nigel 49
/* An option that has no single-letter form is given a negative code in place
of a letter, and can be recognized by that. */

#define N_COLOUR       (-1)
#define N_EXCLUDE      (-2)
#define N_EXCLUDE_DIR  (-3)
#define N_HELP         (-4)
#define N_INCLUDE      (-5)
#define N_INCLUDE_DIR  (-6)
#define N_LABEL        (-7)
#define N_LOCALE       (-8)
#define N_NULL         (-9)
#define N_LOFFSETS     (-10)
#define N_FOFFSETS     (-11)
#define N_LBUFFER      (-12)
#define N_M_LIMIT      (-13)
#define N_M_LIMIT_REC  (-14)
#define N_BUFSIZE      (-15)
221 nigel 87
222 nigel 53 static option_item optionlist[] = {
223 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
224     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
225     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
226     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
227 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
228 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
229     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
230     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
231     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
232     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
233     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
234     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
235     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
236     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
237     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
238     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
239     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
240     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
241     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
242     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
243     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
244     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
245     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
246     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
247     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
248     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
249     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
250     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
251     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
252     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
253     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
254     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
255     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
256     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
257     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
258     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
259 ph10 571
260     /* These two were accidentally implemented with underscores instead of
261     hyphens in the option names. As this was not discovered for several releases,
262     the incorrect versions are left in the table for compatibility. However, the
263     --help function misses out any option that has an underscore in its name. */
264 ph10 579
265 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
266     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
267 ph10 571
268 nigel 87 #ifdef JFRIEDL_DEBUG
269     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
270     #endif
271     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
272     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
273     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
274     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
275     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
276     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
277     { OP_NODATA, 0, NULL, NULL, NULL }
278 nigel 53 };
279    
/* Text to put before and after a pattern, selected by the -w, -x, and -F
options, which set the 1, 2, and 4 bits in process_options respectively. The
value of those three bits is the index into each table. Giving both -w and -x
has the same effect as -x alone, which is why entries 3 and 7 repeat entries 2
and 6. */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
290    
/* UTF-8 tables, needed when a multibyte character has to be decoded while
looking for a line ending. utf8_table4 is indexed by the bottom six bits of a
lead byte and gives the number of additional bytes in the character;
utf8_table3 is indexed by that number and gives the mask that picks the data
bits out of the lead byte. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
300    
301    
302    
303 nigel 53 /*************************************************
304 ph10 586 * Exit from the program *
305     *************************************************/
306    
307     /* If there has been a resource error, give a suitable message.
308    
309     Argument: the return code
310     Returns: does not return
311     */
312    
313     static void
314     pcregrep_exit(int rc)
315     {
316     if (resource_error)
317     {
318     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
319     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
320     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
321     }
322    
323     exit(rc);
324     }
325    
326    
327     /*************************************************
328 nigel 87 * OS-specific functions *
329 nigel 53 *************************************************/
330    
331     /* These functions are defined so that they can be made system specific,
332 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
333 nigel 53
334    
335     /************* Directory scanning in Unix ***********/
336    
337 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
338 nigel 53 #include <sys/types.h>
339     #include <sys/stat.h>
340     #include <dirent.h>
341    
typedef DIR directory_type;

/* Test whether a path names a directory. The yield is the character '/' if
it does, and zero if it does not or if stat() fails (in the expectation that
opening the path as a file will then fail as well). */

static int
isdirectory(char *filename)
{
struct stat sb;
if (stat(filename, &sb) >= 0 && (sb.st_mode & S_IFMT) == S_IFDIR)
  return '/';
return 0;
}
352    
353 nigel 67 static directory_type *
354 nigel 53 opendirectory(char *filename)
355     {
356     return opendir(filename);
357     }
358    
359 nigel 67 static char *
360 nigel 53 readdirectory(directory_type *dir)
361     {
362     for (;;)
363     {
364     struct dirent *dent = readdir(dir);
365     if (dent == NULL) return NULL;
366     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
367     return dent->d_name;
368     }
369 ph10 151 /* Control never reaches here */
370 nigel 53 }
371    
372 nigel 67 static void
373 nigel 53 closedirectory(directory_type *dir)
374     {
375     closedir(dir);
376     }
377    
378    
379 nigel 87 /************* Test for regular file in Unix **********/
380    
/* The yield is 1 if the path is a regular file and 0 if it is something else.
If stat() fails the yield is also 1, in the expectation that opening the path
as a file will then fail. */

static int
isregfile(char *filename)
{
struct stat sb;
if (stat(filename, &sb) >= 0)
  return (sb.st_mode & S_IFMT) == S_IFREG;
return 1;
}
389    
390    
391 ph10 519 /************* Test for a terminal in Unix **********/
392 nigel 87
393     static BOOL
394     is_stdout_tty(void)
395     {
396     return isatty(fileno(stdout));
397     }
398    
399 ph10 519 static BOOL
400     is_file_tty(FILE *f)
401     {
402     return isatty(fileno(f));
403     }
404 nigel 87
405 ph10 519
406 nigel 63 /************* Directory scanning in Win32 ***********/
407 nigel 53
408 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
409 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
410 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
411     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
412 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
413     undefined when it is indeed undefined. */
414 nigel 53
415 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
416 nigel 63
417     #ifndef STRICT
418     # define STRICT
419     #endif
420     #ifndef WIN32_LEAN_AND_MEAN
421     # define WIN32_LEAN_AND_MEAN
422     #endif
423 ph10 283
424     #include <windows.h>
425    
426 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
427     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
428     #endif
429    
430 nigel 63 typedef struct directory_type
431     {
432     HANDLE handle;
433     BOOL first;
434     WIN32_FIND_DATA data;
435     } directory_type;
436    
437     int
438     isdirectory(char *filename)
439     {
440     DWORD attr = GetFileAttributes(filename);
441     if (attr == INVALID_FILE_ATTRIBUTES)
442     return 0;
443     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
444     }
445    
446     directory_type *
447     opendirectory(char *filename)
448     {
449     size_t len;
450     char *pattern;
451     directory_type *dir;
452     DWORD err;
453     len = strlen(filename);
454     pattern = (char *) malloc(len + 3);
455     dir = (directory_type *) malloc(sizeof(*dir));
456     if ((pattern == NULL) || (dir == NULL))
457     {
458     fprintf(stderr, "pcregrep: malloc failed\n");
459 ph10 561 pcregrep_exit(2);
460 nigel 63 }
461     memcpy(pattern, filename, len);
462     memcpy(&(pattern[len]), "\\*", 3);
463     dir->handle = FindFirstFile(pattern, &(dir->data));
464     if (dir->handle != INVALID_HANDLE_VALUE)
465     {
466     free(pattern);
467     dir->first = TRUE;
468     return dir;
469     }
470     err = GetLastError();
471     free(pattern);
472     free(dir);
473     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
474     return NULL;
475     }
476    
477     char *
478     readdirectory(directory_type *dir)
479     {
480     for (;;)
481     {
482     if (!dir->first)
483     {
484     if (!FindNextFile(dir->handle, &(dir->data)))
485     return NULL;
486     }
487     else
488     {
489     dir->first = FALSE;
490     }
491     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
492     return dir->data.cFileName;
493     }
494     #ifndef _MSC_VER
495     return NULL; /* Keep compiler happy; never executed */
496     #endif
497     }
498    
499     void
500     closedirectory(directory_type *dir)
501     {
502     FindClose(dir->handle);
503     free(dir);
504     }
505    
506    
507 nigel 87 /************* Test for regular file in Win32 **********/
508    
/* I don't know how to test for a regular file here, or if it can be done, so
every path that isdirectory() does not accept is assumed to be regular. */

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
516    
517    
518 ph10 519 /************* Test for a terminal in Win32 **********/
519 nigel 87
520     /* I don't know how to do this; assume never */
521    
522     static BOOL
523     is_stdout_tty(void)
524     {
525 ph10 283 return FALSE;
526 nigel 87 }
527    
528 ph10 519 static BOOL
529     is_file_tty(FILE *f)
530     {
531     return FALSE;
532     }
533 nigel 87
534 ph10 519
535 nigel 53 /************* Directory scanning when we can't do it ***********/
536    
/* Without directory support the type is void. isdirectory() says that nothing
is a directory, opendirectory() and readdirectory() yield null pointers, and
closedirectory() does nothing. */

typedef void directory_type;

int
isdirectory(char *filename)
{
return 0;
}

directory_type *
opendirectory(char *filename)
{
return (directory_type*)0;
}

char *
readdirectory(directory_type *dir)
{
return (char*)0;
}

void
closedirectory(directory_type *dir)
{
}
547    
548 nigel 87
549     /************* Test for regular when we can't do it **********/
550    
/* There is no way to tell, so every file is taken to be regular. */

int
isregfile(char *filename)
{
return 1;
}
554    
555    
556 ph10 519 /************* Test for a terminal when we can't do it **********/
557 nigel 87
558     static BOOL
559     is_stdout_tty(void)
560     {
561     return FALSE;
562     }
563    
564 ph10 519 static BOOL
565     is_file_tty(FILE *f)
566     {
567     return FALSE;
568     }
569 nigel 87
570 nigel 53 #endif
571    
572    
573    
574 ph10 137 #ifndef HAVE_STRERROR
575 nigel 49 /*************************************************
576     * Provide strerror() for non-ANSI libraries *
577     *************************************************/
578    
/* A few old-fashioned systems that are still in use (SunOS4 is one) have no
strerror() in their libraries. They do supply sys_nerr and sys_errlist, from
which this simple replacement takes its text. A number outside the range of the
list gets a fixed message. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
592     #endif /* HAVE_STRERROR */
593    
594    
595    
596     /*************************************************
597 ph10 519 * Read one line of input *
598     *************************************************/
599    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. No terminating zero is added.
The limit is tested before each character is read, so a length that is zero or
negative reads nothing and stores nothing.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
627    
628    
629    
630     /*************************************************
631 nigel 93 * Find end of line *
632     *************************************************/
633    
634     /* The length of the endline sequence that is found is set via lenptr. This may
635     be zero at the very end of the file if there is no line-ending sequence there.
636    
637     Arguments:
638     p current position in line
639     endptr end of available data
640     lenptr where to put the length of the eol sequence
641    
642 ph10 654 Returns: pointer after the last byte of the line,
643 ph10 644 including the newline byte(s)
644 nigel 93 */
645    
646     static char *
647     end_of_line(char *p, char *endptr, int *lenptr)
648     {
649     switch(endlinetype)
650     {
651     default: /* Just in case */
652     case EL_LF:
653     while (p < endptr && *p != '\n') p++;
654     if (p < endptr)
655     {
656     *lenptr = 1;
657     return p + 1;
658     }
659     *lenptr = 0;
660     return endptr;
661    
662     case EL_CR:
663     while (p < endptr && *p != '\r') p++;
664     if (p < endptr)
665     {
666     *lenptr = 1;
667     return p + 1;
668     }
669     *lenptr = 0;
670     return endptr;
671    
672     case EL_CRLF:
673     for (;;)
674     {
675     while (p < endptr && *p != '\r') p++;
676     if (++p >= endptr)
677     {
678     *lenptr = 0;
679     return endptr;
680     }
681     if (*p == '\n')
682     {
683     *lenptr = 2;
684     return p + 1;
685     }
686     }
687     break;
688    
689 ph10 149 case EL_ANYCRLF:
690     while (p < endptr)
691     {
692     int extra = 0;
693     register int c = *((unsigned char *)p);
694    
695     if (utf8 && c >= 0xc0)
696     {
697     int gcii, gcss;
698     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
699     gcss = 6*extra;
700     c = (c & utf8_table3[extra]) << gcss;
701     for (gcii = 1; gcii <= extra; gcii++)
702     {
703     gcss -= 6;
704     c |= (p[gcii] & 0x3f) << gcss;
705     }
706     }
707    
708     p += 1 + extra;
709    
710     switch (c)
711     {
712     case 0x0a: /* LF */
713     *lenptr = 1;
714     return p;
715    
716     case 0x0d: /* CR */
717     if (p < endptr && *p == 0x0a)
718     {
719     *lenptr = 2;
720     p++;
721     }
722     else *lenptr = 1;
723     return p;
724 ph10 150
725 ph10 149 default:
726     break;
727     }
728     } /* End of loop for ANYCRLF case */
729 ph10 150
730 ph10 149 *lenptr = 0; /* Must have hit the end */
731     return endptr;
732    
733 nigel 93 case EL_ANY:
734     while (p < endptr)
735     {
736     int extra = 0;
737     register int c = *((unsigned char *)p);
738    
739     if (utf8 && c >= 0xc0)
740     {
741     int gcii, gcss;
742     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
743     gcss = 6*extra;
744     c = (c & utf8_table3[extra]) << gcss;
745     for (gcii = 1; gcii <= extra; gcii++)
746     {
747     gcss -= 6;
748     c |= (p[gcii] & 0x3f) << gcss;
749     }
750     }
751    
752     p += 1 + extra;
753    
754     switch (c)
755     {
756     case 0x0a: /* LF */
757     case 0x0b: /* VT */
758     case 0x0c: /* FF */
759     *lenptr = 1;
760     return p;
761    
762     case 0x0d: /* CR */
763     if (p < endptr && *p == 0x0a)
764     {
765     *lenptr = 2;
766     p++;
767     }
768     else *lenptr = 1;
769     return p;
770    
771     case 0x85: /* NEL */
772     *lenptr = utf8? 2 : 1;
773     return p;
774    
775     case 0x2028: /* LS */
776     case 0x2029: /* PS */
777     *lenptr = 3;
778     return p;
779    
780     default:
781     break;
782     }
783     } /* End of loop for ANY case */
784    
785     *lenptr = 0; /* Must have hit the end */
786     return endptr;
787     } /* End of overall switch */
788     }
789    
790    
791    
792     /*************************************************
793     * Find start of previous line *
794     *************************************************/
795    
796     /* This is called when looking back for before lines to print.
797    
798     Arguments:
799     p start of the subsequent line
800     startptr start of available data
801    
802     Returns: pointer to the start of the previous line
803     */
804    
805     static char *
806     previous_line(char *p, char *startptr)
807     {
808     switch(endlinetype)
809     {
810     default: /* Just in case */
811     case EL_LF:
812     p--;
813     while (p > startptr && p[-1] != '\n') p--;
814     return p;
815    
816     case EL_CR:
817     p--;
818     while (p > startptr && p[-1] != '\n') p--;
819     return p;
820    
821     case EL_CRLF:
822     for (;;)
823     {
824     p -= 2;
825     while (p > startptr && p[-1] != '\n') p--;
826     if (p <= startptr + 1 || p[-2] == '\r') return p;
827     }
828     return p; /* But control should never get here */
829    
830     case EL_ANY:
831 ph10 150 case EL_ANYCRLF:
832 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
833     if (utf8) while ((*p & 0xc0) == 0x80) p--;
834    
835     while (p > startptr)
836     {
837     register int c;
838     char *pp = p - 1;
839    
840     if (utf8)
841     {
842     int extra = 0;
843     while ((*pp & 0xc0) == 0x80) pp--;
844     c = *((unsigned char *)pp);
845     if (c >= 0xc0)
846     {
847     int gcii, gcss;
848     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
849     gcss = 6*extra;
850     c = (c & utf8_table3[extra]) << gcss;
851     for (gcii = 1; gcii <= extra; gcii++)
852     {
853     gcss -= 6;
854     c |= (pp[gcii] & 0x3f) << gcss;
855     }
856     }
857     }
858     else c = *((unsigned char *)pp);
859    
860 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
861 nigel 93 {
862     case 0x0a: /* LF */
863 ph10 149 case 0x0d: /* CR */
864     return p;
865 ph10 150
866 ph10 149 default:
867     break;
868 ph10 150 }
869 ph10 149
870     else switch (c)
871     {
872     case 0x0a: /* LF */
873 nigel 93 case 0x0b: /* VT */
874     case 0x0c: /* FF */
875     case 0x0d: /* CR */
876     case 0x85: /* NEL */
877     case 0x2028: /* LS */
878     case 0x2029: /* PS */
879     return p;
880    
881     default:
882     break;
883     }
884    
885     p = pp; /* Back one character */
886     } /* End of loop for ANY case */
887    
888     return startptr; /* Hit start of data */
889     } /* End of overall switch */
890     }
891    
892    
893    
894    
895    
896     /*************************************************
897 nigel 77 * Print the previous "after" lines *
898 nigel 49 *************************************************/
899    
900 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
901 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
902     that a binary zero does not terminate it.
903 nigel 77
904     Arguments:
905     lastmatchnumber the number of the last matching line, plus one
906     lastmatchrestart where we restarted after the last match
907     endptr end of available data
908     printname filename for printing
909    
910     Returns: nothing
911     */
912    
913     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
914     char *endptr, char *printname)
915     {
916     if (after_context > 0 && lastmatchnumber > 0)
917     {
918     int count = 0;
919     while (lastmatchrestart < endptr && count++ < after_context)
920     {
921 nigel 93 int ellength;
922 nigel 77 char *pp = lastmatchrestart;
923     if (printname != NULL) fprintf(stdout, "%s-", printname);
924     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
925 nigel 93 pp = end_of_line(pp, endptr, &ellength);
926 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
927 nigel 93 lastmatchrestart = pp;
928 nigel 77 }
929     hyphenpending = TRUE;
930     }
931     }
932    
933    
934    
935     /*************************************************
936 ph10 378 * Apply patterns to subject till one matches *
937     *************************************************/
938    
939 ph10 392 /* This function is called to run through all patterns, looking for a match. It
940     is used multiple times for the same subject when colouring is enabled, in order
941 ph10 378 to find all possible matches.
942    
943     Arguments:
944 ph10 632 matchptr the start of the subject
945     length the length of the subject to match
946     startoffset where to start matching
947     offsets the offets vector to fill in
948     mrc address of where to put the result of pcre_exec()
949 ph10 392
950     Returns: TRUE if there was a match
951 ph10 378 FALSE if there was no match
952     invert if there was a non-fatal error
953 ph10 392 */
954 ph10 378
955     static BOOL
956 ph10 654 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
957 ph10 632 int *mrc)
958 ph10 378 {
959     int i;
960 ph10 561 size_t slen = length;
961     const char *msg = "this text:\n\n";
962     if (slen > 200)
963     {
964     slen = 200;
965     msg = "text that starts:\n\n";
966 ph10 579 }
967 ph10 378 for (i = 0; i < pattern_count; i++)
968     {
969 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
970     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
971 ph10 378 if (*mrc >= 0) return TRUE;
972     if (*mrc == PCRE_ERROR_NOMATCH) continue;
973 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
974 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
975 ph10 561 fprintf(stderr, "%s", msg);
976     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
977     fprintf(stderr, "\n\n");
978     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
979     resource_error = TRUE;
980 ph10 378 if (error_count++ > 20)
981     {
982 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
983     pcregrep_exit(2);
984 ph10 378 }
985     return invert; /* No more matching; don't show the line again */
986     }
987    
988     return FALSE; /* No match, no errors */
989     }
990    
991    
992    
993     /*************************************************
994 nigel 77 * Grep an individual file *
995     *************************************************/
996    
997     /* This is called from grep_or_recurse() below. It uses a buffer that is three
998 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
999 nigel 77 the top third of the buffer, thus keeping more of the file available for
1000     context printing or for multiline scanning. For large files, the pointer will
1001     be in the middle third most of the time, so the bottom third is available for
1002     "before" context printing.
1003    
1004     Arguments:
1005 ph10 286 handle the fopened FILE stream for a normal file
1006     the gzFile pointer when reading is via libz
1007     the BZFILE pointer when reading is via libbz2
1008     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1009 ph10 644 filename the file name or NULL (for errors)
1010 nigel 77 printname the file name if it is to be printed for each match
1011     or NULL if the file name is not to be printed
1012     it cannot be NULL if filenames[_nomatch]_only is set
1013    
1014     Returns: 0 if there was at least one match
1015     1 otherwise (no matches)
1016 ph10 654 2 if an overlong line is encountered
1017 ph10 644 3 if there is a read error on a .bz2 file
1018 nigel 77 */
1019    
1020 nigel 49 static int
1021 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1022 nigel 49 {
1023     int rc = 1;
1024 nigel 77 int linenumber = 1;
1025     int lastmatchnumber = 0;
1026 nigel 49 int count = 0;
1027 ph10 280 int filepos = 0;
1028 ph10 378 int offsets[OFFSET_SIZE];
1029 nigel 77 char *lastmatchrestart = NULL;
1030 ph10 644 char *ptr = main_buffer;
1031 nigel 77 char *endptr;
1032     size_t bufflength;
1033     BOOL endhyphenpending = FALSE;
1034 ph10 519 BOOL input_line_buffered = line_buffered;
1035 ph10 286 FILE *in = NULL; /* Ensure initialized */
1036 nigel 49
1037 ph10 286 #ifdef SUPPORT_LIBZ
1038     gzFile ingz = NULL;
1039     #endif
1040 nigel 77
1041 ph10 286 #ifdef SUPPORT_LIBBZ2
1042     BZFILE *inbz2 = NULL;
1043     #endif
1044    
1045    
1046     /* Do the first read into the start of the buffer and set up the pointer to end
1047     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1048     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1049     fail. */
1050    
1051     #ifdef SUPPORT_LIBZ
1052     if (frtype == FR_LIBZ)
1053     {
1054     ingz = (gzFile)handle;
1055 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1056 ph10 286 }
1057     else
1058     #endif
1059    
1060     #ifdef SUPPORT_LIBBZ2
1061     if (frtype == FR_LIBBZ2)
1062     {
1063     inbz2 = (BZFILE *)handle;
1064 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1065 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1066     } /* without the cast it is unsigned. */
1067     else
1068     #endif
1069    
1070     {
1071     in = (FILE *)handle;
1072 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1073 ph10 535 bufflength = input_line_buffered?
1074 ph10 644 read_one_line(main_buffer, bufsize, in) :
1075     fread(main_buffer, 1, bufsize, in);
1076 ph10 286 }
1077 ph10 535
1078 ph10 644 endptr = main_buffer + bufflength;
1079 nigel 77
1080     /* Loop while the current pointer is not at the end of the file. For large
1081     files, endptr will be at the end of the buffer when we are in the middle of the
1082     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1083     way, the buffer is shifted left and re-filled. */
1084    
1085     while (ptr < endptr)
1086 nigel 49 {
1087 ph10 378 int endlinelength;
1088 nigel 87 int mrc = 0;
1089 ph10 654 int startoffset = 0;
1090 ph10 378 BOOL match;
1091 ph10 286 char *matchptr = ptr;
1092 nigel 77 char *t = ptr;
1093     size_t length, linelength;
1094 nigel 49
1095 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1096     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1097     length remainder of the data in the buffer. Otherwise, it is the length of
1098 ph10 378 the next line, excluding the terminating newline. After matching, we always
1099     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1100     option is used for compiling, so that any match is constrained to be in the
1101     first line. */
1102 nigel 77
1103 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1104     linelength = t - ptr - endlinelength;
1105 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1106 ph10 654
1107     /* Check to see if the line we are looking at extends right to the very end
1108     of the buffer without a line terminator. This means the line is too long to
1109 ph10 644 handle. */
1110 ph10 654
1111 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1112     {
1113     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1114 ph10 646 "pcregrep: check the --buffer-size option\n",
1115 ph10 654 linenumber,
1116 ph10 644 (filename == NULL)? "" : " of file ",
1117     (filename == NULL)? "" : filename);
1118     return 2;
1119 ph10 654 }
1120 nigel 77
1121 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1122    
1123     #ifdef JFRIEDL_DEBUG
1124     if (jfriedl_XT || jfriedl_XR)
1125     {
1126     #include <sys/time.h>
1127     #include <time.h>
1128     struct timeval start_time, end_time;
1129     struct timezone dummy;
1130 ph10 392 int i;
1131 nigel 89
1132     if (jfriedl_XT)
1133     {
1134     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1135     const char *orig = ptr;
1136     ptr = malloc(newlen + 1);
1137     if (!ptr) {
1138     printf("out of memory");
1139 ph10 561 pcregrep_exit(2);
1140 nigel 89 }
1141     endptr = ptr;
1142     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1143     for (i = 0; i < jfriedl_XT; i++) {
1144     strncpy(endptr, orig, length);
1145     endptr += length;
1146     }
1147     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1148     length = newlen;
1149     }
1150    
1151     if (gettimeofday(&start_time, &dummy) != 0)
1152     perror("bad gettimeofday");
1153    
1154    
1155     for (i = 0; i < jfriedl_XR; i++)
1156 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1157 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1158 nigel 89
1159     if (gettimeofday(&end_time, &dummy) != 0)
1160     perror("bad gettimeofday");
1161    
1162     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1163     -
1164     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1165    
1166     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1167     return 0;
1168     }
1169     #endif
1170    
1171 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1172 ph10 279 in order to find any further matches in the same line. */
1173 nigel 89
1174 ph10 286 ONLY_MATCHING_RESTART:
1175    
1176 ph10 392 /* Run through all the patterns until one matches or there is an error other
1177 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1178     finding subsequent matches when colouring matched lines. */
1179 ph10 392
1180 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1181 nigel 77
1182 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1183 nigel 77
1184 nigel 49 if (match != invert)
1185     {
1186 nigel 77 BOOL hyphenprinted = FALSE;
1187    
1188 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1189 nigel 77
1190 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1191    
1192     /* Just count if just counting is wanted. */
1193    
1194 nigel 49 if (count_only) count++;
1195    
1196 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1197     in the file. */
1198    
1199 ph10 420 else if (filenames == FN_MATCH_ONLY)
1200 nigel 49 {
1201 nigel 77 fprintf(stdout, "%s\n", printname);
1202 nigel 49 return 0;
1203     }
1204    
1205 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1206    
1207 nigel 77 else if (quiet) return 0;
1208 nigel 49
1209 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1210 ph10 565 captured portion of it, as long as this string is not empty, and the
1211     --file-offsets and --line-offsets options output offsets for the matching
1212     substring (they both force --only-matching = 0). None of these options
1213 ph10 636 prints any context. Afterwards, adjust the start and then jump back to look
1214     for further matches in the same line. If we are in invert mode, however,
1215     nothing is printed and we do not restart - this could still be useful
1216     because the return code is set. */
1217 nigel 87
1218 ph10 565 else if (only_matching >= 0)
1219 nigel 87 {
1220 ph10 279 if (!invert)
1221 ph10 286 {
1222 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1223     if (number) fprintf(stdout, "%d:", linenumber);
1224 ph10 280 if (line_offsets)
1225 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1226 ph10 286 offsets[1] - offsets[0]);
1227 ph10 280 else if (file_offsets)
1228 ph10 579 fprintf(stdout, "%d,%d\n",
1229 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1230 ph10 286 offsets[1] - offsets[0]);
1231 ph10 565 else if (only_matching < mrc)
1232 ph10 377 {
1233 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1234     if (plen > 0)
1235 ph10 579 {
1236 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1237     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1238     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1239     fprintf(stdout, "\n");
1240 ph10 579 }
1241 ph10 392 }
1242 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1243 ph10 286 match = FALSE;
1244 ph10 564 if (line_buffered) fflush(stdout);
1245 ph10 636 rc = 0; /* Had some success */
1246     startoffset = offsets[1]; /* Restart after the match */
1247 ph10 286 goto ONLY_MATCHING_RESTART;
1248     }
1249 nigel 87 }
1250    
1251     /* This is the default case when none of the above options is set. We print
1252     the matching lines(s), possibly preceded and/or followed by other lines of
1253     context. */
1254    
1255 nigel 49 else
1256     {
1257 nigel 77 /* See if there is a requirement to print some "after" lines from a
1258     previous match. We never print any overlaps. */
1259    
1260     if (after_context > 0 && lastmatchnumber > 0)
1261     {
1262 nigel 93 int ellength;
1263 nigel 77 int linecount = 0;
1264     char *p = lastmatchrestart;
1265    
1266     while (p < ptr && linecount < after_context)
1267     {
1268 nigel 93 p = end_of_line(p, ptr, &ellength);
1269 nigel 77 linecount++;
1270     }
1271    
1272     /* It is important to advance lastmatchrestart during this printing so
1273 nigel 87 that it interacts correctly with any "before" printing below. Print
1274     each line's data using fwrite() in case there are binary zeroes. */
1275 nigel 77
1276     while (lastmatchrestart < p)
1277     {
1278     char *pp = lastmatchrestart;
1279     if (printname != NULL) fprintf(stdout, "%s-", printname);
1280     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1281 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1282 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1283 nigel 93 lastmatchrestart = pp;
1284 nigel 77 }
1285     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1286     }
1287    
1288     /* If there were non-contiguous lines printed above, insert hyphens. */
1289    
1290     if (hyphenpending)
1291     {
1292     fprintf(stdout, "--\n");
1293     hyphenpending = FALSE;
1294     hyphenprinted = TRUE;
1295     }
1296    
1297     /* See if there is a requirement to print some "before" lines for this
1298     match. Again, don't print overlaps. */
1299    
1300     if (before_context > 0)
1301     {
1302     int linecount = 0;
1303     char *p = ptr;
1304    
1305 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1306 nigel 87 linecount < before_context)
1307 nigel 77 {
1308 nigel 87 linecount++;
1309 ph10 644 p = previous_line(p, main_buffer);
1310 nigel 77 }
1311    
1312     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1313     fprintf(stdout, "--\n");
1314    
1315     while (p < ptr)
1316     {
1317 nigel 93 int ellength;
1318 nigel 77 char *pp = p;
1319     if (printname != NULL) fprintf(stdout, "%s-", printname);
1320     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1321 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1322 ph10 515 FWRITE(p, 1, pp - p, stdout);
1323 nigel 93 p = pp;
1324 nigel 77 }
1325     }
1326    
1327     /* Now print the matching line(s); ensure we set hyphenpending at the end
1328 nigel 85 of the file if any context lines are being output. */
1329 nigel 77
1330 nigel 85 if (after_context > 0 || before_context > 0)
1331     endhyphenpending = TRUE;
1332    
1333 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1334 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1335 nigel 77
1336     /* In multiline mode, we want to print to the end of the line in which
1337     the end of the matched string is found, so we adjust linelength and the
1338 ph10 222 line number appropriately, but only when there actually was a match
1339     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1340     the match will always be before the first newline sequence. */
1341 nigel 77
1342 ph10 587 if (multiline & !invert)
1343 nigel 77 {
1344 ph10 587 char *endmatch = ptr + offsets[1];
1345     t = ptr;
1346     while (t < endmatch)
1347 nigel 93 {
1348 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1349     if (t < endmatch) linenumber++; else break;
1350 nigel 93 }
1351 ph10 587 linelength = t - ptr - endlinelength;
1352 nigel 77 }
1353    
1354 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1355     zeroes are treated as just another data character. */
1356    
1357     /* This extra option, for Jeffrey Friedl's debugging requirements,
1358     replaces the matched string, or a specific captured string if it exists,
1359     with X. When this happens, colouring is ignored. */
1360    
1361     #ifdef JFRIEDL_DEBUG
1362     if (S_arg >= 0 && S_arg < mrc)
1363     {
1364     int first = S_arg * 2;
1365     int last = first + 1;
1366 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1367 nigel 87 fprintf(stdout, "X");
1368 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1369 nigel 87 }
1370     else
1371     #endif
1372    
1373 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1374 ph10 585 matches, but not of course if the line is a non-match. */
1375 ph10 589
1376 ph10 585 if (do_colour && !invert)
1377 nigel 87 {
1378 ph10 589 int plength;
1379 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1380 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1381 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1382 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1383 ph10 378 for (;;)
1384     {
1385 ph10 632 startoffset = offsets[1];
1386 ph10 636 if (startoffset >= linelength + endlinelength ||
1387 ph10 654 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1388 ph10 632 break;
1389     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1390 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1391 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1392 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1393     }
1394 ph10 587
1395     /* In multiline mode, we may have already printed the complete line
1396 ph10 589 and its line-ending characters (if they matched the pattern), so there
1397 ph10 587 may be no more to print. */
1398 ph10 589
1399 ph10 636 plength = (linelength + endlinelength) - startoffset;
1400     if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1401 nigel 87 }
1402 ph10 392
1403 ph10 378 /* Not colouring; no need to search for further matches */
1404 ph10 392
1405 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1406 nigel 49 }
1407    
1408 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1409     given, flush the output. */
1410 nigel 87
1411 ph10 519 if (line_buffered) fflush(stdout);
1412 nigel 77 rc = 0; /* Had some success */
1413    
1414     /* Remember where the last match happened for after_context. We remember
1415     where we are about to restart, and that line's number. */
1416    
1417 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1418 nigel 77 lastmatchnumber = linenumber + 1;
1419 nigel 49 }
1420 nigel 77
1421 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1422     anything to be printed), we have to move on to the end of the match before
1423     proceeding. */
1424    
1425     if (multiline && invert && match)
1426     {
1427     int ellength;
1428     char *endmatch = ptr + offsets[1];
1429     t = ptr;
1430     while (t < endmatch)
1431     {
1432     t = end_of_line(t, endptr, &ellength);
1433     if (t <= endmatch) linenumber++; else break;
1434     }
1435     endmatch = end_of_line(endmatch, endptr, &ellength);
1436     linelength = endmatch - ptr - ellength;
1437     }
1438    
1439 ph10 286 /* Advance to after the newline and increment the line number. The file
1440 ph10 280 offset to the current line is maintained in filepos. */
1441 nigel 77
1442 nigel 93 ptr += linelength + endlinelength;
1443 ph10 530 filepos += (int)(linelength + endlinelength);
1444 nigel 77 linenumber++;
1445 ph10 535
1446     /* If input is line buffered, and the buffer is not yet full, read another
1447 ph10 519 line and add it into the buffer. */
1448 ph10 535
1449 ph10 644 if (input_line_buffered && bufflength < bufsize)
1450 ph10 519 {
1451 ph10 644 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1452 ph10 519 bufflength += add;
1453 ph10 535 endptr += add;
1454     }
1455 nigel 77
1456     /* If we haven't yet reached the end of the file (the buffer is full), and
1457     the current point is in the top 1/3 of the buffer, slide the buffer down by
1458     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1459     about to be lost, print them. */
1460    
1461 ph10 644 if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1462 nigel 77 {
1463     if (after_context > 0 &&
1464     lastmatchnumber > 0 &&
1465 ph10 644 lastmatchrestart < main_buffer + bufthird)
1466 nigel 77 {
1467     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1468     lastmatchnumber = 0;
1469     }
1470    
1471     /* Now do the shuffle */
1472    
1473 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1474     ptr -= bufthird;
1475 ph10 286
1476     #ifdef SUPPORT_LIBZ
1477     if (frtype == FR_LIBZ)
1478 ph10 644 bufflength = 2*bufthird +
1479     gzread (ingz, main_buffer + 2*bufthird, bufthird);
1480 ph10 286 else
1481     #endif
1482    
1483     #ifdef SUPPORT_LIBBZ2
1484     if (frtype == FR_LIBBZ2)
1485 ph10 644 bufflength = 2*bufthird +
1486     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1487 ph10 286 else
1488     #endif
1489    
1490 ph10 644 bufflength = 2*bufthird +
1491 ph10 535 (input_line_buffered?
1492 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1493     fread(main_buffer + 2*bufthird, 1, bufthird, in));
1494     endptr = main_buffer + bufflength;
1495 nigel 77
1496     /* Adjust any last match point */
1497    
1498 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1499 nigel 77 }
1500     } /* Loop through the whole file */
1501    
1502     /* End of file; print final "after" lines if wanted; do_after_lines sets
1503     hyphenpending if it prints something. */
1504    
1505 ph10 565 if (only_matching < 0 && !count_only)
1506 nigel 87 {
1507     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1508     hyphenpending |= endhyphenpending;
1509     }
1510 nigel 77
1511     /* Print the file name if we are looking for those without matches and there
1512     were none. If we found a match, we won't have got this far. */
1513    
1514 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1515 nigel 77 {
1516     fprintf(stdout, "%s\n", printname);
1517     return 0;
1518 nigel 49 }
1519    
1520 nigel 77 /* Print the match count if wanted */
1521    
1522 nigel 49 if (count_only)
1523     {
1524 ph10 420 if (count > 0 || !omit_zero_count)
1525 ph10 461 {
1526     if (printname != NULL && filenames != FN_NONE)
1527 ph10 420 fprintf(stdout, "%s:", printname);
1528     fprintf(stdout, "%d\n", count);
1529 ph10 461 }
1530 nigel 49 }
1531    
1532     return rc;
1533     }
1534    
1535    
1536    
1537     /*************************************************
1538 nigel 53 * Grep a file or recurse into a directory *
1539     *************************************************/
1540    
1541 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1542     recursing; if it's a file, grep it.
1543    
1544     Arguments:
1545     pathname the path to investigate
1546 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1547 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1548    
1549     Returns: 0 if there was at least one match
1550     1 if there were no matches
1551     2 there was some kind of error
1552    
1553     However, file opening failures are suppressed if "silent" is set.
1554     */
1555    
1556 nigel 53 static int
1557 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1558 nigel 53 {
1559     int rc = 1;
1560     int sep;
1561 ph10 286 int frtype;
1562     int pathlen;
1563     void *handle;
1564     FILE *in = NULL; /* Ensure initialized */
1565 nigel 53
1566 ph10 286 #ifdef SUPPORT_LIBZ
1567     gzFile ingz = NULL;
1568     #endif
1569    
1570     #ifdef SUPPORT_LIBBZ2
1571     BZFILE *inbz2 = NULL;
1572     #endif
1573    
1574 nigel 77 /* If the file name is "-" we scan stdin */
1575 nigel 53
1576 nigel 77 if (strcmp(pathname, "-") == 0)
1577 nigel 53 {
1578 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
1579 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1580 nigel 77 stdin_name : NULL);
1581     }
1582    
1583 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1584 ph10 325 each file and directory within it, subject to any include or exclude patterns
1585     that were set. The scanning code is localized so it can be made
1586     system-specific. */
1587 nigel 87
1588     if ((sep = isdirectory(pathname)) != 0)
1589 nigel 77 {
1590 nigel 87 if (dee_action == dee_SKIP) return 1;
1591     if (dee_action == dee_RECURSE)
1592 nigel 53 {
1593 nigel 87 char buffer[1024];
1594     char *nextfile;
1595     directory_type *dir = opendirectory(pathname);
1596 nigel 53
1597 nigel 87 if (dir == NULL)
1598     {
1599     if (!silent)
1600     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1601     strerror(errno));
1602     return 2;
1603     }
1604 nigel 77
1605 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1606     {
1607 ph10 324 int frc, nflen;
1608 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1609 ph10 530 nflen = (int)(strlen(nextfile));
1610 ph10 345
1611 ph10 325 if (isdirectory(buffer))
1612     {
1613     if (exclude_dir_compiled != NULL &&
1614     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1615     continue;
1616 ph10 345
1617 ph10 325 if (include_dir_compiled != NULL &&
1618     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1619     continue;
1620     }
1621 ph10 345 else
1622     {
1623 ph10 324 if (exclude_compiled != NULL &&
1624     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1625     continue;
1626 ph10 345
1627 ph10 324 if (include_compiled != NULL &&
1628     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1629     continue;
1630 ph10 345 }
1631 nigel 77
1632 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1633     if (frc > 1) rc = frc;
1634     else if (frc == 0 && rc == 1) rc = 0;
1635     }
1636    
1637     closedirectory(dir);
1638     return rc;
1639 nigel 53 }
1640     }
1641    
1642 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1643     been requested. */
1644 nigel 53
1645 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1646    
1647     /* Control reaches here if we have a regular file, or if we have a directory
1648     and recursion or skipping was not requested, or if we have anything else and
1649     skipping was not requested. The scan proceeds. If this is the first and only
1650     argument at top level, we don't show the file name, unless we are only showing
1651     the file name, or the filename was forced (-H). */
1652    
1653 ph10 530 pathlen = (int)(strlen(pathname));
1654 ph10 286
1655     /* Open using zlib if it is supported and the file name ends with .gz. */
1656    
1657     #ifdef SUPPORT_LIBZ
1658     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1659 nigel 53 {
1660 ph10 286 ingz = gzopen(pathname, "rb");
1661     if (ingz == NULL)
1662     {
1663     if (!silent)
1664     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1665     strerror(errno));
1666     return 2;
1667     }
1668     handle = (void *)ingz;
1669     frtype = FR_LIBZ;
1670     }
1671     else
1672     #endif
1673    
1674     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1675    
1676     #ifdef SUPPORT_LIBBZ2
1677     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1678     {
1679     inbz2 = BZ2_bzopen(pathname, "rb");
1680     handle = (void *)inbz2;
1681     frtype = FR_LIBBZ2;
1682     }
1683     else
1684     #endif
1685    
1686     /* Otherwise use plain fopen(). The label is so that we can come back here if
1687     an attempt to read a .bz2 file indicates that it really is a plain file. */
1688    
1689     #ifdef SUPPORT_LIBBZ2
1690     PLAIN_FILE:
1691     #endif
1692     {
1693 ph10 419 in = fopen(pathname, "rb");
1694 ph10 286 handle = (void *)in;
1695     frtype = FR_PLAIN;
1696     }
1697    
1698     /* All the opening methods return errno when they fail. */
1699    
1700     if (handle == NULL)
1701     {
1702 nigel 77 if (!silent)
1703     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1704     strerror(errno));
1705 nigel 53 return 2;
1706     }
1707    
1708 ph10 286 /* Now grep the file */
1709    
1710 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1711 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1712 nigel 77
1713 ph10 286 /* Close in an appropriate manner. */
1714    
1715     #ifdef SUPPORT_LIBZ
1716     if (frtype == FR_LIBZ)
1717     gzclose(ingz);
1718     else
1719     #endif
1720    
1721 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1722 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
1723     again as a normal file. */
1724    
1725     #ifdef SUPPORT_LIBBZ2
1726     if (frtype == FR_LIBBZ2)
1727     {
1728 ph10 644 if (rc == 3)
1729 ph10 286 {
1730     int errnum;
1731     const char *err = BZ2_bzerror(inbz2, &errnum);
1732     if (errnum == BZ_DATA_ERROR_MAGIC)
1733     {
1734     BZ2_bzclose(inbz2);
1735     goto PLAIN_FILE;
1736     }
1737     else if (!silent)
1738     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1739     pathname, err);
1740 ph10 654 rc = 2; /* The normal "something went wrong" code */
1741 ph10 286 }
1742     BZ2_bzclose(inbz2);
1743     }
1744     else
1745     #endif
1746    
1747     /* Normal file close */
1748    
1749 nigel 53 fclose(in);
1750 ph10 286
1751     /* Pass back the yield from pcregrep(). */
1752    
1753 nigel 53 return rc;
1754     }
1755    
1756    
1757    
1758    
1759     /*************************************************
1760 nigel 49 * Usage function *
1761     *************************************************/
1762    
1763     static int
1764     usage(int rc)
1765     {
1766 nigel 87 option_item *op;
1767     fprintf(stderr, "Usage: pcregrep [-");
1768     for (op = optionlist; op->one_char != 0; op++)
1769     {
1770     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1771     }
1772     fprintf(stderr, "] [long options] [pattern] [files]\n");
1773 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1774     "options.\n");
1775 nigel 49 return rc;
1776     }
1777    
1778    
1779    
1780    
1781     /*************************************************
1782 nigel 53 * Help function *
1783     *************************************************/
1784    
1785     static void
1786     help(void)
1787     {
1788     option_item *op;
1789    
1790 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1791 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1792 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1793 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1794    
1795     #ifdef SUPPORT_LIBZ
1796     printf("Files whose names end in .gz are read using zlib.\n");
1797     #endif
1798    
1799     #ifdef SUPPORT_LIBBZ2
1800     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1801     #endif
1802    
1803     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1804     printf("Other files and the standard input are read as plain files.\n\n");
1805     #else
1806     printf("All files are read as plain files, without any interpretation.\n\n");
1807     #endif
1808    
1809 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1810     printf("Options:\n");
1811    
1812     for (op = optionlist; op->one_char != 0; op++)
1813     {
1814     int n;
1815     char s[4];
1816 ph10 579
1817 ph10 571 /* Two options were accidentally implemented and documented with underscores
1818     instead of hyphens in their names, something that was not noticed for quite a
1819 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1820     in case people were using them. However, we don't want to display them in the
1821     help data. There are no other options that contain underscores, and we do not
1822     expect ever to implement such options. Therefore, just omit any option that
1823 ph10 571 contains an underscore. */
1824 ph10 579
1825     if (strchr(op->long_name, '_') != NULL) continue;
1826    
1827 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1828 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1829 nigel 53 if (n < 1) n = 1;
1830 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1831 nigel 53 }
1832    
1833 ph10 654 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1834 ph10 644 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1835     printf("When reading patterns from a file instead of using a command line option,\n");
1836 nigel 77 printf("trailing white space is removed and blank lines are ignored.\n");
1837 ph10 654 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1838 ph10 644 MAX_PATTERN_COUNT, PATBUFSIZE);
1839 nigel 53
1840 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1841 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1842     }
1843    
1844    
1845    
1846    
1847     /*************************************************
1848 nigel 77 * Handle a single-letter, no data option *
1849 nigel 53 *************************************************/
1850    
1851     static int
1852     handle_option(int letter, int options)
1853     {
1854     switch(letter)
1855     {
1856 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1857 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1858 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1859 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1860 nigel 53 case 'c': count_only = TRUE; break;
1861 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1862     case 'H': filenames = FN_FORCE; break;
1863     case 'h': filenames = FN_NONE; break;
1864 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1865 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1866 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1867 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1868 nigel 53 case 'n': number = TRUE; break;
1869 ph10 565 case 'o': only_matching = 0; break;
1870 nigel 77 case 'q': quiet = TRUE; break;
1871 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1872 nigel 53 case 's': silent = TRUE; break;
1873 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1874 nigel 53 case 'v': invert = TRUE; break;
1875 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1876     case 'x': process_options |= PO_LINE_MATCH; break;
1877 nigel 53
1878     case 'V':
1879 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1880 ph10 561 pcregrep_exit(0);
1881 nigel 53 break;
1882    
1883     default:
1884     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1885 ph10 561 pcregrep_exit(usage(2));
1886 nigel 53 }
1887    
1888     return options;
1889     }
1890    
1891    
1892    
1893    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would suggest.

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is always enough for the decimal digits of an
int; the extra four are for a sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
char *p = buffer;
int last_two = n % 100;

p += sprintf(p, "%d", n);

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1916    
1917    
1918    
1919     /*************************************************
1920     * Compile a single pattern *
1921     *************************************************/
1922    
1923     /* When the -F option has been used, this is called for each substring.
1924     Otherwise it's called for each supplied pattern.
1925    
1926     Arguments:
1927     pattern the pattern string
1928     options the PCRE options
1929     filename the file name, or NULL for a command-line pattern
1930     count 0 if this is the only command line pattern, or
1931     number of the command line pattern, or
1932     linenumber for a pattern from a file
1933    
1934     Returns: TRUE on success, FALSE after an error
1935     */
1936    
1937     static BOOL
1938     compile_single_pattern(char *pattern, int options, char *filename, int count)
1939     {
1940 ph10 644 char buffer[PATBUFSIZE];
1941 nigel 87 const char *error;
1942     int errptr;
1943    
1944     if (pattern_count >= MAX_PATTERN_COUNT)
1945     {
1946     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1947     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1948     return FALSE;
1949     }
1950    
1951 ph10 644 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1952 nigel 87 suffix[process_options]);
1953     pattern_list[pattern_count] =
1954     pcre_compile(buffer, options, &error, &errptr, pcretables);
1955 ph10 142 if (pattern_list[pattern_count] != NULL)
1956 ph10 141 {
1957 ph10 142 pattern_count++;
1958 ph10 141 return TRUE;
1959 ph10 142 }
1960 nigel 87
1961     /* Handle compile errors */
1962    
1963     errptr -= (int)strlen(prefix[process_options]);
1964     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1965    
1966     if (filename == NULL)
1967     {
1968     if (count == 0)
1969     fprintf(stderr, "pcregrep: Error in command-line regex "
1970     "at offset %d: %s\n", errptr, error);
1971     else
1972     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1973     "at offset %d: %s\n", ordin(count), errptr, error);
1974     }
1975     else
1976     {
1977     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1978     "at offset %d: %s\n", count, filename, errptr, error);
1979     }
1980    
1981     return FALSE;
1982     }
1983    
1984    
1985    
1986     /*************************************************
1987     * Compile one supplied pattern *
1988     *************************************************/
1989    
1990     /* When the -F option has been used, each string may be a list of strings,
1991 nigel 91 separated by line breaks. They will be matched literally.
1992 nigel 87
1993     Arguments:
1994     pattern the pattern string
1995     options the PCRE options
1996     filename the file name, or NULL for a command-line pattern
1997     count 0 if this is the only command line pattern, or
1998     number of the command line pattern, or
1999     linenumber for a pattern from a file
2000    
2001     Returns: TRUE on success, FALSE after an error
2002     */
2003    
2004     static BOOL
2005     compile_pattern(char *pattern, int options, char *filename, int count)
2006     {
2007     if ((process_options & PO_FIXED_STRINGS) != 0)
2008     {
2009 nigel 93 char *eop = pattern + strlen(pattern);
2010 ph10 644 char buffer[PATBUFSIZE];
2011 nigel 87 for(;;)
2012     {
2013 nigel 93 int ellength;
2014     char *p = end_of_line(pattern, eop, &ellength);
2015     if (ellength == 0)
2016 nigel 87 return compile_single_pattern(pattern, options, filename, count);
2017 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2018 nigel 93 pattern = p;
2019 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
2020     return FALSE;
2021     }
2022     }
2023     else return compile_single_pattern(pattern, options, filename, count);
2024     }
2025    
2026    
2027    
2028     /*************************************************
2029 nigel 49 * Main program *
2030     *************************************************/
2031    
2032 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2033    
2034 nigel 49 int
2035     main(int argc, char **argv)
2036     {
2037 nigel 53 int i, j;
2038 nigel 49 int rc = 1;
2039 nigel 87 int pcre_options = 0;
2040     int cmd_pattern_count = 0;
2041 ph10 141 int hint_count = 0;
2042 nigel 49 int errptr;
2043 nigel 87 BOOL only_one_at_top;
2044     char *patterns[MAX_PATTERN_COUNT];
2045     const char *locale_from = "--locale";
2046 nigel 49 const char *error;
2047    
2048 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2049     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2050 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2051 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2052 ph10 391 rather than escapes such as as '\r'. */
2053 nigel 91
2054     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2055     switch(i)
2056     {
2057 ph10 391 default: newline = (char *)"lf"; break;
2058     case 13: newline = (char *)"cr"; break;
2059     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2060     case -1: newline = (char *)"any"; break;
2061     case -2: newline = (char *)"anycrlf"; break;
2062 nigel 91 }
2063    
2064 nigel 49 /* Process the options */
2065    
2066     for (i = 1; i < argc; i++)
2067     {
2068 nigel 77 option_item *op = NULL;
2069     char *option_data = (char *)""; /* default to keep compiler happy */
2070     BOOL longop;
2071     BOOL longopwasequals = FALSE;
2072    
2073 nigel 49 if (argv[i][0] != '-') break;
2074 nigel 53
2075 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2076 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2077 nigel 63
2078 nigel 77 if (argv[i][1] == 0)
2079     {
2080 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2081 ph10 561 else pcregrep_exit(usage(2));
2082 nigel 77 }
2083 nigel 63
2084 nigel 77 /* Handle a long name option, or -- to terminate the options */
2085 nigel 53
2086     if (argv[i][1] == '-')
2087 nigel 49 {
2088 nigel 77 char *arg = argv[i] + 2;
2089     char *argequals = strchr(arg, '=');
2090 nigel 53
2091 nigel 77 if (*arg == 0) /* -- terminates options */
2092 nigel 49 {
2093 nigel 77 i++;
2094     break; /* out of the options-handling loop */
2095 nigel 53 }
2096 nigel 49
2097 nigel 77 longop = TRUE;
2098    
2099     /* Some long options have data that follows after =, for example file=name.
2100     Some options have variations in the long name spelling: specifically, we
2101     allow "regexp" because GNU grep allows it, though I personally go along
2102 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2103 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2104     both these categories. */
2105 nigel 77
2106 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2107     {
2108 nigel 77 char *opbra = strchr(op->long_name, '(');
2109     char *equals = strchr(op->long_name, '=');
2110 ph10 461
2111 ph10 422 /* Handle options with only one spelling of the name */
2112 ph10 461
2113 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2114 nigel 53 {
2115 nigel 77 if (equals == NULL) /* Not thing=data case */
2116     {
2117     if (strcmp(arg, op->long_name) == 0) break;
2118     }
2119     else /* Special case xxx=data */
2120     {
2121 ph10 530 int oplen = (int)(equals - op->long_name);
2122 ph10 535 int arglen = (argequals == NULL)?
2123 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2124 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2125     {
2126     option_data = arg + arglen;
2127     if (*option_data == '=')
2128     {
2129     option_data++;
2130     longopwasequals = TRUE;
2131     }
2132     break;
2133     }
2134     }
2135 nigel 53 }
2136 ph10 461
2137 ph10 422 /* Handle options with an alternate spelling of the name */
2138 ph10 461
2139     else
2140 nigel 77 {
2141     char buff1[24];
2142     char buff2[24];
2143 ph10 461
2144 ph10 530 int baselen = (int)(opbra - op->long_name);
2145     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2146 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2147 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2148 ph10 461
2149 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2150 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2151 ph10 461
2152     if (strncmp(arg, buff1, arglen) == 0 ||
2153 ph10 422 strncmp(arg, buff2, arglen) == 0)
2154     {
2155     if (equals != NULL && argequals != NULL)
2156     {
2157 ph10 461 option_data = argequals;
2158 ph10 422 if (*option_data == '=')
2159     {
2160 ph10 461 option_data++;
2161 ph10 422 longopwasequals = TRUE;
2162 ph10 461 }
2163     }
2164 nigel 77 break;
2165 ph10 461 }
2166 nigel 77 }
2167 nigel 53 }
2168 nigel 77
2169 nigel 53 if (op->one_char == 0)
2170     {
2171     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2172 ph10 561 pcregrep_exit(usage(2));
2173 nigel 53 }
2174     }
2175 nigel 49
2176 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2177     are not in the right form for putting in the option table because they use
2178     only one hyphen, yet are more than one character long. By putting them
2179     separately here, they will not get displayed as part of the help() output,
2180     but I don't think Jeffrey will care about that. */
2181    
2182     #ifdef JFRIEDL_DEBUG
2183     else if (strcmp(argv[i], "-pre") == 0) {
2184     jfriedl_prefix = argv[++i];
2185     continue;
2186     } else if (strcmp(argv[i], "-post") == 0) {
2187     jfriedl_postfix = argv[++i];
2188     continue;
2189     } else if (strcmp(argv[i], "-XT") == 0) {
2190     sscanf(argv[++i], "%d", &jfriedl_XT);
2191     continue;
2192     } else if (strcmp(argv[i], "-XR") == 0) {
2193     sscanf(argv[++i], "%d", &jfriedl_XR);
2194     continue;
2195     }
2196     #endif
2197    
2198    
2199 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2200     continue till we hit the last one or one that needs data. */
2201 nigel 53
2202     else
2203     {
2204     char *s = argv[i] + 1;
2205 nigel 77 longop = FALSE;
2206 nigel 53 while (*s != 0)
2207     {
2208 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2209 ph10 579 {
2210     if (*s == op->one_char) break;
2211 ph10 565 }
2212 nigel 77 if (op->one_char == 0)
2213 nigel 53 {
2214 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2215     *s, argv[i]);
2216 ph10 561 pcregrep_exit(usage(2));
2217 nigel 77 }
2218 ph10 579
2219 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2220 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2221 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2222     with other single-character options in the same string. */
2223 ph10 579
2224 ph10 565 option_data = s+1;
2225     if (op->type == OP_OP_NUMBER)
2226 ph10 579 {
2227     if (isdigit((unsigned char)s[1])) break;
2228 nigel 53 }
2229 ph10 565 else /* Check for end or a dataless option */
2230 ph10 579 {
2231 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2232 ph10 579 }
2233    
2234     /* Handle a single-character option with no data, then loop for the
2235 ph10 565 next character in the string. */
2236    
2237 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2238 nigel 49 }
2239     }
2240 nigel 77
2241 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2242     is NO_DATA, it means that there is no data, and the option might set
2243     something in the PCRE options. */
2244 nigel 77
2245     if (op->type == OP_NODATA)
2246     {
2247 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2248     continue;
2249     }
2250    
2251     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2252     either has a value or defaults to something. It cannot have data in a
2253 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2254 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2255 nigel 87
2256     if (*option_data == 0 &&
2257     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2258     {
2259     switch (op->one_char)
2260 nigel 77 {
2261 nigel 87 case N_COLOUR:
2262     colour_option = (char *)"auto";
2263     break;
2264 ph10 579
2265 ph10 565 case 'o':
2266     only_matching = 0;
2267 ph10 579 break;
2268    
2269 nigel 87 #ifdef JFRIEDL_DEBUG
2270     case 'S':
2271     S_arg = 0;
2272     break;
2273     #endif
2274 nigel 77 }
2275 nigel 87 continue;
2276     }
2277 nigel 77
2278 nigel 87 /* Otherwise, find the data string for the option. */
2279    
2280     if (*option_data == 0)
2281     {
2282     if (i >= argc - 1 || longopwasequals)
2283 nigel 77 {
2284 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2285 ph10 561 pcregrep_exit(usage(2));
2286 nigel 87 }
2287     option_data = argv[++i];
2288     }
2289    
2290     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2291     multiple times to create a list of patterns. */
2292    
2293     if (op->type == OP_PATLIST)
2294     {
2295     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2296     {
2297     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2298     MAX_PATTERN_COUNT);
2299     return 2;
2300     }
2301     patterns[cmd_pattern_count++] = option_data;
2302     }
2303    
2304     /* Otherwise, deal with single string or numeric data values. */
2305    
2306 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2307     op->type != OP_OP_NUMBER)
2308 nigel 87 {
2309     *((char **)op->dataptr) = option_data;
2310     }
2311 ph10 558
2312     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2313     only for unpicking arguments, so just keep it simple. */
2314    
2315 nigel 87 else
2316     {
2317 ph10 561 unsigned long int n = 0;
2318 ph10 558 char *endptr = option_data;
2319     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2320     while (isdigit((unsigned char)(*endptr)))
2321     n = n * 10 + (int)(*endptr++ - '0');
2322 ph10 644 if (toupper(*endptr) == 'K')
2323     {
2324 ph10 654 n *= 1024;
2325     endptr++;
2326     }
2327 ph10 644 else if (toupper(*endptr) == 'M')
2328     {
2329 ph10 654 n *= 1024*1024;
2330     endptr++;
2331     }
2332 nigel 87 if (*endptr != 0)
2333     {
2334     if (longop)
2335 nigel 77 {
2336 nigel 87 char *equals = strchr(op->long_name, '=');
2337     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2338 ph10 530 (int)(equals - op->long_name);
2339 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2340     option_data, nlen, op->long_name);
2341 nigel 77 }
2342 nigel 87 else
2343     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2344     option_data, op->one_char);
2345 ph10 561 pcregrep_exit(usage(2));
2346 nigel 77 }
2347 ph10 584 if (op->type == OP_LONGNUMBER)
2348     *((unsigned long int *)op->dataptr) = n;
2349     else
2350     *((int *)op->dataptr) = n;
2351 nigel 77 }
2352 nigel 49 }
2353    
2354 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2355     for -A and -B. */
2356    
2357     if (both_context > 0)
2358     {
2359     if (after_context == 0) after_context = both_context;
2360     if (before_context == 0) before_context = both_context;
2361     }
2362 ph10 286
2363     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2364 ph10 565 However, the latter two set only_matching. */
2365 nigel 77
2366 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2367 ph10 286 (file_offsets && line_offsets))
2368 ph10 280 {
2369     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2370     "and/or --line-offsets\n");
2371 ph10 561 pcregrep_exit(usage(2));
2372 ph10 280 }
2373    
2374 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2375 ph10 286
2376 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2377     LC_ALL environment variable is set, and if so, use it. */
2378 nigel 49
2379 nigel 87 if (locale == NULL)
2380 nigel 53 {
2381 nigel 87 locale = getenv("LC_ALL");
2382     locale_from = "LCC_ALL";
2383 nigel 53 }
2384 nigel 49
2385 nigel 87 if (locale == NULL)
2386     {
2387     locale = getenv("LC_CTYPE");
2388     locale_from = "LC_CTYPE";
2389     }
2390 nigel 49
2391 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2392     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2393    
2394     if (locale != NULL)
2395 nigel 49 {
2396 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2397 nigel 53 {
2398 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2399     locale, locale_from);
2400 nigel 53 return 2;
2401     }
2402 nigel 87 pcretables = pcre_maketables();
2403     }
2404 nigel 77
2405 nigel 87 /* Sort out colouring */
2406    
2407     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2408     {
2409     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2410     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2411     else
2412 nigel 53 {
2413 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2414     colour_option);
2415     return 2;
2416 nigel 77 }
2417 nigel 87 if (do_colour)
2418 nigel 77 {
2419 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2420     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2421     if (cs != NULL) colour_string = cs;
2422 nigel 77 }
2423 nigel 87 }
2424 ph10 535
2425 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2426    
2427     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2428     {
2429     pcre_options |= PCRE_NEWLINE_CR;
2430 nigel 93 endlinetype = EL_CR;
2431 nigel 91 }
2432     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2433     {
2434     pcre_options |= PCRE_NEWLINE_LF;
2435 nigel 93 endlinetype = EL_LF;
2436 nigel 91 }
2437     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2438     {
2439     pcre_options |= PCRE_NEWLINE_CRLF;
2440 nigel 93 endlinetype = EL_CRLF;
2441 nigel 91 }
2442 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2443     {
2444     pcre_options |= PCRE_NEWLINE_ANY;
2445     endlinetype = EL_ANY;
2446     }
2447 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2448     {
2449     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2450     endlinetype = EL_ANYCRLF;
2451     }
2452 nigel 91 else
2453     {
2454     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2455     return 2;
2456     }
2457    
2458 nigel 87 /* Interpret the text values for -d and -D */
2459    
2460     if (dee_option != NULL)
2461     {
2462     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2463     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2464     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2465     else
2466 nigel 77 {
2467 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2468     return 2;
2469 nigel 53 }
2470 nigel 49 }
2471    
2472 nigel 87 if (DEE_option != NULL)
2473     {
2474     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2475     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2476     else
2477     {
2478     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2479     return 2;
2480     }
2481     }
2482 nigel 49
2483 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2484 nigel 87
2485     #ifdef JFRIEDL_DEBUG
2486     if (S_arg > 9)
2487 nigel 49 {
2488 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2489     return 2;
2490     }
2491 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2492     {
2493     if (jfriedl_XT == 0) jfriedl_XT = 1;
2494     if (jfriedl_XR == 0) jfriedl_XR = 1;
2495     }
2496 nigel 87 #endif
2497 nigel 77
2498 ph10 644 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2499 nigel 87
2500 ph10 644 bufsize = 3*bufthird;
2501     main_buffer = (char *)malloc(bufsize);
2502 nigel 87 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2503     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2504    
2505 ph10 644 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2506 nigel 87 {
2507     fprintf(stderr, "pcregrep: malloc failed\n");
2508 ph10 123 goto EXIT2;
2509 nigel 87 }
2510    
2511     /* If no patterns were provided by -e, and there is no file provided by -f,
2512     the first argument is the one and only pattern, and it must exist. */
2513    
2514     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2515     {
2516 nigel 63 if (i >= argc) return usage(2);
2517 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2518     }
2519 nigel 77
2520 nigel 87 /* Compile the patterns that were provided on the command line, either by
2521     multiple uses of -e or as a single unkeyed pattern. */
2522    
2523     for (j = 0; j < cmd_pattern_count; j++)
2524     {
2525     if (!compile_pattern(patterns[j], pcre_options, NULL,
2526     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2527 ph10 123 goto EXIT2;
2528 nigel 87 }
2529    
2530     /* Compile the regular expressions that are provided in a file. */
2531    
2532     if (pattern_filename != NULL)
2533     {
2534     int linenumber = 0;
2535     FILE *f;
2536     char *filename;
2537 ph10 644 char buffer[PATBUFSIZE];
2538 nigel 87
2539     if (strcmp(pattern_filename, "-") == 0)
2540 nigel 77 {
2541 nigel 87 f = stdin;
2542     filename = stdin_name;
2543 nigel 77 }
2544 nigel 87 else
2545 nigel 77 {
2546 nigel 87 f = fopen(pattern_filename, "r");
2547     if (f == NULL)
2548     {
2549     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2550     strerror(errno));
2551 ph10 123 goto EXIT2;
2552 nigel 87 }
2553     filename = pattern_filename;
2554 nigel 77 }
2555    
2556 ph10 644 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2557 nigel 53 {
2558 nigel 87 char *s = buffer + (int)strlen(buffer);
2559     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2560     *s = 0;
2561     linenumber++;
2562     if (buffer[0] == 0) continue; /* Skip blank lines */
2563     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2564 ph10 121 goto EXIT2;
2565 nigel 53 }
2566 nigel 87
2567     if (f != stdin) fclose(f);
2568 nigel 49 }
2569    
2570 nigel 77 /* Study the regular expressions, as we will be running them many times */
2571 nigel 53
2572     for (j = 0; j < pattern_count; j++)
2573     {
2574     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2575     if (error != NULL)
2576     {
2577     char s[16];
2578     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2579     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2580 ph10 121 goto EXIT2;
2581 nigel 53 }
2582 ph10 142 hint_count++;
2583 nigel 53 }
2584 ph10 579
2585 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2586     pcre_extra block for each pattern. */
2587 nigel 53
2588 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2589     {
2590     for (j = 0; j < pattern_count; j++)
2591     {
2592     if (hints_list[j] == NULL)
2593     {
2594     hints_list[j] = malloc(sizeof(pcre_extra));
2595 ph10 579 if (hints_list[j] == NULL)
2596 ph10 561 {
2597     fprintf(stderr, "pcregrep: malloc failed\n");
2598     pcregrep_exit(2);
2599     }
2600     }
2601     if (match_limit > 0)
2602 ph10 579 {
2603 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2604     hints_list[j]->match_limit = match_limit;
2605 ph10 579 }
2606 ph10 561 if (match_limit_recursion > 0)
2607 ph10 579 {
2608 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2609     hints_list[j]->match_limit_recursion = match_limit_recursion;
2610 ph10 579 }
2611 ph10 561 }
2612 ph10 579 }
2613 ph10 561
2614 nigel 77 /* If there are include or exclude patterns, compile them. */
2615    
2616     if (exclude_pattern != NULL)
2617     {
2618 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2619     pcretables);
2620 nigel 77 if (exclude_compiled == NULL)
2621     {
2622     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2623     errptr, error);
2624 ph10 121 goto EXIT2;
2625 nigel 77 }
2626     }
2627    
2628     if (include_pattern != NULL)
2629     {
2630 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2631     pcretables);
2632 nigel 77 if (include_compiled == NULL)
2633     {
2634     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2635     errptr, error);
2636 ph10 121 goto EXIT2;
2637 nigel 77 }
2638     }
2639    
2640 ph10 325 if (exclude_dir_pattern != NULL)
2641     {
2642     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2643     pcretables);
2644     if (exclude_dir_compiled == NULL)
2645     {
2646     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2647     errptr, error);
2648     goto EXIT2;
2649     }
2650     }
2651    
2652     if (include_dir_pattern != NULL)
2653     {
2654     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2655     pcretables);
2656     if (include_dir_compiled == NULL)
2657     {
2658     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2659     errptr, error);
2660     goto EXIT2;
2661     }
2662     }
2663    
2664 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2665 nigel 49
2666 nigel 87 if (i >= argc)
2667 ph10 121 {
2668 ph10 654 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2669 ph10 644 (filenames > FN_DEFAULT)? stdin_name : NULL);
2670 ph10 121 goto EXIT;
2671 ph10 123 }
2672 nigel 49
2673 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2674     Pass in the fact that there is only one argument at top level - this suppresses
2675 nigel 87 the file name if the argument is not a directory and filenames are not
2676     otherwise forced. */
2677 nigel 49
2678 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2679 nigel 49
2680     for (; i < argc; i++)
2681     {
2682 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2683     only_one_at_top);
2684 nigel 77 if (frc > 1) rc = frc;
2685     else if (frc == 0 && rc == 1) rc = 0;
2686 nigel 49 }
2687    
2688 ph10 121 EXIT:
2689 ph10 644 if (main_buffer != NULL) free(main_buffer);
2690 ph10 121 if (pattern_list != NULL)
2691     {
2692 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2693 ph10 121 free(pattern_list);
2694 ph10 123 }
2695 ph10 121 if (hints_list != NULL)
2696     {
2697 ph10 579 for (i = 0; i < hint_count; i++)
2698 ph10 561 {
2699     if (hints_list[i] != NULL) free(hints_list[i]);
2700 ph10 579 }
2701 ph10 121 free(hints_list);
2702 ph10 123 }
2703 ph10 561 pcregrep_exit(rc);
2704 ph10 121
2705     EXIT2:
2706     rc = 2;
2707     goto EXIT;
2708 nigel 49 }
2709    
2710 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12