/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 718 - (hide annotations) (download)
Wed Oct 5 16:11:19 2011 UTC (2 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 79910 byte(s)
Add some casts to get rid of compiler warnings from MSVC8.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77 ph10 644 #define PATBUFSIZE BUFSIZ
78 nigel 77 #else
79 ph10 644 #define PATBUFSIZE 8192
80 nigel 77 #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
107 ph10 535 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
108     environments), a warning is issued if the value of fwrite() is ignored.
109     Unfortunately, casting to (void) does not suppress the warning. To get round
110     this, we use a macro that compiles a fudge. Oddly, this does not also seem to
111 ph10 515 apply to fprintf(). */
112 nigel 93
113 ph10 515 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 ph10 644 static char *main_buffer = NULL;
139 nigel 91 static char *newline = NULL;
140 nigel 53 static char *pattern_filename = NULL;
141 nigel 77 static char *stdin_name = (char *)"(standard input)";
142 nigel 87 static char *locale = NULL;
143    
144     static const unsigned char *pcretables = NULL;
145    
146 nigel 53 static int pattern_count = 0;
147 ph10 121 static pcre **pattern_list = NULL;
148     static pcre_extra **hints_list = NULL;
149 nigel 49
150 nigel 77 static char *include_pattern = NULL;
151     static char *exclude_pattern = NULL;
152 ph10 325 static char *include_dir_pattern = NULL;
153     static char *exclude_dir_pattern = NULL;
154 nigel 77
155     static pcre *include_compiled = NULL;
156     static pcre *exclude_compiled = NULL;
157 ph10 325 static pcre *include_dir_compiled = NULL;
158     static pcre *exclude_dir_compiled = NULL;
159 nigel 77
160     static int after_context = 0;
161     static int before_context = 0;
162     static int both_context = 0;
163 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
164     static int bufsize = 3*PCREGREP_BUFSIZE;
165 nigel 87 static int dee_action = dee_READ;
166     static int DEE_action = DEE_READ;
167     static int error_count = 0;
168     static int filenames = FN_DEFAULT;
169 ph10 565 static int only_matching = -1;
170 nigel 87 static int process_options = 0;
171 ph10 685
172     #ifdef SUPPORT_PCREGREP_JIT
173     static int study_options = PCRE_STUDY_JIT_COMPILE;
174     #else
175 ph10 667 static int study_options = 0;
176 ph10 685 #endif
177 nigel 77
178 ph10 561 static unsigned long int match_limit = 0;
179     static unsigned long int match_limit_recursion = 0;
180    
181 nigel 49 static BOOL count_only = FALSE;
182 nigel 87 static BOOL do_colour = FALSE;
183 ph10 280 static BOOL file_offsets = FALSE;
184 nigel 77 static BOOL hyphenpending = FALSE;
185 nigel 49 static BOOL invert = FALSE;
186 ph10 519 static BOOL line_buffered = FALSE;
187 ph10 280 static BOOL line_offsets = FALSE;
188 nigel 77 static BOOL multiline = FALSE;
189 nigel 49 static BOOL number = FALSE;
190 ph10 420 static BOOL omit_zero_count = FALSE;
191 ph10 561 static BOOL resource_error = FALSE;
192 nigel 77 static BOOL quiet = FALSE;
193 nigel 49 static BOOL silent = FALSE;
194 nigel 93 static BOOL utf8 = FALSE;
195 nigel 49
196 nigel 53 /* Structure for options and list of them */
197 nigel 49
198 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
199     OP_OP_NUMBER, OP_PATLIST };
200 nigel 77
201 nigel 53 typedef struct option_item {
202 nigel 77 int type;
203 nigel 53 int one_char;
204 nigel 77 void *dataptr;
205 nigel 67 const char *long_name;
206     const char *help_text;
207 nigel 53 } option_item;
208 nigel 49
209 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
210     used to identify them. */
211    
212 ph10 325 #define N_COLOUR (-1)
213     #define N_EXCLUDE (-2)
214     #define N_EXCLUDE_DIR (-3)
215     #define N_HELP (-4)
216     #define N_INCLUDE (-5)
217     #define N_INCLUDE_DIR (-6)
218     #define N_LABEL (-7)
219     #define N_LOCALE (-8)
220     #define N_NULL (-9)
221     #define N_LOFFSETS (-10)
222     #define N_FOFFSETS (-11)
223 ph10 519 #define N_LBUFFER (-12)
224 ph10 561 #define N_M_LIMIT (-13)
225     #define N_M_LIMIT_REC (-14)
226 ph10 644 #define N_BUFSIZE (-15)
227 ph10 685 #define N_NOJIT (-16)
228 nigel 87
229 nigel 53 static option_item optionlist[] = {
230 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
231     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
232     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
233     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
234 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
235 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
236     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
237     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
238     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
239     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
240     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
241     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
242     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
243     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
244     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
245     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
246     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
247     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
248 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
249     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
250     #else
251     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
252     #endif
253 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
254     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
255     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
256     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
257     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
258     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
259     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
260     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
261     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
262     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
263     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
264     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
265     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
266     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
267     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
268     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
269     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
270     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
271 ph10 571
272     /* These two were accidentally implemented with underscores instead of
273     hyphens in the option names. As this was not discovered for several releases,
274     the incorrect versions are left in the table for compatibility. However, the
275     --help function misses out any option that has an underscore in its name. */
276 ph10 579
277 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
278     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
279 ph10 571
280 nigel 87 #ifdef JFRIEDL_DEBUG
281     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
282     #endif
283     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
284     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
285     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
286     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
287     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
288     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
289     { OP_NODATA, 0, NULL, NULL, NULL }
290 nigel 53 };
291    
292 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
293     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
294     that the combination of -w and -x has the same effect as -x on its own, so we
295     can treat them as the same. */
296 nigel 53
297 nigel 87 static const char *prefix[] = {
298     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
299    
300     static const char *suffix[] = {
301     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
302    
303 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
304 nigel 87
305 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
306 nigel 87
307 nigel 93 const char utf8_table4[] = {
308     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
309     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
310     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
311     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
312    
313    
314    
315 nigel 53 /*************************************************
316 ph10 586 * Exit from the program *
317     *************************************************/
318    
319     /* If there has been a resource error, give a suitable message.
320    
321     Argument: the return code
322     Returns: does not return
323     */
324    
325     static void
326     pcregrep_exit(int rc)
327     {
328     if (resource_error)
329     {
330 ph10 685 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
331     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
332     PCRE_ERROR_JIT_STACKLIMIT);
333 ph10 586 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
334     }
335    
336     exit(rc);
337     }
338    
339    
340     /*************************************************
341 nigel 87 * OS-specific functions *
342 nigel 53 *************************************************/
343    
344     /* These functions are defined so that they can be made system specific,
345 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
346 nigel 53
347    
348     /************* Directory scanning in Unix ***********/
349    
350 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
351 nigel 53 #include <sys/types.h>
352     #include <sys/stat.h>
353     #include <dirent.h>
354    
355     typedef DIR directory_type;
356    
357 nigel 67 static int
358 nigel 53 isdirectory(char *filename)
359     {
360     struct stat statbuf;
361     if (stat(filename, &statbuf) < 0)
362     return 0; /* In the expectation that opening as a file will fail */
363     return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
364     }
365    
366 nigel 67 static directory_type *
367 nigel 53 opendirectory(char *filename)
368     {
369     return opendir(filename);
370     }
371    
372 nigel 67 static char *
373 nigel 53 readdirectory(directory_type *dir)
374     {
375     for (;;)
376     {
377     struct dirent *dent = readdir(dir);
378     if (dent == NULL) return NULL;
379     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
380     return dent->d_name;
381     }
382 ph10 151 /* Control never reaches here */
383 nigel 53 }
384    
385 nigel 67 static void
386 nigel 53 closedirectory(directory_type *dir)
387     {
388     closedir(dir);
389     }
390    
391    
392 nigel 87 /************* Test for regular file in Unix **********/
393    
394     static int
395     isregfile(char *filename)
396     {
397     struct stat statbuf;
398     if (stat(filename, &statbuf) < 0)
399     return 1; /* In the expectation that opening as a file will fail */
400     return (statbuf.st_mode & S_IFMT) == S_IFREG;
401     }
402    
403    
404 ph10 519 /************* Test for a terminal in Unix **********/
405 nigel 87
406     static BOOL
407     is_stdout_tty(void)
408     {
409     return isatty(fileno(stdout));
410     }
411    
412 ph10 519 static BOOL
413     is_file_tty(FILE *f)
414     {
415     return isatty(fileno(f));
416     }
417 nigel 87
418 ph10 519
419 nigel 63 /************* Directory scanning in Win32 ***********/
420 nigel 53
421 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
422 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
423 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
424     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
425 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
426     undefined when it is indeed undefined. */
427 nigel 53
428 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
429 nigel 63
430     #ifndef STRICT
431     # define STRICT
432     #endif
433     #ifndef WIN32_LEAN_AND_MEAN
434     # define WIN32_LEAN_AND_MEAN
435     #endif
436 ph10 283
437     #include <windows.h>
438    
439 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
440     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
441     #endif
442    
443 nigel 63 typedef struct directory_type
444     {
445     HANDLE handle;
446     BOOL first;
447     WIN32_FIND_DATA data;
448     } directory_type;
449    
450     int
451     isdirectory(char *filename)
452     {
453     DWORD attr = GetFileAttributes(filename);
454     if (attr == INVALID_FILE_ATTRIBUTES)
455     return 0;
456     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
457     }
458    
459     directory_type *
460     opendirectory(char *filename)
461     {
462     size_t len;
463     char *pattern;
464     directory_type *dir;
465     DWORD err;
466     len = strlen(filename);
467     pattern = (char *) malloc(len + 3);
468     dir = (directory_type *) malloc(sizeof(*dir));
469     if ((pattern == NULL) || (dir == NULL))
470     {
471     fprintf(stderr, "pcregrep: malloc failed\n");
472 ph10 561 pcregrep_exit(2);
473 nigel 63 }
474     memcpy(pattern, filename, len);
475     memcpy(&(pattern[len]), "\\*", 3);
476     dir->handle = FindFirstFile(pattern, &(dir->data));
477     if (dir->handle != INVALID_HANDLE_VALUE)
478     {
479     free(pattern);
480     dir->first = TRUE;
481     return dir;
482     }
483     err = GetLastError();
484     free(pattern);
485     free(dir);
486     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
487     return NULL;
488     }
489    
490     char *
491     readdirectory(directory_type *dir)
492     {
493     for (;;)
494     {
495     if (!dir->first)
496     {
497     if (!FindNextFile(dir->handle, &(dir->data)))
498     return NULL;
499     }
500     else
501     {
502     dir->first = FALSE;
503     }
504     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
505     return dir->data.cFileName;
506     }
507     #ifndef _MSC_VER
508     return NULL; /* Keep compiler happy; never executed */
509     #endif
510     }
511    
512     void
513     closedirectory(directory_type *dir)
514     {
515     FindClose(dir->handle);
516     free(dir);
517     }
518    
519    
520 nigel 87 /************* Test for regular file in Win32 **********/
521    
522     /* I don't know how to do this, or if it can be done; assume all paths are
523     regular if they are not directories. */
524    
525     int isregfile(char *filename)
526     {
527 ph10 283 return !isdirectory(filename);
528 nigel 87 }
529    
530    
531 ph10 519 /************* Test for a terminal in Win32 **********/
532 nigel 87
533     /* I don't know how to do this; assume never */
534    
535     static BOOL
536     is_stdout_tty(void)
537     {
538 ph10 283 return FALSE;
539 nigel 87 }
540    
541 ph10 519 static BOOL
542     is_file_tty(FILE *f)
543     {
544     return FALSE;
545     }
546 nigel 87
547 ph10 519
548 nigel 53 /************* Directory scanning when we can't do it ***********/
549    
550     /* The type is void, and apart from isdirectory(), the functions do nothing. */
551    
552 nigel 63 #else
553    
554 nigel 53 typedef void directory_type;
555    
556 nigel 87 int isdirectory(char *filename) { return 0; }
557 ph10 97 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
558     char *readdirectory(directory_type *dir) { return (char*)0;}
559 nigel 53 void closedirectory(directory_type *dir) {}
560    
561 nigel 87
562     /************* Test for regular when we can't do it **********/
563    
564     /* Assume all files are regular. */
565    
566     int isregfile(char *filename) { return 1; }
567    
568    
569 ph10 519 /************* Test for a terminal when we can't do it **********/
570 nigel 87
571     static BOOL
572     is_stdout_tty(void)
573     {
574     return FALSE;
575     }
576    
577 ph10 519 static BOOL
578     is_file_tty(FILE *f)
579     {
580     return FALSE;
581     }
582 nigel 87
583 nigel 53 #endif
584    
585    
586    
587 ph10 137 #ifndef HAVE_STRERROR
588 nigel 49 /*************************************************
589     * Provide strerror() for non-ANSI libraries *
590     *************************************************/
591    
592     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
593     in their libraries, but can provide the same facility by this simple
594     alternative function. */
595    
596     extern int sys_nerr;
597     extern char *sys_errlist[];
598    
599     char *
600     strerror(int n)
601     {
602     if (n < 0 || n >= sys_nerr) return "unknown error number";
603     return sys_errlist[n];
604     }
605     #endif /* HAVE_STRERROR */
606    
607    
608    
609     /*************************************************
610 ph10 519 * Read one line of input *
611     *************************************************/
612    
613 ph10 535 /* Normally, input is read using fread() into a large buffer, so many lines may
614     be read at once. However, doing this for tty input means that no output appears
615 ph10 519 until a lot of input has been typed. Instead, tty input is handled line by
616     line. We cannot use fgets() for this, because it does not stop at a binary
617 ph10 535 zero, and therefore there is no way of telling how many characters it has read,
618 ph10 519 because there may be binary zeros embedded in the data.
619    
620     Arguments:
621     buffer the buffer to read into
622     length the maximum number of characters to read
623     f the file
624 ph10 535
625 ph10 519 Returns: the number of characters read, zero at end of file
626 ph10 535 */
627 ph10 519
628     static int
629     read_one_line(char *buffer, int length, FILE *f)
630     {
631     int c;
632     int yield = 0;
633     while ((c = fgetc(f)) != EOF)
634     {
635     buffer[yield++] = c;
636 ph10 535 if (c == '\n' || yield >= length) break;
637     }
638     return yield;
639 ph10 519 }
640    
641    
642    
643     /*************************************************
644 nigel 93 * Find end of line *
645     *************************************************/
646    
647     /* The length of the endline sequence that is found is set via lenptr. This may
648     be zero at the very end of the file if there is no line-ending sequence there.
649    
650     Arguments:
651     p current position in line
652     endptr end of available data
653     lenptr where to put the length of the eol sequence
654    
655 ph10 654 Returns: pointer after the last byte of the line,
656 ph10 644 including the newline byte(s)
657 nigel 93 */
658    
659     static char *
660     end_of_line(char *p, char *endptr, int *lenptr)
661     {
662     switch(endlinetype)
663     {
664     default: /* Just in case */
665     case EL_LF:
666     while (p < endptr && *p != '\n') p++;
667     if (p < endptr)
668     {
669     *lenptr = 1;
670     return p + 1;
671     }
672     *lenptr = 0;
673     return endptr;
674    
675     case EL_CR:
676     while (p < endptr && *p != '\r') p++;
677     if (p < endptr)
678     {
679     *lenptr = 1;
680     return p + 1;
681     }
682     *lenptr = 0;
683     return endptr;
684    
685     case EL_CRLF:
686     for (;;)
687     {
688     while (p < endptr && *p != '\r') p++;
689     if (++p >= endptr)
690     {
691     *lenptr = 0;
692     return endptr;
693     }
694     if (*p == '\n')
695     {
696     *lenptr = 2;
697     return p + 1;
698     }
699     }
700     break;
701    
702 ph10 149 case EL_ANYCRLF:
703     while (p < endptr)
704     {
705     int extra = 0;
706     register int c = *((unsigned char *)p);
707    
708     if (utf8 && c >= 0xc0)
709     {
710     int gcii, gcss;
711     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
712     gcss = 6*extra;
713     c = (c & utf8_table3[extra]) << gcss;
714     for (gcii = 1; gcii <= extra; gcii++)
715     {
716     gcss -= 6;
717     c |= (p[gcii] & 0x3f) << gcss;
718     }
719     }
720    
721     p += 1 + extra;
722    
723     switch (c)
724     {
725     case 0x0a: /* LF */
726     *lenptr = 1;
727     return p;
728    
729     case 0x0d: /* CR */
730     if (p < endptr && *p == 0x0a)
731     {
732     *lenptr = 2;
733     p++;
734     }
735     else *lenptr = 1;
736     return p;
737 ph10 150
738 ph10 149 default:
739     break;
740     }
741     } /* End of loop for ANYCRLF case */
742 ph10 150
743 ph10 149 *lenptr = 0; /* Must have hit the end */
744     return endptr;
745    
746 nigel 93 case EL_ANY:
747     while (p < endptr)
748     {
749     int extra = 0;
750     register int c = *((unsigned char *)p);
751    
752     if (utf8 && c >= 0xc0)
753     {
754     int gcii, gcss;
755     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
756     gcss = 6*extra;
757     c = (c & utf8_table3[extra]) << gcss;
758     for (gcii = 1; gcii <= extra; gcii++)
759     {
760     gcss -= 6;
761     c |= (p[gcii] & 0x3f) << gcss;
762     }
763     }
764    
765     p += 1 + extra;
766    
767     switch (c)
768     {
769     case 0x0a: /* LF */
770     case 0x0b: /* VT */
771     case 0x0c: /* FF */
772     *lenptr = 1;
773     return p;
774    
775     case 0x0d: /* CR */
776     if (p < endptr && *p == 0x0a)
777     {
778     *lenptr = 2;
779     p++;
780     }
781     else *lenptr = 1;
782     return p;
783    
784     case 0x85: /* NEL */
785     *lenptr = utf8? 2 : 1;
786     return p;
787    
788     case 0x2028: /* LS */
789     case 0x2029: /* PS */
790     *lenptr = 3;
791     return p;
792    
793     default:
794     break;
795     }
796     } /* End of loop for ANY case */
797    
798     *lenptr = 0; /* Must have hit the end */
799     return endptr;
800     } /* End of overall switch */
801     }
802    
803    
804    
805     /*************************************************
806     * Find start of previous line *
807     *************************************************/
808    
809     /* This is called when looking back for before lines to print.
810    
811     Arguments:
812     p start of the subsequent line
813     startptr start of available data
814    
815     Returns: pointer to the start of the previous line
816     */
817    
818     static char *
819     previous_line(char *p, char *startptr)
820     {
821     switch(endlinetype)
822     {
823     default: /* Just in case */
824     case EL_LF:
825     p--;
826     while (p > startptr && p[-1] != '\n') p--;
827     return p;
828    
829     case EL_CR:
830     p--;
831     while (p > startptr && p[-1] != '\n') p--;
832     return p;
833    
834     case EL_CRLF:
835     for (;;)
836     {
837     p -= 2;
838     while (p > startptr && p[-1] != '\n') p--;
839     if (p <= startptr + 1 || p[-2] == '\r') return p;
840     }
841     return p; /* But control should never get here */
842    
843     case EL_ANY:
844 ph10 150 case EL_ANYCRLF:
845 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
846     if (utf8) while ((*p & 0xc0) == 0x80) p--;
847    
848     while (p > startptr)
849     {
850     register int c;
851     char *pp = p - 1;
852    
853     if (utf8)
854     {
855     int extra = 0;
856     while ((*pp & 0xc0) == 0x80) pp--;
857     c = *((unsigned char *)pp);
858     if (c >= 0xc0)
859     {
860     int gcii, gcss;
861     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
862     gcss = 6*extra;
863     c = (c & utf8_table3[extra]) << gcss;
864     for (gcii = 1; gcii <= extra; gcii++)
865     {
866     gcss -= 6;
867     c |= (pp[gcii] & 0x3f) << gcss;
868     }
869     }
870     }
871     else c = *((unsigned char *)pp);
872    
873 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
874 nigel 93 {
875     case 0x0a: /* LF */
876 ph10 149 case 0x0d: /* CR */
877     return p;
878 ph10 150
879 ph10 149 default:
880     break;
881 ph10 150 }
882 ph10 149
883     else switch (c)
884     {
885     case 0x0a: /* LF */
886 nigel 93 case 0x0b: /* VT */
887     case 0x0c: /* FF */
888     case 0x0d: /* CR */
889     case 0x85: /* NEL */
890     case 0x2028: /* LS */
891     case 0x2029: /* PS */
892     return p;
893    
894     default:
895     break;
896     }
897    
898     p = pp; /* Back one character */
899     } /* End of loop for ANY case */
900    
901     return startptr; /* Hit start of data */
902     } /* End of overall switch */
903     }
904    
905    
906    
907    
908    
909     /*************************************************
910 nigel 77 * Print the previous "after" lines *
911 nigel 49 *************************************************/
912    
913 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
914 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
915     that a binary zero does not terminate it.
916 nigel 77
917     Arguments:
918     lastmatchnumber the number of the last matching line, plus one
919     lastmatchrestart where we restarted after the last match
920     endptr end of available data
921     printname filename for printing
922    
923     Returns: nothing
924     */
925    
926     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
927     char *endptr, char *printname)
928     {
929     if (after_context > 0 && lastmatchnumber > 0)
930     {
931     int count = 0;
932     while (lastmatchrestart < endptr && count++ < after_context)
933     {
934 nigel 93 int ellength;
935 nigel 77 char *pp = lastmatchrestart;
936     if (printname != NULL) fprintf(stdout, "%s-", printname);
937     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
938 nigel 93 pp = end_of_line(pp, endptr, &ellength);
939 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
940 nigel 93 lastmatchrestart = pp;
941 nigel 77 }
942     hyphenpending = TRUE;
943     }
944     }
945    
946    
947    
948     /*************************************************
949 ph10 378 * Apply patterns to subject till one matches *
950     *************************************************/
951    
952 ph10 392 /* This function is called to run through all patterns, looking for a match. It
953     is used multiple times for the same subject when colouring is enabled, in order
954 ph10 378 to find all possible matches.
955    
956     Arguments:
957 ph10 632 matchptr the start of the subject
958     length the length of the subject to match
959     startoffset where to start matching
960     offsets the offets vector to fill in
961     mrc address of where to put the result of pcre_exec()
962 ph10 392
963     Returns: TRUE if there was a match
964 ph10 378 FALSE if there was no match
965     invert if there was a non-fatal error
966 ph10 392 */
967 ph10 378
968     static BOOL
969 ph10 654 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
970 ph10 632 int *mrc)
971 ph10 378 {
972     int i;
973 ph10 561 size_t slen = length;
974     const char *msg = "this text:\n\n";
975     if (slen > 200)
976     {
977     slen = 200;
978     msg = "text that starts:\n\n";
979 ph10 579 }
980 ph10 378 for (i = 0; i < pattern_count; i++)
981     {
982 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
983     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
984 ph10 378 if (*mrc >= 0) return TRUE;
985     if (*mrc == PCRE_ERROR_NOMATCH) continue;
986 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
987 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
988 ph10 561 fprintf(stderr, "%s", msg);
989     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
990     fprintf(stderr, "\n\n");
991 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
992     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
993 ph10 561 resource_error = TRUE;
994 ph10 378 if (error_count++ > 20)
995     {
996 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
997     pcregrep_exit(2);
998 ph10 378 }
999     return invert; /* No more matching; don't show the line again */
1000     }
1001    
1002     return FALSE; /* No match, no errors */
1003     }
1004    
1005    
1006    
1007     /*************************************************
1008 nigel 77 * Grep an individual file *
1009     *************************************************/
1010    
1011     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1012 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1013 nigel 77 the top third of the buffer, thus keeping more of the file available for
1014     context printing or for multiline scanning. For large files, the pointer will
1015     be in the middle third most of the time, so the bottom third is available for
1016     "before" context printing.
1017    
1018     Arguments:
1019 ph10 286 handle the fopened FILE stream for a normal file
1020     the gzFile pointer when reading is via libz
1021     the BZFILE pointer when reading is via libbz2
1022     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1023 ph10 644 filename the file name or NULL (for errors)
1024 nigel 77 printname the file name if it is to be printed for each match
1025     or NULL if the file name is not to be printed
1026     it cannot be NULL if filenames[_nomatch]_only is set
1027    
1028     Returns: 0 if there was at least one match
1029     1 otherwise (no matches)
1030 ph10 654 2 if an overlong line is encountered
1031 ph10 644 3 if there is a read error on a .bz2 file
1032 nigel 77 */
1033    
1034 nigel 49 static int
1035 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1036 nigel 49 {
1037     int rc = 1;
1038 nigel 77 int linenumber = 1;
1039     int lastmatchnumber = 0;
1040 nigel 49 int count = 0;
1041 ph10 280 int filepos = 0;
1042 ph10 378 int offsets[OFFSET_SIZE];
1043 nigel 77 char *lastmatchrestart = NULL;
1044 ph10 644 char *ptr = main_buffer;
1045 nigel 77 char *endptr;
1046     size_t bufflength;
1047     BOOL endhyphenpending = FALSE;
1048 ph10 519 BOOL input_line_buffered = line_buffered;
1049 ph10 286 FILE *in = NULL; /* Ensure initialized */
1050 nigel 49
1051 ph10 286 #ifdef SUPPORT_LIBZ
1052     gzFile ingz = NULL;
1053     #endif
1054 nigel 77
1055 ph10 286 #ifdef SUPPORT_LIBBZ2
1056     BZFILE *inbz2 = NULL;
1057     #endif
1058    
1059    
1060     /* Do the first read into the start of the buffer and set up the pointer to end
1061     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1062     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1063     fail. */
1064    
1065     #ifdef SUPPORT_LIBZ
1066     if (frtype == FR_LIBZ)
1067     {
1068     ingz = (gzFile)handle;
1069 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1070 ph10 286 }
1071     else
1072     #endif
1073    
1074     #ifdef SUPPORT_LIBBZ2
1075     if (frtype == FR_LIBBZ2)
1076     {
1077     inbz2 = (BZFILE *)handle;
1078 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1079 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1080     } /* without the cast it is unsigned. */
1081     else
1082     #endif
1083    
1084     {
1085     in = (FILE *)handle;
1086 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1087 ph10 535 bufflength = input_line_buffered?
1088 ph10 644 read_one_line(main_buffer, bufsize, in) :
1089     fread(main_buffer, 1, bufsize, in);
1090 ph10 286 }
1091 ph10 535
1092 ph10 644 endptr = main_buffer + bufflength;
1093 nigel 77
1094     /* Loop while the current pointer is not at the end of the file. For large
1095     files, endptr will be at the end of the buffer when we are in the middle of the
1096     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1097     way, the buffer is shifted left and re-filled. */
1098    
1099     while (ptr < endptr)
1100 nigel 49 {
1101 ph10 378 int endlinelength;
1102 nigel 87 int mrc = 0;
1103 ph10 654 int startoffset = 0;
1104 ph10 378 BOOL match;
1105 ph10 286 char *matchptr = ptr;
1106 nigel 77 char *t = ptr;
1107     size_t length, linelength;
1108 nigel 49
1109 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1110     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1111     length remainder of the data in the buffer. Otherwise, it is the length of
1112 ph10 378 the next line, excluding the terminating newline. After matching, we always
1113     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1114     option is used for compiling, so that any match is constrained to be in the
1115     first line. */
1116 nigel 77
1117 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1118     linelength = t - ptr - endlinelength;
1119 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1120 ph10 654
1121     /* Check to see if the line we are looking at extends right to the very end
1122     of the buffer without a line terminator. This means the line is too long to
1123 ph10 644 handle. */
1124 ph10 654
1125 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1126     {
1127     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1128 ph10 646 "pcregrep: check the --buffer-size option\n",
1129 ph10 654 linenumber,
1130 ph10 644 (filename == NULL)? "" : " of file ",
1131     (filename == NULL)? "" : filename);
1132     return 2;
1133 ph10 654 }
1134 nigel 77
1135 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1136    
1137     #ifdef JFRIEDL_DEBUG
1138     if (jfriedl_XT || jfriedl_XR)
1139     {
1140     #include <sys/time.h>
1141     #include <time.h>
1142     struct timeval start_time, end_time;
1143     struct timezone dummy;
1144 ph10 392 int i;
1145 nigel 89
1146     if (jfriedl_XT)
1147     {
1148     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1149     const char *orig = ptr;
1150     ptr = malloc(newlen + 1);
1151     if (!ptr) {
1152     printf("out of memory");
1153 ph10 561 pcregrep_exit(2);
1154 nigel 89 }
1155     endptr = ptr;
1156     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1157     for (i = 0; i < jfriedl_XT; i++) {
1158     strncpy(endptr, orig, length);
1159     endptr += length;
1160     }
1161     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1162     length = newlen;
1163     }
1164    
1165     if (gettimeofday(&start_time, &dummy) != 0)
1166     perror("bad gettimeofday");
1167    
1168    
1169     for (i = 0; i < jfriedl_XR; i++)
1170 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1171 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1172 nigel 89
1173     if (gettimeofday(&end_time, &dummy) != 0)
1174     perror("bad gettimeofday");
1175    
1176     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1177     -
1178     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1179    
1180     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1181     return 0;
1182     }
1183     #endif
1184    
1185 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1186 ph10 279 in order to find any further matches in the same line. */
1187 nigel 89
1188 ph10 286 ONLY_MATCHING_RESTART:
1189    
1190 ph10 392 /* Run through all the patterns until one matches or there is an error other
1191 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1192     finding subsequent matches when colouring matched lines. */
1193 ph10 392
1194 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1195 nigel 77
1196 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1197 nigel 77
1198 nigel 49 if (match != invert)
1199     {
1200 nigel 77 BOOL hyphenprinted = FALSE;
1201    
1202 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1203 nigel 77
1204 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1205    
1206     /* Just count if just counting is wanted. */
1207    
1208 nigel 49 if (count_only) count++;
1209    
1210 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1211     in the file. */
1212    
1213 ph10 420 else if (filenames == FN_MATCH_ONLY)
1214 nigel 49 {
1215 nigel 77 fprintf(stdout, "%s\n", printname);
1216 nigel 49 return 0;
1217     }
1218    
1219 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1220    
1221 nigel 77 else if (quiet) return 0;
1222 nigel 49
1223 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1224 ph10 565 captured portion of it, as long as this string is not empty, and the
1225     --file-offsets and --line-offsets options output offsets for the matching
1226     substring (they both force --only-matching = 0). None of these options
1227 ph10 636 prints any context. Afterwards, adjust the start and then jump back to look
1228     for further matches in the same line. If we are in invert mode, however,
1229     nothing is printed and we do not restart - this could still be useful
1230     because the return code is set. */
1231 nigel 87
1232 ph10 565 else if (only_matching >= 0)
1233 nigel 87 {
1234 ph10 279 if (!invert)
1235 ph10 286 {
1236 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1237     if (number) fprintf(stdout, "%d:", linenumber);
1238 ph10 280 if (line_offsets)
1239 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1240 ph10 286 offsets[1] - offsets[0]);
1241 ph10 280 else if (file_offsets)
1242 ph10 579 fprintf(stdout, "%d,%d\n",
1243 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1244 ph10 286 offsets[1] - offsets[0]);
1245 ph10 565 else if (only_matching < mrc)
1246 ph10 377 {
1247 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1248     if (plen > 0)
1249 ph10 579 {
1250 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1251     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1252     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1253     fprintf(stdout, "\n");
1254 ph10 579 }
1255 ph10 392 }
1256 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1257 ph10 286 match = FALSE;
1258 ph10 564 if (line_buffered) fflush(stdout);
1259 ph10 636 rc = 0; /* Had some success */
1260     startoffset = offsets[1]; /* Restart after the match */
1261 ph10 286 goto ONLY_MATCHING_RESTART;
1262     }
1263 nigel 87 }
1264    
1265     /* This is the default case when none of the above options is set. We print
1266     the matching lines(s), possibly preceded and/or followed by other lines of
1267     context. */
1268    
1269 nigel 49 else
1270     {
1271 nigel 77 /* See if there is a requirement to print some "after" lines from a
1272     previous match. We never print any overlaps. */
1273    
1274     if (after_context > 0 && lastmatchnumber > 0)
1275     {
1276 nigel 93 int ellength;
1277 nigel 77 int linecount = 0;
1278     char *p = lastmatchrestart;
1279    
1280     while (p < ptr && linecount < after_context)
1281     {
1282 nigel 93 p = end_of_line(p, ptr, &ellength);
1283 nigel 77 linecount++;
1284     }
1285    
1286     /* It is important to advance lastmatchrestart during this printing so
1287 nigel 87 that it interacts correctly with any "before" printing below. Print
1288     each line's data using fwrite() in case there are binary zeroes. */
1289 nigel 77
1290     while (lastmatchrestart < p)
1291     {
1292     char *pp = lastmatchrestart;
1293     if (printname != NULL) fprintf(stdout, "%s-", printname);
1294     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1295 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1296 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1297 nigel 93 lastmatchrestart = pp;
1298 nigel 77 }
1299     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1300     }
1301    
1302     /* If there were non-contiguous lines printed above, insert hyphens. */
1303    
1304     if (hyphenpending)
1305     {
1306     fprintf(stdout, "--\n");
1307     hyphenpending = FALSE;
1308     hyphenprinted = TRUE;
1309     }
1310    
1311     /* See if there is a requirement to print some "before" lines for this
1312     match. Again, don't print overlaps. */
1313    
1314     if (before_context > 0)
1315     {
1316     int linecount = 0;
1317     char *p = ptr;
1318    
1319 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1320 nigel 87 linecount < before_context)
1321 nigel 77 {
1322 nigel 87 linecount++;
1323 ph10 644 p = previous_line(p, main_buffer);
1324 nigel 77 }
1325    
1326     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1327     fprintf(stdout, "--\n");
1328    
1329     while (p < ptr)
1330     {
1331 nigel 93 int ellength;
1332 nigel 77 char *pp = p;
1333     if (printname != NULL) fprintf(stdout, "%s-", printname);
1334     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1335 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1336 ph10 515 FWRITE(p, 1, pp - p, stdout);
1337 nigel 93 p = pp;
1338 nigel 77 }
1339     }
1340    
1341     /* Now print the matching line(s); ensure we set hyphenpending at the end
1342 nigel 85 of the file if any context lines are being output. */
1343 nigel 77
1344 nigel 85 if (after_context > 0 || before_context > 0)
1345     endhyphenpending = TRUE;
1346    
1347 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1348 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1349 nigel 77
1350     /* In multiline mode, we want to print to the end of the line in which
1351     the end of the matched string is found, so we adjust linelength and the
1352 ph10 222 line number appropriately, but only when there actually was a match
1353     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1354     the match will always be before the first newline sequence. */
1355 nigel 77
1356 ph10 587 if (multiline & !invert)
1357 nigel 77 {
1358 ph10 587 char *endmatch = ptr + offsets[1];
1359     t = ptr;
1360     while (t < endmatch)
1361 nigel 93 {
1362 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1363     if (t < endmatch) linenumber++; else break;
1364 nigel 93 }
1365 ph10 587 linelength = t - ptr - endlinelength;
1366 nigel 77 }
1367    
1368 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1369     zeroes are treated as just another data character. */
1370    
1371     /* This extra option, for Jeffrey Friedl's debugging requirements,
1372     replaces the matched string, or a specific captured string if it exists,
1373     with X. When this happens, colouring is ignored. */
1374    
1375     #ifdef JFRIEDL_DEBUG
1376     if (S_arg >= 0 && S_arg < mrc)
1377     {
1378     int first = S_arg * 2;
1379     int last = first + 1;
1380 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1381 nigel 87 fprintf(stdout, "X");
1382 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1383 nigel 87 }
1384     else
1385     #endif
1386    
1387 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1388 ph10 585 matches, but not of course if the line is a non-match. */
1389 ph10 589
1390 ph10 585 if (do_colour && !invert)
1391 nigel 87 {
1392 ph10 589 int plength;
1393 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1394 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1395 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1396 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1397 ph10 378 for (;;)
1398     {
1399 ph10 632 startoffset = offsets[1];
1400 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1401 ph10 654 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1402 ph10 632 break;
1403     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1404 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1405 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1406 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1407     }
1408 ph10 587
1409     /* In multiline mode, we may have already printed the complete line
1410 ph10 589 and its line-ending characters (if they matched the pattern), so there
1411 ph10 587 may be no more to print. */
1412 ph10 589
1413 ph10 636 plength = (linelength + endlinelength) - startoffset;
1414     if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1415 nigel 87 }
1416 ph10 392
1417 ph10 378 /* Not colouring; no need to search for further matches */
1418 ph10 392
1419 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1420 nigel 49 }
1421    
1422 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1423     given, flush the output. */
1424 nigel 87
1425 ph10 519 if (line_buffered) fflush(stdout);
1426 nigel 77 rc = 0; /* Had some success */
1427    
1428     /* Remember where the last match happened for after_context. We remember
1429     where we are about to restart, and that line's number. */
1430    
1431 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1432 nigel 77 lastmatchnumber = linenumber + 1;
1433 nigel 49 }
1434 nigel 77
1435 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1436     anything to be printed), we have to move on to the end of the match before
1437     proceeding. */
1438    
1439     if (multiline && invert && match)
1440     {
1441     int ellength;
1442     char *endmatch = ptr + offsets[1];
1443     t = ptr;
1444     while (t < endmatch)
1445     {
1446     t = end_of_line(t, endptr, &ellength);
1447     if (t <= endmatch) linenumber++; else break;
1448     }
1449     endmatch = end_of_line(endmatch, endptr, &ellength);
1450     linelength = endmatch - ptr - ellength;
1451     }
1452    
1453 ph10 286 /* Advance to after the newline and increment the line number. The file
1454 ph10 280 offset to the current line is maintained in filepos. */
1455 nigel 77
1456 nigel 93 ptr += linelength + endlinelength;
1457 ph10 530 filepos += (int)(linelength + endlinelength);
1458 nigel 77 linenumber++;
1459 ph10 535
1460     /* If input is line buffered, and the buffer is not yet full, read another
1461 ph10 519 line and add it into the buffer. */
1462 ph10 535
1463 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
1464 ph10 519 {
1465 ph10 644 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1466 ph10 519 bufflength += add;
1467 ph10 535 endptr += add;
1468     }
1469 nigel 77
1470     /* If we haven't yet reached the end of the file (the buffer is full), and
1471     the current point is in the top 1/3 of the buffer, slide the buffer down by
1472     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1473     about to be lost, print them. */
1474    
1475 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1476 nigel 77 {
1477     if (after_context > 0 &&
1478     lastmatchnumber > 0 &&
1479 ph10 644 lastmatchrestart < main_buffer + bufthird)
1480 nigel 77 {
1481     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1482     lastmatchnumber = 0;
1483     }
1484    
1485     /* Now do the shuffle */
1486    
1487 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1488     ptr -= bufthird;
1489 ph10 286
1490     #ifdef SUPPORT_LIBZ
1491     if (frtype == FR_LIBZ)
1492 ph10 644 bufflength = 2*bufthird +
1493     gzread (ingz, main_buffer + 2*bufthird, bufthird);
1494 ph10 286 else
1495     #endif
1496    
1497     #ifdef SUPPORT_LIBBZ2
1498     if (frtype == FR_LIBBZ2)
1499 ph10 644 bufflength = 2*bufthird +
1500     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1501 ph10 286 else
1502     #endif
1503    
1504 ph10 644 bufflength = 2*bufthird +
1505 ph10 535 (input_line_buffered?
1506 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1507     fread(main_buffer + 2*bufthird, 1, bufthird, in));
1508     endptr = main_buffer + bufflength;
1509 nigel 77
1510     /* Adjust any last match point */
1511    
1512 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1513 nigel 77 }
1514     } /* Loop through the whole file */
1515    
1516     /* End of file; print final "after" lines if wanted; do_after_lines sets
1517     hyphenpending if it prints something. */
1518    
1519 ph10 565 if (only_matching < 0 && !count_only)
1520 nigel 87 {
1521     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1522     hyphenpending |= endhyphenpending;
1523     }
1524 nigel 77
1525     /* Print the file name if we are looking for those without matches and there
1526     were none. If we found a match, we won't have got this far. */
1527    
1528 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1529 nigel 77 {
1530     fprintf(stdout, "%s\n", printname);
1531     return 0;
1532 nigel 49 }
1533    
1534 nigel 77 /* Print the match count if wanted */
1535    
1536 nigel 49 if (count_only)
1537     {
1538 ph10 420 if (count > 0 || !omit_zero_count)
1539 ph10 461 {
1540     if (printname != NULL && filenames != FN_NONE)
1541 ph10 420 fprintf(stdout, "%s:", printname);
1542     fprintf(stdout, "%d\n", count);
1543 ph10 461 }
1544 nigel 49 }
1545    
1546     return rc;
1547     }
1548    
1549    
1550    
1551     /*************************************************
1552 nigel 53 * Grep a file or recurse into a directory *
1553     *************************************************/
1554    
1555 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1556     recursing; if it's a file, grep it.
1557    
1558     Arguments:
1559     pathname the path to investigate
1560 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1561 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1562    
1563     Returns: 0 if there was at least one match
1564     1 if there were no matches
1565     2 there was some kind of error
1566    
1567     However, file opening failures are suppressed if "silent" is set.
1568     */
1569    
1570 nigel 53 static int
1571 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1572 nigel 53 {
1573     int rc = 1;
1574     int sep;
1575 ph10 286 int frtype;
1576     int pathlen;
1577     void *handle;
1578     FILE *in = NULL; /* Ensure initialized */
1579 nigel 53
1580 ph10 286 #ifdef SUPPORT_LIBZ
1581     gzFile ingz = NULL;
1582     #endif
1583    
1584     #ifdef SUPPORT_LIBBZ2
1585     BZFILE *inbz2 = NULL;
1586     #endif
1587    
1588 nigel 77 /* If the file name is "-" we scan stdin */
1589 nigel 53
1590 nigel 77 if (strcmp(pathname, "-") == 0)
1591 nigel 53 {
1592 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
1593 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1594 nigel 77 stdin_name : NULL);
1595     }
1596    
1597 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1598 ph10 325 each file and directory within it, subject to any include or exclude patterns
1599     that were set. The scanning code is localized so it can be made
1600     system-specific. */
1601 nigel 87
1602     if ((sep = isdirectory(pathname)) != 0)
1603 nigel 77 {
1604 nigel 87 if (dee_action == dee_SKIP) return 1;
1605     if (dee_action == dee_RECURSE)
1606 nigel 53 {
1607 nigel 87 char buffer[1024];
1608     char *nextfile;
1609     directory_type *dir = opendirectory(pathname);
1610 nigel 53
1611 nigel 87 if (dir == NULL)
1612     {
1613     if (!silent)
1614     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1615     strerror(errno));
1616     return 2;
1617     }
1618 nigel 77
1619 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1620     {
1621 ph10 324 int frc, nflen;
1622 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1623 ph10 530 nflen = (int)(strlen(nextfile));
1624 ph10 345
1625 ph10 325 if (isdirectory(buffer))
1626     {
1627     if (exclude_dir_compiled != NULL &&
1628     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1629     continue;
1630 ph10 345
1631 ph10 325 if (include_dir_compiled != NULL &&
1632     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1633     continue;
1634     }
1635 ph10 345 else
1636     {
1637 ph10 324 if (exclude_compiled != NULL &&
1638     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1639     continue;
1640 ph10 345
1641 ph10 324 if (include_compiled != NULL &&
1642     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1643     continue;
1644 ph10 345 }
1645 nigel 77
1646 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1647     if (frc > 1) rc = frc;
1648     else if (frc == 0 && rc == 1) rc = 0;
1649     }
1650    
1651     closedirectory(dir);
1652     return rc;
1653 nigel 53 }
1654     }
1655    
1656 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1657     been requested. */
1658 nigel 53
1659 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1660    
1661     /* Control reaches here if we have a regular file, or if we have a directory
1662     and recursion or skipping was not requested, or if we have anything else and
1663     skipping was not requested. The scan proceeds. If this is the first and only
1664     argument at top level, we don't show the file name, unless we are only showing
1665     the file name, or the filename was forced (-H). */
1666    
1667 ph10 530 pathlen = (int)(strlen(pathname));
1668 ph10 286
1669     /* Open using zlib if it is supported and the file name ends with .gz. */
1670    
1671     #ifdef SUPPORT_LIBZ
1672     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1673 nigel 53 {
1674 ph10 286 ingz = gzopen(pathname, "rb");
1675     if (ingz == NULL)
1676     {
1677     if (!silent)
1678     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1679     strerror(errno));
1680     return 2;
1681     }
1682     handle = (void *)ingz;
1683     frtype = FR_LIBZ;
1684     }
1685     else
1686     #endif
1687    
1688     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1689    
1690     #ifdef SUPPORT_LIBBZ2
1691     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1692     {
1693     inbz2 = BZ2_bzopen(pathname, "rb");
1694     handle = (void *)inbz2;
1695     frtype = FR_LIBBZ2;
1696     }
1697     else
1698     #endif
1699    
1700     /* Otherwise use plain fopen(). The label is so that we can come back here if
1701     an attempt to read a .bz2 file indicates that it really is a plain file. */
1702    
1703     #ifdef SUPPORT_LIBBZ2
1704     PLAIN_FILE:
1705     #endif
1706     {
1707 ph10 419 in = fopen(pathname, "rb");
1708 ph10 286 handle = (void *)in;
1709     frtype = FR_PLAIN;
1710     }
1711    
1712     /* All the opening methods return errno when they fail. */
1713    
1714     if (handle == NULL)
1715     {
1716 nigel 77 if (!silent)
1717     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1718     strerror(errno));
1719 nigel 53 return 2;
1720     }
1721    
1722 ph10 286 /* Now grep the file */
1723    
1724 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1725 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1726 nigel 77
1727 ph10 286 /* Close in an appropriate manner. */
1728    
1729     #ifdef SUPPORT_LIBZ
1730     if (frtype == FR_LIBZ)
1731     gzclose(ingz);
1732     else
1733     #endif
1734    
1735 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1736 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
1737     again as a normal file. */
1738    
1739     #ifdef SUPPORT_LIBBZ2
1740     if (frtype == FR_LIBBZ2)
1741     {
1742 ph10 644 if (rc == 3)
1743 ph10 286 {
1744     int errnum;
1745     const char *err = BZ2_bzerror(inbz2, &errnum);
1746     if (errnum == BZ_DATA_ERROR_MAGIC)
1747     {
1748     BZ2_bzclose(inbz2);
1749     goto PLAIN_FILE;
1750     }
1751     else if (!silent)
1752     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1753     pathname, err);
1754 ph10 654 rc = 2; /* The normal "something went wrong" code */
1755 ph10 286 }
1756     BZ2_bzclose(inbz2);
1757     }
1758     else
1759     #endif
1760    
1761     /* Normal file close */
1762    
1763 nigel 53 fclose(in);
1764 ph10 286
1765     /* Pass back the yield from pcregrep(). */
1766    
1767 nigel 53 return rc;
1768     }
1769    
1770    
1771    
1772    
1773     /*************************************************
1774 nigel 49 * Usage function *
1775     *************************************************/
1776    
1777     static int
1778     usage(int rc)
1779     {
1780 nigel 87 option_item *op;
1781     fprintf(stderr, "Usage: pcregrep [-");
1782     for (op = optionlist; op->one_char != 0; op++)
1783     {
1784     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1785     }
1786     fprintf(stderr, "] [long options] [pattern] [files]\n");
1787 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1788     "options.\n");
1789 nigel 49 return rc;
1790     }
1791    
1792    
1793    
1794    
1795     /*************************************************
1796 nigel 53 * Help function *
1797     *************************************************/
1798    
1799     static void
1800     help(void)
1801     {
1802     option_item *op;
1803    
1804 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1805 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1806 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1807 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1808    
1809     #ifdef SUPPORT_LIBZ
1810     printf("Files whose names end in .gz are read using zlib.\n");
1811     #endif
1812    
1813     #ifdef SUPPORT_LIBBZ2
1814     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1815     #endif
1816    
1817     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1818     printf("Other files and the standard input are read as plain files.\n\n");
1819     #else
1820     printf("All files are read as plain files, without any interpretation.\n\n");
1821     #endif
1822    
1823 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1824     printf("Options:\n");
1825    
1826     for (op = optionlist; op->one_char != 0; op++)
1827     {
1828     int n;
1829     char s[4];
1830 ph10 579
1831 ph10 571 /* Two options were accidentally implemented and documented with underscores
1832     instead of hyphens in their names, something that was not noticed for quite a
1833 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1834     in case people were using them. However, we don't want to display them in the
1835     help data. There are no other options that contain underscores, and we do not
1836     expect ever to implement such options. Therefore, just omit any option that
1837 ph10 571 contains an underscore. */
1838 ph10 579
1839     if (strchr(op->long_name, '_') != NULL) continue;
1840    
1841 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1842 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1843 nigel 53 if (n < 1) n = 1;
1844 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1845 nigel 53 }
1846    
1847 ph10 654 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1848 ph10 644 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1849     printf("When reading patterns from a file instead of using a command line option,\n");
1850 nigel 77 printf("trailing white space is removed and blank lines are ignored.\n");
1851 ph10 654 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1852 ph10 644 MAX_PATTERN_COUNT, PATBUFSIZE);
1853 nigel 53
1854 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1855 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1856     }
1857    
1858    
1859    
1860    
1861     /*************************************************
1862 nigel 77 * Handle a single-letter, no data option *
1863 nigel 53 *************************************************/
1864    
1865     static int
1866     handle_option(int letter, int options)
1867     {
1868     switch(letter)
1869     {
1870 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1871 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1872 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
1873 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1874 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1875 nigel 53 case 'c': count_only = TRUE; break;
1876 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1877     case 'H': filenames = FN_FORCE; break;
1878     case 'h': filenames = FN_NONE; break;
1879 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1880 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1881 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1882 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1883 nigel 53 case 'n': number = TRUE; break;
1884 ph10 565 case 'o': only_matching = 0; break;
1885 nigel 77 case 'q': quiet = TRUE; break;
1886 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1887 nigel 53 case 's': silent = TRUE; break;
1888 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1889 nigel 53 case 'v': invert = TRUE; break;
1890 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1891     case 'x': process_options |= PO_LINE_MATCH; break;
1892 nigel 53
1893     case 'V':
1894 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1895 ph10 561 pcregrep_exit(0);
1896 nigel 53 break;
1897    
1898     default:
1899     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1900 ph10 561 pcregrep_exit(usage(2));
1901 nigel 53 }
1902    
1903     return options;
1904     }
1905    
1906    
1907    
1908    
1909     /*************************************************
1910 nigel 87 * Construct printed ordinal *
1911     *************************************************/
1912    
1913     /* This turns a number into "1st", "3rd", etc. */
1914    
1915     static char *
1916     ordin(int n)
1917     {
1918     static char buffer[8];
1919     char *p = buffer;
1920     sprintf(p, "%d", n);
1921     while (*p != 0) p++;
1922     switch (n%10)
1923     {
1924     case 1: strcpy(p, "st"); break;
1925     case 2: strcpy(p, "nd"); break;
1926     case 3: strcpy(p, "rd"); break;
1927     default: strcpy(p, "th"); break;
1928     }
1929     return buffer;
1930     }
1931    
1932    
1933    
1934     /*************************************************
1935     * Compile a single pattern *
1936     *************************************************/
1937    
1938     /* When the -F option has been used, this is called for each substring.
1939     Otherwise it's called for each supplied pattern.
1940    
1941     Arguments:
1942     pattern the pattern string
1943     options the PCRE options
1944     filename the file name, or NULL for a command-line pattern
1945     count 0 if this is the only command line pattern, or
1946     number of the command line pattern, or
1947     linenumber for a pattern from a file
1948    
1949     Returns: TRUE on success, FALSE after an error
1950     */
1951    
1952     static BOOL
1953     compile_single_pattern(char *pattern, int options, char *filename, int count)
1954     {
1955 ph10 644 char buffer[PATBUFSIZE];
1956 nigel 87 const char *error;
1957     int errptr;
1958    
1959     if (pattern_count >= MAX_PATTERN_COUNT)
1960     {
1961     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1962     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1963     return FALSE;
1964     }
1965    
1966 ph10 644 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1967 nigel 87 suffix[process_options]);
1968     pattern_list[pattern_count] =
1969     pcre_compile(buffer, options, &error, &errptr, pcretables);
1970 ph10 142 if (pattern_list[pattern_count] != NULL)
1971 ph10 141 {
1972 ph10 142 pattern_count++;
1973 ph10 141 return TRUE;
1974 ph10 142 }
1975 nigel 87
1976     /* Handle compile errors */
1977    
1978     errptr -= (int)strlen(prefix[process_options]);
1979     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1980    
1981     if (filename == NULL)
1982     {
1983     if (count == 0)
1984     fprintf(stderr, "pcregrep: Error in command-line regex "
1985     "at offset %d: %s\n", errptr, error);
1986     else
1987     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1988     "at offset %d: %s\n", ordin(count), errptr, error);
1989     }
1990     else
1991     {
1992     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1993     "at offset %d: %s\n", count, filename, errptr, error);
1994     }
1995    
1996     return FALSE;
1997     }
1998    
1999    
2000    
2001     /*************************************************
2002     * Compile one supplied pattern *
2003     *************************************************/
2004    
2005     /* When the -F option has been used, each string may be a list of strings,
2006 nigel 91 separated by line breaks. They will be matched literally.
2007 nigel 87
2008     Arguments:
2009     pattern the pattern string
2010     options the PCRE options
2011     filename the file name, or NULL for a command-line pattern
2012     count 0 if this is the only command line pattern, or
2013     number of the command line pattern, or
2014     linenumber for a pattern from a file
2015    
2016     Returns: TRUE on success, FALSE after an error
2017     */
2018    
2019     static BOOL
2020     compile_pattern(char *pattern, int options, char *filename, int count)
2021     {
2022     if ((process_options & PO_FIXED_STRINGS) != 0)
2023     {
2024 nigel 93 char *eop = pattern + strlen(pattern);
2025 ph10 644 char buffer[PATBUFSIZE];
2026 nigel 87 for(;;)
2027     {
2028 nigel 93 int ellength;
2029     char *p = end_of_line(pattern, eop, &ellength);
2030     if (ellength == 0)
2031 nigel 87 return compile_single_pattern(pattern, options, filename, count);
2032 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2033 nigel 93 pattern = p;
2034 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
2035     return FALSE;
2036     }
2037     }
2038     else return compile_single_pattern(pattern, options, filename, count);
2039     }
2040    
2041    
2042    
2043     /*************************************************
2044 nigel 49 * Main program *
2045     *************************************************/
2046    
2047 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2048    
2049 nigel 49 int
2050     main(int argc, char **argv)
2051     {
2052 nigel 53 int i, j;
2053 nigel 49 int rc = 1;
2054 nigel 87 int pcre_options = 0;
2055     int cmd_pattern_count = 0;
2056 ph10 141 int hint_count = 0;
2057 nigel 49 int errptr;
2058 nigel 87 BOOL only_one_at_top;
2059     char *patterns[MAX_PATTERN_COUNT];
2060     const char *locale_from = "--locale";
2061 nigel 49 const char *error;
2062    
2063 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2064     pcre_jit_stack *jit_stack = NULL;
2065     #endif
2066    
2067 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2068     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2069 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2070 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2071 ph10 391 rather than escapes such as as '\r'. */
2072 nigel 91
2073     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2074     switch(i)
2075     {
2076 ph10 391 default: newline = (char *)"lf"; break;
2077     case 13: newline = (char *)"cr"; break;
2078     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2079     case -1: newline = (char *)"any"; break;
2080     case -2: newline = (char *)"anycrlf"; break;
2081 nigel 91 }
2082    
2083 nigel 49 /* Process the options */
2084    
2085     for (i = 1; i < argc; i++)
2086     {
2087 nigel 77 option_item *op = NULL;
2088     char *option_data = (char *)""; /* default to keep compiler happy */
2089     BOOL longop;
2090     BOOL longopwasequals = FALSE;
2091    
2092 nigel 49 if (argv[i][0] != '-') break;
2093 nigel 53
2094 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2095 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2096 nigel 63
2097 nigel 77 if (argv[i][1] == 0)
2098     {
2099 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2100 ph10 561 else pcregrep_exit(usage(2));
2101 nigel 77 }
2102 nigel 63
2103 nigel 77 /* Handle a long name option, or -- to terminate the options */
2104 nigel 53
2105     if (argv[i][1] == '-')
2106 nigel 49 {
2107 nigel 77 char *arg = argv[i] + 2;
2108     char *argequals = strchr(arg, '=');
2109 nigel 53
2110 nigel 77 if (*arg == 0) /* -- terminates options */
2111 nigel 49 {
2112 nigel 77 i++;
2113     break; /* out of the options-handling loop */
2114 nigel 53 }
2115 nigel 49
2116 nigel 77 longop = TRUE;
2117    
2118     /* Some long options have data that follows after =, for example file=name.
2119     Some options have variations in the long name spelling: specifically, we
2120     allow "regexp" because GNU grep allows it, though I personally go along
2121 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2122 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2123     both these categories. */
2124 nigel 77
2125 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2126     {
2127 nigel 77 char *opbra = strchr(op->long_name, '(');
2128     char *equals = strchr(op->long_name, '=');
2129 ph10 461
2130 ph10 422 /* Handle options with only one spelling of the name */
2131 ph10 461
2132 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2133 nigel 53 {
2134 nigel 77 if (equals == NULL) /* Not thing=data case */
2135     {
2136     if (strcmp(arg, op->long_name) == 0) break;
2137     }
2138     else /* Special case xxx=data */
2139     {
2140 ph10 530 int oplen = (int)(equals - op->long_name);
2141 ph10 535 int arglen = (argequals == NULL)?
2142 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2143 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2144     {
2145     option_data = arg + arglen;
2146     if (*option_data == '=')
2147     {
2148     option_data++;
2149     longopwasequals = TRUE;
2150     }
2151     break;
2152     }
2153     }
2154 nigel 53 }
2155 ph10 461
2156 ph10 422 /* Handle options with an alternate spelling of the name */
2157 ph10 461
2158     else
2159 nigel 77 {
2160     char buff1[24];
2161     char buff2[24];
2162 ph10 461
2163 ph10 530 int baselen = (int)(opbra - op->long_name);
2164     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2165 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2166 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2167 ph10 461
2168 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2169 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2170 ph10 461
2171     if (strncmp(arg, buff1, arglen) == 0 ||
2172 ph10 422 strncmp(arg, buff2, arglen) == 0)
2173     {
2174     if (equals != NULL && argequals != NULL)
2175     {
2176 ph10 461 option_data = argequals;
2177 ph10 422 if (*option_data == '=')
2178     {
2179 ph10 461 option_data++;
2180 ph10 422 longopwasequals = TRUE;
2181 ph10 461 }
2182     }
2183 nigel 77 break;
2184 ph10 461 }
2185 nigel 77 }
2186 nigel 53 }
2187 nigel 77
2188 nigel 53 if (op->one_char == 0)
2189     {
2190     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2191 ph10 561 pcregrep_exit(usage(2));
2192 nigel 53 }
2193     }
2194 nigel 49
2195 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2196     are not in the right form for putting in the option table because they use
2197     only one hyphen, yet are more than one character long. By putting them
2198     separately here, they will not get displayed as part of the help() output,
2199     but I don't think Jeffrey will care about that. */
2200    
2201     #ifdef JFRIEDL_DEBUG
2202     else if (strcmp(argv[i], "-pre") == 0) {
2203     jfriedl_prefix = argv[++i];
2204     continue;
2205     } else if (strcmp(argv[i], "-post") == 0) {
2206     jfriedl_postfix = argv[++i];
2207     continue;
2208     } else if (strcmp(argv[i], "-XT") == 0) {
2209     sscanf(argv[++i], "%d", &jfriedl_XT);
2210     continue;
2211     } else if (strcmp(argv[i], "-XR") == 0) {
2212     sscanf(argv[++i], "%d", &jfriedl_XR);
2213     continue;
2214     }
2215     #endif
2216    
2217    
2218 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2219     continue till we hit the last one or one that needs data. */
2220 nigel 53
2221     else
2222     {
2223     char *s = argv[i] + 1;
2224 nigel 77 longop = FALSE;
2225 nigel 53 while (*s != 0)
2226     {
2227 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2228 ph10 579 {
2229     if (*s == op->one_char) break;
2230 ph10 565 }
2231 nigel 77 if (op->one_char == 0)
2232 nigel 53 {
2233 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2234     *s, argv[i]);
2235 ph10 561 pcregrep_exit(usage(2));
2236 nigel 77 }
2237 ph10 579
2238 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2239 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2240 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2241     with other single-character options in the same string. */
2242 ph10 579
2243 ph10 565 option_data = s+1;
2244     if (op->type == OP_OP_NUMBER)
2245 ph10 579 {
2246     if (isdigit((unsigned char)s[1])) break;
2247 nigel 53 }
2248 ph10 565 else /* Check for end or a dataless option */
2249 ph10 579 {
2250 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2251 ph10 579 }
2252    
2253     /* Handle a single-character option with no data, then loop for the
2254 ph10 565 next character in the string. */
2255    
2256 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2257 nigel 49 }
2258     }
2259 nigel 77
2260 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2261     is NO_DATA, it means that there is no data, and the option might set
2262     something in the PCRE options. */
2263 nigel 77
2264     if (op->type == OP_NODATA)
2265     {
2266 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2267     continue;
2268     }
2269    
2270     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2271     either has a value or defaults to something. It cannot have data in a
2272 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2273 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2274 nigel 87
2275     if (*option_data == 0 &&
2276     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2277     {
2278     switch (op->one_char)
2279 nigel 77 {
2280 nigel 87 case N_COLOUR:
2281     colour_option = (char *)"auto";
2282     break;
2283 ph10 579
2284 ph10 565 case 'o':
2285     only_matching = 0;
2286 ph10 579 break;
2287    
2288 nigel 87 #ifdef JFRIEDL_DEBUG
2289     case 'S':
2290     S_arg = 0;
2291     break;
2292     #endif
2293 nigel 77 }
2294 nigel 87 continue;
2295     }
2296 nigel 77
2297 nigel 87 /* Otherwise, find the data string for the option. */
2298    
2299     if (*option_data == 0)
2300     {
2301     if (i >= argc - 1 || longopwasequals)
2302 nigel 77 {
2303 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2304 ph10 561 pcregrep_exit(usage(2));
2305 nigel 87 }
2306     option_data = argv[++i];
2307     }
2308    
2309     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2310     multiple times to create a list of patterns. */
2311    
2312     if (op->type == OP_PATLIST)
2313     {
2314     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2315     {
2316     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2317     MAX_PATTERN_COUNT);
2318     return 2;
2319     }
2320     patterns[cmd_pattern_count++] = option_data;
2321     }
2322    
2323     /* Otherwise, deal with single string or numeric data values. */
2324    
2325 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2326     op->type != OP_OP_NUMBER)
2327 nigel 87 {
2328     *((char **)op->dataptr) = option_data;
2329     }
2330 ph10 558
2331     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2332     only for unpicking arguments, so just keep it simple. */
2333    
2334 nigel 87 else
2335     {
2336 ph10 561 unsigned long int n = 0;
2337 ph10 558 char *endptr = option_data;
2338     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2339     while (isdigit((unsigned char)(*endptr)))
2340     n = n * 10 + (int)(*endptr++ - '0');
2341 ph10 644 if (toupper(*endptr) == 'K')
2342     {
2343 ph10 654 n *= 1024;
2344     endptr++;
2345     }
2346 ph10 644 else if (toupper(*endptr) == 'M')
2347     {
2348 ph10 654 n *= 1024*1024;
2349     endptr++;
2350     }
2351 nigel 87 if (*endptr != 0)
2352     {
2353     if (longop)
2354 nigel 77 {
2355 nigel 87 char *equals = strchr(op->long_name, '=');
2356     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2357 ph10 530 (int)(equals - op->long_name);
2358 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2359     option_data, nlen, op->long_name);
2360 nigel 77 }
2361 nigel 87 else
2362     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2363     option_data, op->one_char);
2364 ph10 561 pcregrep_exit(usage(2));
2365 nigel 77 }
2366 ph10 584 if (op->type == OP_LONGNUMBER)
2367     *((unsigned long int *)op->dataptr) = n;
2368     else
2369     *((int *)op->dataptr) = n;
2370 nigel 77 }
2371 nigel 49 }
2372    
2373 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2374     for -A and -B. */
2375    
2376     if (both_context > 0)
2377     {
2378     if (after_context == 0) after_context = both_context;
2379     if (before_context == 0) before_context = both_context;
2380     }
2381 ph10 286
2382     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2383 ph10 565 However, the latter two set only_matching. */
2384 nigel 77
2385 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2386 ph10 286 (file_offsets && line_offsets))
2387 ph10 280 {
2388     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2389     "and/or --line-offsets\n");
2390 ph10 561 pcregrep_exit(usage(2));
2391 ph10 280 }
2392    
2393 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2394 ph10 286
2395 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2396     LC_ALL environment variable is set, and if so, use it. */
2397 nigel 49
2398 nigel 87 if (locale == NULL)
2399 nigel 53 {
2400 nigel 87 locale = getenv("LC_ALL");
2401     locale_from = "LCC_ALL";
2402 nigel 53 }
2403 nigel 49
2404 nigel 87 if (locale == NULL)
2405     {
2406     locale = getenv("LC_CTYPE");
2407     locale_from = "LC_CTYPE";
2408     }
2409 nigel 49
2410 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2411     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2412    
2413     if (locale != NULL)
2414 nigel 49 {
2415 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2416 nigel 53 {
2417 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2418     locale, locale_from);
2419 nigel 53 return 2;
2420     }
2421 nigel 87 pcretables = pcre_maketables();
2422     }
2423 nigel 77
2424 nigel 87 /* Sort out colouring */
2425    
2426     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2427     {
2428     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2429     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2430     else
2431 nigel 53 {
2432 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2433     colour_option);
2434     return 2;
2435 nigel 77 }
2436 nigel 87 if (do_colour)
2437 nigel 77 {
2438 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2439     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2440     if (cs != NULL) colour_string = cs;
2441 nigel 77 }
2442 nigel 87 }
2443 ph10 535
2444 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2445    
2446     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2447     {
2448     pcre_options |= PCRE_NEWLINE_CR;
2449 nigel 93 endlinetype = EL_CR;
2450 nigel 91 }
2451     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2452     {
2453     pcre_options |= PCRE_NEWLINE_LF;
2454 nigel 93 endlinetype = EL_LF;
2455 nigel 91 }
2456     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2457     {
2458     pcre_options |= PCRE_NEWLINE_CRLF;
2459 nigel 93 endlinetype = EL_CRLF;
2460 nigel 91 }
2461 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2462     {
2463     pcre_options |= PCRE_NEWLINE_ANY;
2464     endlinetype = EL_ANY;
2465     }
2466 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2467     {
2468     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2469     endlinetype = EL_ANYCRLF;
2470     }
2471 nigel 91 else
2472     {
2473     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2474     return 2;
2475     }
2476    
2477 nigel 87 /* Interpret the text values for -d and -D */
2478    
2479     if (dee_option != NULL)
2480     {
2481     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2482     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2483     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2484     else
2485 nigel 77 {
2486 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2487     return 2;
2488 nigel 53 }
2489 nigel 49 }
2490    
2491 nigel 87 if (DEE_option != NULL)
2492     {
2493     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2494     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2495     else
2496     {
2497     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2498     return 2;
2499     }
2500     }
2501 nigel 49
2502 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2503 nigel 87
2504     #ifdef JFRIEDL_DEBUG
2505     if (S_arg > 9)
2506 nigel 49 {
2507 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2508     return 2;
2509     }
2510 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2511     {
2512     if (jfriedl_XT == 0) jfriedl_XT = 1;
2513     if (jfriedl_XR == 0) jfriedl_XR = 1;
2514     }
2515 nigel 87 #endif
2516 nigel 77
2517 ph10 644 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2518 nigel 87
2519 ph10 644 bufsize = 3*bufthird;
2520     main_buffer = (char *)malloc(bufsize);
2521 nigel 87 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2522     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2523    
2524 ph10 644 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2525 nigel 87 {
2526     fprintf(stderr, "pcregrep: malloc failed\n");
2527 ph10 123 goto EXIT2;
2528 nigel 87 }
2529    
2530     /* If no patterns were provided by -e, and there is no file provided by -f,
2531     the first argument is the one and only pattern, and it must exist. */
2532    
2533     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2534     {
2535 nigel 63 if (i >= argc) return usage(2);
2536 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2537     }
2538 nigel 77
2539 nigel 87 /* Compile the patterns that were provided on the command line, either by
2540     multiple uses of -e or as a single unkeyed pattern. */
2541    
2542     for (j = 0; j < cmd_pattern_count; j++)
2543     {
2544     if (!compile_pattern(patterns[j], pcre_options, NULL,
2545     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2546 ph10 123 goto EXIT2;
2547 nigel 87 }
2548    
2549     /* Compile the regular expressions that are provided in a file. */
2550    
2551     if (pattern_filename != NULL)
2552     {
2553     int linenumber = 0;
2554     FILE *f;
2555     char *filename;
2556 ph10 644 char buffer[PATBUFSIZE];
2557 nigel 87
2558     if (strcmp(pattern_filename, "-") == 0)
2559 nigel 77 {
2560 nigel 87 f = stdin;
2561     filename = stdin_name;
2562 nigel 77 }
2563 nigel 87 else
2564 nigel 77 {
2565 nigel 87 f = fopen(pattern_filename, "r");
2566     if (f == NULL)
2567     {
2568     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2569     strerror(errno));
2570 ph10 123 goto EXIT2;
2571 nigel 87 }
2572     filename = pattern_filename;
2573 nigel 77 }
2574    
2575 ph10 644 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2576 nigel 53 {
2577 nigel 87 char *s = buffer + (int)strlen(buffer);
2578     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2579     *s = 0;
2580     linenumber++;
2581     if (buffer[0] == 0) continue; /* Skip blank lines */
2582     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2583 ph10 121 goto EXIT2;
2584 nigel 53 }
2585 nigel 87
2586     if (f != stdin) fclose(f);
2587 nigel 49 }
2588    
2589 ph10 691 /* Study the regular expressions, as we will be running them many times. Unless
2590 ph10 685 JIT has been explicitly disabled, arrange a stack for it to use. */
2591 nigel 53
2592 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2593     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2594     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2595 ph10 691 #endif
2596    
2597 nigel 53 for (j = 0; j < pattern_count; j++)
2598     {
2599 ph10 667 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2600 nigel 53 if (error != NULL)
2601     {
2602     char s[16];
2603     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2604     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2605 ph10 121 goto EXIT2;
2606 nigel 53 }
2607 ph10 142 hint_count++;
2608 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2609 ph10 691 if (jit_stack != NULL && hints_list[j] != NULL)
2610 ph10 685 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2611     #endif
2612 nigel 53 }
2613 ph10 579
2614 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2615     pcre_extra block for each pattern. */
2616 nigel 53
2617 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2618     {
2619     for (j = 0; j < pattern_count; j++)
2620     {
2621     if (hints_list[j] == NULL)
2622     {
2623     hints_list[j] = malloc(sizeof(pcre_extra));
2624 ph10 579 if (hints_list[j] == NULL)
2625 ph10 561 {
2626     fprintf(stderr, "pcregrep: malloc failed\n");
2627     pcregrep_exit(2);
2628     }
2629     }
2630     if (match_limit > 0)
2631 ph10 579 {
2632 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2633     hints_list[j]->match_limit = match_limit;
2634 ph10 579 }
2635 ph10 561 if (match_limit_recursion > 0)
2636 ph10 579 {
2637 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2638     hints_list[j]->match_limit_recursion = match_limit_recursion;
2639 ph10 579 }
2640 ph10 561 }
2641 ph10 579 }
2642 ph10 561
2643 nigel 77 /* If there are include or exclude patterns, compile them. */
2644    
2645     if (exclude_pattern != NULL)
2646     {
2647 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2648     pcretables);
2649 nigel 77 if (exclude_compiled == NULL)
2650     {
2651     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2652     errptr, error);
2653 ph10 121 goto EXIT2;
2654 nigel 77 }
2655     }
2656    
2657     if (include_pattern != NULL)
2658     {
2659 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2660     pcretables);
2661 nigel 77 if (include_compiled == NULL)
2662     {
2663     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2664     errptr, error);
2665 ph10 121 goto EXIT2;
2666 nigel 77 }
2667     }
2668    
2669 ph10 325 if (exclude_dir_pattern != NULL)
2670     {
2671     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2672     pcretables);
2673     if (exclude_dir_compiled == NULL)
2674     {
2675     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2676     errptr, error);
2677     goto EXIT2;
2678     }
2679     }
2680    
2681     if (include_dir_pattern != NULL)
2682     {
2683     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2684     pcretables);
2685     if (include_dir_compiled == NULL)
2686     {
2687     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2688     errptr, error);
2689     goto EXIT2;
2690     }
2691     }
2692    
2693 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2694 nigel 49
2695 nigel 87 if (i >= argc)
2696 ph10 121 {
2697 ph10 654 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2698 ph10 644 (filenames > FN_DEFAULT)? stdin_name : NULL);
2699 ph10 121 goto EXIT;
2700 ph10 123 }
2701 nigel 49
2702 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2703     Pass in the fact that there is only one argument at top level - this suppresses
2704 nigel 87 the file name if the argument is not a directory and filenames are not
2705     otherwise forced. */
2706 nigel 49
2707 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2708 nigel 49
2709     for (; i < argc; i++)
2710     {
2711 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2712     only_one_at_top);
2713 nigel 77 if (frc > 1) rc = frc;
2714     else if (frc == 0 && rc == 1) rc = 0;
2715 nigel 49 }
2716    
2717 ph10 121 EXIT:
2718 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2719     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2720     #endif
2721 ph10 644 if (main_buffer != NULL) free(main_buffer);
2722 ph10 121 if (pattern_list != NULL)
2723     {
2724 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2725 ph10 121 free(pattern_list);
2726 ph10 123 }
2727 ph10 121 if (hints_list != NULL)
2728     {
2729 ph10 579 for (i = 0; i < hint_count; i++)
2730 ph10 561 {
2731 ph10 667 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2732 ph10 579 }
2733 ph10 121 free(hints_list);
2734 ph10 123 }
2735 ph10 561 pcregrep_exit(rc);
2736 ph10 121
2737     EXIT2:
2738     rc = 2;
2739     goto EXIT;
2740 nigel 49 }
2741    
2742 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12