/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 944 - (hide annotations) (download)
Tue Feb 28 16:49:21 2012 UTC (2 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 81152 byte(s)
Added --file-list to pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77 ph10 644 #define PATBUFSIZE BUFSIZ
78 nigel 77 #else
79 ph10 644 #define PATBUFSIZE 8192
80 nigel 77 #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge: the result of fwrite() is consumed
by an "if" with an empty body. Oddly, this does not also seem to apply to
fprintf().

The "if" is wrapped in do { } while (0) so that the macro expands to exactly
one statement. It must be followed by a semicolon, and it can then be used
safely as the body of an unbraced if/else without capturing the "else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 ph10 644 static char *main_buffer = NULL;
139 nigel 91 static char *newline = NULL;
140 nigel 53 static char *pattern_filename = NULL;
141 nigel 77 static char *stdin_name = (char *)"(standard input)";
142 nigel 87 static char *locale = NULL;
143    
144     static const unsigned char *pcretables = NULL;
145    
146 nigel 53 static int pattern_count = 0;
147 ph10 121 static pcre **pattern_list = NULL;
148     static pcre_extra **hints_list = NULL;
149 nigel 49
150 ph10 944 static char *file_list = NULL;
151 nigel 77 static char *include_pattern = NULL;
152     static char *exclude_pattern = NULL;
153 ph10 325 static char *include_dir_pattern = NULL;
154     static char *exclude_dir_pattern = NULL;
155 nigel 77
156     static pcre *include_compiled = NULL;
157     static pcre *exclude_compiled = NULL;
158 ph10 325 static pcre *include_dir_compiled = NULL;
159     static pcre *exclude_dir_compiled = NULL;
160 nigel 77
161     static int after_context = 0;
162     static int before_context = 0;
163     static int both_context = 0;
164 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
165     static int bufsize = 3*PCREGREP_BUFSIZE;
166 nigel 87 static int dee_action = dee_READ;
167     static int DEE_action = DEE_READ;
168     static int error_count = 0;
169     static int filenames = FN_DEFAULT;
170 ph10 565 static int only_matching = -1;
171 nigel 87 static int process_options = 0;
172 ph10 685
173     #ifdef SUPPORT_PCREGREP_JIT
174     static int study_options = PCRE_STUDY_JIT_COMPILE;
175     #else
176 ph10 667 static int study_options = 0;
177 ph10 685 #endif
178 nigel 77
179 ph10 561 static unsigned long int match_limit = 0;
180     static unsigned long int match_limit_recursion = 0;
181    
182 nigel 49 static BOOL count_only = FALSE;
183 nigel 87 static BOOL do_colour = FALSE;
184 ph10 280 static BOOL file_offsets = FALSE;
185 nigel 77 static BOOL hyphenpending = FALSE;
186 nigel 49 static BOOL invert = FALSE;
187 ph10 519 static BOOL line_buffered = FALSE;
188 ph10 280 static BOOL line_offsets = FALSE;
189 nigel 77 static BOOL multiline = FALSE;
190 nigel 49 static BOOL number = FALSE;
191 ph10 420 static BOOL omit_zero_count = FALSE;
192 ph10 561 static BOOL resource_error = FALSE;
193 nigel 77 static BOOL quiet = FALSE;
194 nigel 49 static BOOL silent = FALSE;
195 nigel 93 static BOOL utf8 = FALSE;
196 nigel 49
197 nigel 53 /* Structure for options and list of them */
198 nigel 49
199 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
200     OP_OP_NUMBER, OP_PATLIST };
201 nigel 77
202 nigel 53 typedef struct option_item {
203 nigel 77 int type;
204 nigel 53 int one_char;
205 nigel 77 void *dataptr;
206 nigel 67 const char *long_name;
207     const char *help_text;
208 nigel 53 } option_item;
209 nigel 49
210 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
211     used to identify them. */
212    
213 ph10 325 #define N_COLOUR (-1)
214     #define N_EXCLUDE (-2)
215     #define N_EXCLUDE_DIR (-3)
216     #define N_HELP (-4)
217     #define N_INCLUDE (-5)
218     #define N_INCLUDE_DIR (-6)
219     #define N_LABEL (-7)
220     #define N_LOCALE (-8)
221     #define N_NULL (-9)
222     #define N_LOFFSETS (-10)
223     #define N_FOFFSETS (-11)
224 ph10 519 #define N_LBUFFER (-12)
225 ph10 561 #define N_M_LIMIT (-13)
226     #define N_M_LIMIT_REC (-14)
227 ph10 644 #define N_BUFSIZE (-15)
228 ph10 685 #define N_NOJIT (-16)
229 ph10 944 #define N_FILE_LIST (-17)
230 nigel 87
231 nigel 53 static option_item optionlist[] = {
232 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
233     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
234     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
235     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
236 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
237 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
238     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
239     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
240     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
241     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
242     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
243     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
244     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
245     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
246 ph10 944 { OP_STRING, N_FILE_LIST, &file_list, "file-list=path","read files to search from file" },
247 ph10 584 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
248     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
249     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
250     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
251 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
252     { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
253     #else
254     { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
255     #endif
256 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
257     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
258     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
259     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
260     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
261     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
262     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
263     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
264     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
265     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
266     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
267     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
268     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
269     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
270     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
271     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
272     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
273     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
274 ph10 571
275     /* These two were accidentally implemented with underscores instead of
276     hyphens in the option names. As this was not discovered for several releases,
277     the incorrect versions are left in the table for compatibility. However, the
278     --help function misses out any option that has an underscore in its name. */
279 ph10 579
280 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
281     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
282 ph10 571
283 nigel 87 #ifdef JFRIEDL_DEBUG
284     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
285     #endif
286     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
287     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
288     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
289     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
290     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
291     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
292     { OP_NODATA, 0, NULL, NULL, NULL }
293 nigel 53 };
294    
295 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
296     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
297     that the combination of -w and -x has the same effect as -x on its own, so we
298     can treat them as the same. */
299 nigel 53
300 nigel 87 static const char *prefix[] = {
301     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
302    
303     static const char *suffix[] = {
304     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
305    
306 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
307 nigel 87
308 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
309 nigel 87
310 nigel 93 const char utf8_table4[] = {
311     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
312     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
313     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
314     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
315    
316    
317    
318 nigel 53 /*************************************************
319 ph10 586 * Exit from the program *
320     *************************************************/
321    
322     /* If there has been a resource error, give a suitable message.
323    
324     Argument: the return code
325     Returns: does not return
326     */
327    
328     static void
329     pcregrep_exit(int rc)
330     {
331     if (resource_error)
332     {
333 ph10 685 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
334     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
335     PCRE_ERROR_JIT_STACKLIMIT);
336 ph10 586 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
337     }
338    
339     exit(rc);
340     }
341    
342    
343     /*************************************************
344 nigel 87 * OS-specific functions *
345 nigel 53 *************************************************/
346    
347     /* These functions are defined so that they can be made system specific,
348 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
349 nigel 53
350    
351     /************* Directory scanning in Unix ***********/
352    
353 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
354 nigel 53 #include <sys/types.h>
355     #include <sys/stat.h>
356     #include <dirent.h>
357    
358     typedef DIR directory_type;
359    
/* Test whether a path names a directory.

Argument:   filename   the path to examine with stat()
Returns:    '/' if stat() succeeds and reports a directory;
            0 otherwise, including when stat() fails (the expectation is
            that a later attempt to open the path as a file will fail)
*/

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
368    
369 nigel 67 static directory_type *
370 nigel 53 opendirectory(char *filename)
371     {
372     return opendir(filename);
373     }
374    
375 nigel 67 static char *
376 nigel 53 readdirectory(directory_type *dir)
377     {
378     for (;;)
379     {
380     struct dirent *dent = readdir(dir);
381     if (dent == NULL) return NULL;
382     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
383     return dent->d_name;
384     }
385 ph10 151 /* Control never reaches here */
386 nigel 53 }
387    
388 nigel 67 static void
389 nigel 53 closedirectory(directory_type *dir)
390     {
391     closedir(dir);
392     }
393    
394    
395 nigel 87 /************* Test for regular file in Unix **********/
396    
/* Test whether a path names a regular file.

Argument:   filename   the path to examine with stat()
Returns:    1 if stat() fails (in the expectation that opening the path as
            a file will fail later), otherwise 1 for a regular file and 0
            for anything else
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
405    
406    
407 ph10 519 /************* Test for a terminal in Unix **********/
408 nigel 87
409     static BOOL
410     is_stdout_tty(void)
411     {
412     return isatty(fileno(stdout));
413     }
414    
415 ph10 519 static BOOL
416     is_file_tty(FILE *f)
417     {
418     return isatty(fileno(f));
419     }
420 nigel 87
421 ph10 519
422 nigel 63 /************* Directory scanning in Win32 ***********/
423 nigel 53
424 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
425 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
426 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
427     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
428 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
429     undefined when it is indeed undefined. */
430 nigel 53
431 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
432 nigel 63
433     #ifndef STRICT
434     # define STRICT
435     #endif
436     #ifndef WIN32_LEAN_AND_MEAN
437     # define WIN32_LEAN_AND_MEAN
438     #endif
439 ph10 283
440     #include <windows.h>
441    
442 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
443     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
444     #endif
445    
446 nigel 63 typedef struct directory_type
447     {
448     HANDLE handle;
449     BOOL first;
450     WIN32_FIND_DATA data;
451     } directory_type;
452    
453     int
454     isdirectory(char *filename)
455     {
456     DWORD attr = GetFileAttributes(filename);
457     if (attr == INVALID_FILE_ATTRIBUTES)
458     return 0;
459     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
460     }
461    
462     directory_type *
463     opendirectory(char *filename)
464     {
465     size_t len;
466     char *pattern;
467     directory_type *dir;
468     DWORD err;
469     len = strlen(filename);
470     pattern = (char *) malloc(len + 3);
471     dir = (directory_type *) malloc(sizeof(*dir));
472     if ((pattern == NULL) || (dir == NULL))
473     {
474     fprintf(stderr, "pcregrep: malloc failed\n");
475 ph10 561 pcregrep_exit(2);
476 nigel 63 }
477     memcpy(pattern, filename, len);
478     memcpy(&(pattern[len]), "\\*", 3);
479     dir->handle = FindFirstFile(pattern, &(dir->data));
480     if (dir->handle != INVALID_HANDLE_VALUE)
481     {
482     free(pattern);
483     dir->first = TRUE;
484     return dir;
485     }
486     err = GetLastError();
487     free(pattern);
488     free(dir);
489     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
490     return NULL;
491     }
492    
493     char *
494     readdirectory(directory_type *dir)
495     {
496     for (;;)
497     {
498     if (!dir->first)
499     {
500     if (!FindNextFile(dir->handle, &(dir->data)))
501     return NULL;
502     }
503     else
504     {
505     dir->first = FALSE;
506     }
507     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
508     return dir->data.cFileName;
509     }
510     #ifndef _MSC_VER
511     return NULL; /* Keep compiler happy; never executed */
512     #endif
513     }
514    
515     void
516     closedirectory(directory_type *dir)
517     {
518     FindClose(dir->handle);
519     free(dir);
520     }
521    
522    
523 nigel 87 /************* Test for regular file in Win32 **********/
524    
/* I don't know how to do this, or if it can be done; every path that
isdirectory() does not report as a directory is assumed to be a regular file.

Argument:   filename   the path to examine
Returns:    1 if isdirectory() returns zero for the path, otherwise 0
*/

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
532    
533    
534 ph10 519 /************* Test for a terminal in Win32 **********/
535 nigel 87
536     /* I don't know how to do this; assume never */
537    
538     static BOOL
539     is_stdout_tty(void)
540     {
541 ph10 283 return FALSE;
542 nigel 87 }
543    
544 ph10 519 static BOOL
545     is_file_tty(FILE *f)
546     {
547     return FALSE;
548     }
549 nigel 87
550 ph10 519
551 nigel 53 /************* Directory scanning when we can't do it ***********/
552    
553     /* The type is void, and apart from isdirectory(), the functions do nothing. */
554    
555 nigel 63 #else
556    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always yields a null handle. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
563    
564 nigel 87
565     /************* Test for regular when we can't do it **********/
566    
/* Assume all files are regular: the argument is not examined and the result
is always 1. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
570    
571    
572 ph10 519 /************* Test for a terminal when we can't do it **********/
573 nigel 87
574     static BOOL
575     is_stdout_tty(void)
576     {
577     return FALSE;
578     }
579    
580 ph10 519 static BOOL
581     is_file_tty(FILE *f)
582     {
583     return FALSE;
584     }
585 nigel 87
586 nigel 53 #endif
587    
588    
589    
590 ph10 137 #ifndef HAVE_STRERROR
591 nigel 49 /*************************************************
592     * Provide strerror() for non-ANSI libraries *
593     *************************************************/
594    
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries. This replacement looks the message up in the sys_errlist
table that such systems provide.

Argument:   n   an error number
Returns:    sys_errlist[n] when 0 <= n < sys_nerr, otherwise a fixed
            "unknown error number" string
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
608     #endif /* HAVE_STRERROR */
609    
610    
611    
612     /*************************************************
613 ph10 519 * Read one line of input *
614     *************************************************/
615    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" characters have been
stored, or at end of file, whichever comes first. No terminating zero is added.
The space remaining is checked before each character is fetched, so a length
that is zero or negative stores nothing and consumes nothing from the file.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int c;
int yield = 0;
while (yield < length && (c = fgetc(f)) != EOF)
  {
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return (unsigned int)yield;
}
643    
644    
645    
646     /*************************************************
647 nigel 93 * Find end of line *
648     *************************************************/
649    
650     /* The length of the endline sequence that is found is set via lenptr. This may
651     be zero at the very end of the file if there is no line-ending sequence there.
652    
653     Arguments:
654     p current position in line
655     endptr end of available data
656     lenptr where to put the length of the eol sequence
657    
658 ph10 654 Returns: pointer after the last byte of the line,
659 ph10 644 including the newline byte(s)
660 nigel 93 */
661    
662     static char *
663     end_of_line(char *p, char *endptr, int *lenptr)
664     {
665     switch(endlinetype)
666     {
667     default: /* Just in case */
668     case EL_LF:
669     while (p < endptr && *p != '\n') p++;
670     if (p < endptr)
671     {
672     *lenptr = 1;
673     return p + 1;
674     }
675     *lenptr = 0;
676     return endptr;
677    
678     case EL_CR:
679     while (p < endptr && *p != '\r') p++;
680     if (p < endptr)
681     {
682     *lenptr = 1;
683     return p + 1;
684     }
685     *lenptr = 0;
686     return endptr;
687    
688     case EL_CRLF:
689     for (;;)
690     {
691     while (p < endptr && *p != '\r') p++;
692     if (++p >= endptr)
693     {
694     *lenptr = 0;
695     return endptr;
696     }
697     if (*p == '\n')
698     {
699     *lenptr = 2;
700     return p + 1;
701     }
702     }
703     break;
704    
705 ph10 149 case EL_ANYCRLF:
706     while (p < endptr)
707     {
708     int extra = 0;
709     register int c = *((unsigned char *)p);
710    
711     if (utf8 && c >= 0xc0)
712     {
713     int gcii, gcss;
714     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
715     gcss = 6*extra;
716     c = (c & utf8_table3[extra]) << gcss;
717     for (gcii = 1; gcii <= extra; gcii++)
718     {
719     gcss -= 6;
720     c |= (p[gcii] & 0x3f) << gcss;
721     }
722     }
723    
724     p += 1 + extra;
725    
726     switch (c)
727     {
728     case 0x0a: /* LF */
729     *lenptr = 1;
730     return p;
731    
732     case 0x0d: /* CR */
733     if (p < endptr && *p == 0x0a)
734     {
735     *lenptr = 2;
736     p++;
737     }
738     else *lenptr = 1;
739     return p;
740 ph10 150
741 ph10 149 default:
742     break;
743     }
744     } /* End of loop for ANYCRLF case */
745 ph10 150
746 ph10 149 *lenptr = 0; /* Must have hit the end */
747     return endptr;
748    
749 nigel 93 case EL_ANY:
750     while (p < endptr)
751     {
752     int extra = 0;
753     register int c = *((unsigned char *)p);
754    
755     if (utf8 && c >= 0xc0)
756     {
757     int gcii, gcss;
758     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
759     gcss = 6*extra;
760     c = (c & utf8_table3[extra]) << gcss;
761     for (gcii = 1; gcii <= extra; gcii++)
762     {
763     gcss -= 6;
764     c |= (p[gcii] & 0x3f) << gcss;
765     }
766     }
767    
768     p += 1 + extra;
769    
770     switch (c)
771     {
772     case 0x0a: /* LF */
773     case 0x0b: /* VT */
774     case 0x0c: /* FF */
775     *lenptr = 1;
776     return p;
777    
778     case 0x0d: /* CR */
779     if (p < endptr && *p == 0x0a)
780     {
781     *lenptr = 2;
782     p++;
783     }
784     else *lenptr = 1;
785     return p;
786    
787     case 0x85: /* NEL */
788     *lenptr = utf8? 2 : 1;
789     return p;
790    
791     case 0x2028: /* LS */
792     case 0x2029: /* PS */
793     *lenptr = 3;
794     return p;
795    
796     default:
797     break;
798     }
799     } /* End of loop for ANY case */
800    
801     *lenptr = 0; /* Must have hit the end */
802     return endptr;
803     } /* End of overall switch */
804     }
805    
806    
807    
808     /*************************************************
809     * Find start of previous line *
810     *************************************************/
811    
812     /* This is called when looking back for before lines to print.
813    
814     Arguments:
815     p start of the subsequent line
816     startptr start of available data
817    
818     Returns: pointer to the start of the previous line
819     */
820    
821     static char *
822     previous_line(char *p, char *startptr)
823     {
824     switch(endlinetype)
825     {
826     default: /* Just in case */
827     case EL_LF:
828     p--;
829     while (p > startptr && p[-1] != '\n') p--;
830     return p;
831    
832     case EL_CR:
833     p--;
834     while (p > startptr && p[-1] != '\n') p--;
835     return p;
836    
837     case EL_CRLF:
838     for (;;)
839     {
840     p -= 2;
841     while (p > startptr && p[-1] != '\n') p--;
842     if (p <= startptr + 1 || p[-2] == '\r') return p;
843     }
844     return p; /* But control should never get here */
845    
846     case EL_ANY:
847 ph10 150 case EL_ANYCRLF:
848 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
849     if (utf8) while ((*p & 0xc0) == 0x80) p--;
850    
851     while (p > startptr)
852     {
853     register int c;
854     char *pp = p - 1;
855    
856     if (utf8)
857     {
858     int extra = 0;
859     while ((*pp & 0xc0) == 0x80) pp--;
860     c = *((unsigned char *)pp);
861     if (c >= 0xc0)
862     {
863     int gcii, gcss;
864     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
865     gcss = 6*extra;
866     c = (c & utf8_table3[extra]) << gcss;
867     for (gcii = 1; gcii <= extra; gcii++)
868     {
869     gcss -= 6;
870     c |= (pp[gcii] & 0x3f) << gcss;
871     }
872     }
873     }
874     else c = *((unsigned char *)pp);
875    
876 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
877 nigel 93 {
878     case 0x0a: /* LF */
879 ph10 149 case 0x0d: /* CR */
880     return p;
881 ph10 150
882 ph10 149 default:
883     break;
884 ph10 150 }
885 ph10 149
886     else switch (c)
887     {
888     case 0x0a: /* LF */
889 nigel 93 case 0x0b: /* VT */
890     case 0x0c: /* FF */
891     case 0x0d: /* CR */
892     case 0x85: /* NEL */
893     case 0x2028: /* LS */
894     case 0x2029: /* PS */
895     return p;
896    
897     default:
898     break;
899     }
900    
901     p = pp; /* Back one character */
902     } /* End of loop for ANY case */
903    
904     return startptr; /* Hit start of data */
905     } /* End of overall switch */
906     }
907    
908    
909    
910    
911    
912     /*************************************************
913 nigel 77 * Print the previous "after" lines *
914 nigel 49 *************************************************/
915    
916 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
917 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
918     that a binary zero does not terminate it.
919 nigel 77
920     Arguments:
921     lastmatchnumber the number of the last matching line, plus one
922     lastmatchrestart where we restarted after the last match
923     endptr end of available data
924     printname filename for printing
925    
926     Returns: nothing
927     */
928    
929     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
930     char *endptr, char *printname)
931     {
932     if (after_context > 0 && lastmatchnumber > 0)
933     {
934     int count = 0;
935     while (lastmatchrestart < endptr && count++ < after_context)
936     {
937 nigel 93 int ellength;
938 nigel 77 char *pp = lastmatchrestart;
939     if (printname != NULL) fprintf(stdout, "%s-", printname);
940     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
941 nigel 93 pp = end_of_line(pp, endptr, &ellength);
942 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
943 nigel 93 lastmatchrestart = pp;
944 nigel 77 }
945     hyphenpending = TRUE;
946     }
947     }
948    
949    
950    
951     /*************************************************
952 ph10 378 * Apply patterns to subject till one matches *
953     *************************************************/
954    
955 ph10 392 /* This function is called to run through all patterns, looking for a match. It
956     is used multiple times for the same subject when colouring is enabled, in order
957 ph10 378 to find all possible matches.
958    
959     Arguments:
960 ph10 632 matchptr the start of the subject
961     length the length of the subject to match
962     startoffset where to start matching
963     offsets the offets vector to fill in
964     mrc address of where to put the result of pcre_exec()
965 ph10 392
966     Returns: TRUE if there was a match
967 ph10 378 FALSE if there was no match
968     invert if there was a non-fatal error
969 ph10 392 */
970 ph10 378
971     static BOOL
972 ph10 654 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
973 ph10 632 int *mrc)
974 ph10 378 {
975     int i;
976 ph10 561 size_t slen = length;
977     const char *msg = "this text:\n\n";
978     if (slen > 200)
979     {
980     slen = 200;
981     msg = "text that starts:\n\n";
982 ph10 579 }
983 ph10 378 for (i = 0; i < pattern_count; i++)
984     {
985 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
986     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
987 ph10 378 if (*mrc >= 0) return TRUE;
988     if (*mrc == PCRE_ERROR_NOMATCH) continue;
989 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
990 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
991 ph10 561 fprintf(stderr, "%s", msg);
992     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
993     fprintf(stderr, "\n\n");
994 ph10 685 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
995     *mrc == PCRE_ERROR_JIT_STACKLIMIT)
996 ph10 561 resource_error = TRUE;
997 ph10 378 if (error_count++ > 20)
998     {
999 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1000     pcregrep_exit(2);
1001 ph10 378 }
1002     return invert; /* No more matching; don't show the line again */
1003     }
1004    
1005     return FALSE; /* No match, no errors */
1006     }
1007    
1008    
1009    
1010     /*************************************************
1011 nigel 77 * Grep an individual file *
1012     *************************************************/
1013    
1014     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1015 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1016 nigel 77 the top third of the buffer, thus keeping more of the file available for
1017     context printing or for multiline scanning. For large files, the pointer will
1018     be in the middle third most of the time, so the bottom third is available for
1019     "before" context printing.
1020    
1021     Arguments:
1022 ph10 286 handle the fopened FILE stream for a normal file
1023     the gzFile pointer when reading is via libz
1024     the BZFILE pointer when reading is via libbz2
1025     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1026 ph10 644 filename the file name or NULL (for errors)
1027 nigel 77 printname the file name if it is to be printed for each match
1028     or NULL if the file name is not to be printed
1029     it cannot be NULL if filenames[_nomatch]_only is set
1030    
1031     Returns: 0 if there was at least one match
1032     1 otherwise (no matches)
1033 ph10 654 2 if an overlong line is encountered
1034 ph10 644 3 if there is a read error on a .bz2 file
1035 nigel 77 */
1036    
1037 nigel 49 static int
1038 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1039 nigel 49 {
1040     int rc = 1;
1041 nigel 77 int linenumber = 1;
1042     int lastmatchnumber = 0;
1043 nigel 49 int count = 0;
1044 ph10 280 int filepos = 0;
1045 ph10 378 int offsets[OFFSET_SIZE];
1046 nigel 77 char *lastmatchrestart = NULL;
1047 ph10 644 char *ptr = main_buffer;
1048 nigel 77 char *endptr;
1049     size_t bufflength;
1050     BOOL endhyphenpending = FALSE;
1051 ph10 519 BOOL input_line_buffered = line_buffered;
1052 ph10 286 FILE *in = NULL; /* Ensure initialized */
1053 nigel 49
1054 ph10 286 #ifdef SUPPORT_LIBZ
1055     gzFile ingz = NULL;
1056     #endif
1057 nigel 77
1058 ph10 286 #ifdef SUPPORT_LIBBZ2
1059     BZFILE *inbz2 = NULL;
1060     #endif
1061    
1062    
1063     /* Do the first read into the start of the buffer and set up the pointer to end
1064     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1065     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1066     fail. */
1067    
1068     #ifdef SUPPORT_LIBZ
1069     if (frtype == FR_LIBZ)
1070     {
1071     ingz = (gzFile)handle;
1072 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1073 ph10 286 }
1074     else
1075     #endif
1076    
1077     #ifdef SUPPORT_LIBBZ2
1078     if (frtype == FR_LIBBZ2)
1079     {
1080     inbz2 = (BZFILE *)handle;
1081 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1082 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1083     } /* without the cast it is unsigned. */
1084     else
1085     #endif
1086    
1087     {
1088     in = (FILE *)handle;
1089 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1090 ph10 535 bufflength = input_line_buffered?
1091 ph10 644 read_one_line(main_buffer, bufsize, in) :
1092     fread(main_buffer, 1, bufsize, in);
1093 ph10 286 }
1094 ph10 535
1095 ph10 644 endptr = main_buffer + bufflength;
1096 nigel 77
1097     /* Loop while the current pointer is not at the end of the file. For large
1098     files, endptr will be at the end of the buffer when we are in the middle of the
1099     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1100     way, the buffer is shifted left and re-filled. */
1101    
1102     while (ptr < endptr)
1103 nigel 49 {
1104 ph10 378 int endlinelength;
1105 nigel 87 int mrc = 0;
1106 ph10 654 int startoffset = 0;
1107 ph10 378 BOOL match;
1108 ph10 286 char *matchptr = ptr;
1109 nigel 77 char *t = ptr;
1110     size_t length, linelength;
1111 nigel 49
1112 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1113     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1114     length remainder of the data in the buffer. Otherwise, it is the length of
1115 ph10 378 the next line, excluding the terminating newline. After matching, we always
1116     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1117     option is used for compiling, so that any match is constrained to be in the
1118     first line. */
1119 nigel 77
1120 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1121     linelength = t - ptr - endlinelength;
1122 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1123 ph10 654
1124     /* Check to see if the line we are looking at extends right to the very end
1125     of the buffer without a line terminator. This means the line is too long to
1126 ph10 644 handle. */
1127 ph10 654
1128 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1129     {
1130     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1131 ph10 646 "pcregrep: check the --buffer-size option\n",
1132 ph10 654 linenumber,
1133 ph10 644 (filename == NULL)? "" : " of file ",
1134     (filename == NULL)? "" : filename);
1135     return 2;
1136 ph10 654 }
1137 nigel 77
1138 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1139    
1140     #ifdef JFRIEDL_DEBUG
1141     if (jfriedl_XT || jfriedl_XR)
1142     {
1143     #include <sys/time.h>
1144     #include <time.h>
1145     struct timeval start_time, end_time;
1146     struct timezone dummy;
1147 ph10 392 int i;
1148 nigel 89
1149     if (jfriedl_XT)
1150     {
1151     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1152     const char *orig = ptr;
1153     ptr = malloc(newlen + 1);
1154     if (!ptr) {
1155     printf("out of memory");
1156 ph10 561 pcregrep_exit(2);
1157 nigel 89 }
1158     endptr = ptr;
1159     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1160     for (i = 0; i < jfriedl_XT; i++) {
1161     strncpy(endptr, orig, length);
1162     endptr += length;
1163     }
1164     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1165     length = newlen;
1166     }
1167    
1168     if (gettimeofday(&start_time, &dummy) != 0)
1169     perror("bad gettimeofday");
1170    
1171    
1172     for (i = 0; i < jfriedl_XR; i++)
1173 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1174 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1175 nigel 89
1176     if (gettimeofday(&end_time, &dummy) != 0)
1177     perror("bad gettimeofday");
1178    
1179     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1180     -
1181     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1182    
1183     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1184     return 0;
1185     }
1186     #endif
1187    
1188 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1189 ph10 279 in order to find any further matches in the same line. */
1190 nigel 89
1191 ph10 286 ONLY_MATCHING_RESTART:
1192    
1193 ph10 392 /* Run through all the patterns until one matches or there is an error other
1194 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1195     finding subsequent matches when colouring matched lines. */
1196 ph10 392
1197 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1198 nigel 77
1199 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1200 nigel 77
1201 nigel 49 if (match != invert)
1202     {
1203 nigel 77 BOOL hyphenprinted = FALSE;
1204    
1205 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1206 nigel 77
1207 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1208    
1209     /* Just count if just counting is wanted. */
1210    
1211 nigel 49 if (count_only) count++;
1212    
1213 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1214     in the file. */
1215    
1216 ph10 420 else if (filenames == FN_MATCH_ONLY)
1217 nigel 49 {
1218 nigel 77 fprintf(stdout, "%s\n", printname);
1219 nigel 49 return 0;
1220     }
1221    
1222 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1223    
1224 nigel 77 else if (quiet) return 0;
1225 nigel 49
1226 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1227 ph10 565 captured portion of it, as long as this string is not empty, and the
1228     --file-offsets and --line-offsets options output offsets for the matching
1229     substring (they both force --only-matching = 0). None of these options
1230 ph10 636 prints any context. Afterwards, adjust the start and then jump back to look
1231     for further matches in the same line. If we are in invert mode, however,
1232     nothing is printed and we do not restart - this could still be useful
1233     because the return code is set. */
1234 nigel 87
1235 ph10 565 else if (only_matching >= 0)
1236 nigel 87 {
1237 ph10 279 if (!invert)
1238 ph10 286 {
1239 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1240     if (number) fprintf(stdout, "%d:", linenumber);
1241 ph10 280 if (line_offsets)
1242 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1243 ph10 286 offsets[1] - offsets[0]);
1244 ph10 280 else if (file_offsets)
1245 ph10 579 fprintf(stdout, "%d,%d\n",
1246 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1247 ph10 286 offsets[1] - offsets[0]);
1248 ph10 565 else if (only_matching < mrc)
1249 ph10 377 {
1250 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1251     if (plen > 0)
1252 ph10 579 {
1253 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1255     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1256     fprintf(stdout, "\n");
1257 ph10 579 }
1258 ph10 392 }
1259 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1260 ph10 286 match = FALSE;
1261 ph10 564 if (line_buffered) fflush(stdout);
1262 ph10 636 rc = 0; /* Had some success */
1263     startoffset = offsets[1]; /* Restart after the match */
1264 ph10 286 goto ONLY_MATCHING_RESTART;
1265     }
1266 nigel 87 }
1267    
1268     /* This is the default case when none of the above options is set. We print
1269     the matching lines(s), possibly preceded and/or followed by other lines of
1270     context. */
1271    
1272 nigel 49 else
1273     {
1274 nigel 77 /* See if there is a requirement to print some "after" lines from a
1275     previous match. We never print any overlaps. */
1276    
1277     if (after_context > 0 && lastmatchnumber > 0)
1278     {
1279 nigel 93 int ellength;
1280 nigel 77 int linecount = 0;
1281     char *p = lastmatchrestart;
1282    
1283     while (p < ptr && linecount < after_context)
1284     {
1285 nigel 93 p = end_of_line(p, ptr, &ellength);
1286 nigel 77 linecount++;
1287     }
1288    
1289     /* It is important to advance lastmatchrestart during this printing so
1290 nigel 87 that it interacts correctly with any "before" printing below. Print
1291     each line's data using fwrite() in case there are binary zeroes. */
1292 nigel 77
1293     while (lastmatchrestart < p)
1294     {
1295     char *pp = lastmatchrestart;
1296     if (printname != NULL) fprintf(stdout, "%s-", printname);
1297     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1298 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1299 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1300 nigel 93 lastmatchrestart = pp;
1301 nigel 77 }
1302     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1303     }
1304    
1305     /* If there were non-contiguous lines printed above, insert hyphens. */
1306    
1307     if (hyphenpending)
1308     {
1309     fprintf(stdout, "--\n");
1310     hyphenpending = FALSE;
1311     hyphenprinted = TRUE;
1312     }
1313    
1314     /* See if there is a requirement to print some "before" lines for this
1315     match. Again, don't print overlaps. */
1316    
1317     if (before_context > 0)
1318     {
1319     int linecount = 0;
1320     char *p = ptr;
1321    
1322 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1323 nigel 87 linecount < before_context)
1324 nigel 77 {
1325 nigel 87 linecount++;
1326 ph10 644 p = previous_line(p, main_buffer);
1327 nigel 77 }
1328    
1329     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1330     fprintf(stdout, "--\n");
1331    
1332     while (p < ptr)
1333     {
1334 nigel 93 int ellength;
1335 nigel 77 char *pp = p;
1336     if (printname != NULL) fprintf(stdout, "%s-", printname);
1337     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1338 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1339 ph10 515 FWRITE(p, 1, pp - p, stdout);
1340 nigel 93 p = pp;
1341 nigel 77 }
1342     }
1343    
1344     /* Now print the matching line(s); ensure we set hyphenpending at the end
1345 nigel 85 of the file if any context lines are being output. */
1346 nigel 77
1347 nigel 85 if (after_context > 0 || before_context > 0)
1348     endhyphenpending = TRUE;
1349    
1350 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1351 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1352 nigel 77
1353     /* In multiline mode, we want to print to the end of the line in which
1354     the end of the matched string is found, so we adjust linelength and the
1355 ph10 222 line number appropriately, but only when there actually was a match
1356     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1357     the match will always be before the first newline sequence. */
1358 nigel 77
1359 ph10 587 if (multiline & !invert)
1360 nigel 77 {
1361 ph10 587 char *endmatch = ptr + offsets[1];
1362     t = ptr;
1363     while (t < endmatch)
1364 nigel 93 {
1365 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1366     if (t < endmatch) linenumber++; else break;
1367 nigel 93 }
1368 ph10 587 linelength = t - ptr - endlinelength;
1369 nigel 77 }
1370    
1371 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1372     zeroes are treated as just another data character. */
1373    
1374     /* This extra option, for Jeffrey Friedl's debugging requirements,
1375     replaces the matched string, or a specific captured string if it exists,
1376     with X. When this happens, colouring is ignored. */
1377    
1378     #ifdef JFRIEDL_DEBUG
1379     if (S_arg >= 0 && S_arg < mrc)
1380     {
1381     int first = S_arg * 2;
1382     int last = first + 1;
1383 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1384 nigel 87 fprintf(stdout, "X");
1385 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1386 nigel 87 }
1387     else
1388     #endif
1389    
1390 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1391 ph10 585 matches, but not of course if the line is a non-match. */
1392 ph10 589
1393 ph10 585 if (do_colour && !invert)
1394 nigel 87 {
1395 ph10 589 int plength;
1396 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1397 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1398 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1399 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1400 ph10 378 for (;;)
1401     {
1402 ph10 632 startoffset = offsets[1];
1403 ph10 718 if (startoffset >= (int)linelength + endlinelength ||
1404 ph10 654 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1405 ph10 632 break;
1406     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1407 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1408 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1409 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1410     }
1411 ph10 587
1412     /* In multiline mode, we may have already printed the complete line
1413 ph10 589 and its line-ending characters (if they matched the pattern), so there
1414 ph10 587 may be no more to print. */
1415 ph10 589
1416 ph10 836 plength = (int)((linelength + endlinelength) - startoffset);
1417 ph10 636 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1418 nigel 87 }
1419 ph10 392
1420 ph10 378 /* Not colouring; no need to search for further matches */
1421 ph10 392
1422 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1423 nigel 49 }
1424    
1425 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1426     given, flush the output. */
1427 nigel 87
1428 ph10 519 if (line_buffered) fflush(stdout);
1429 nigel 77 rc = 0; /* Had some success */
1430    
1431     /* Remember where the last match happened for after_context. We remember
1432     where we are about to restart, and that line's number. */
1433    
1434 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1435 nigel 77 lastmatchnumber = linenumber + 1;
1436 nigel 49 }
1437 nigel 77
1438 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1439     anything to be printed), we have to move on to the end of the match before
1440     proceeding. */
1441    
1442     if (multiline && invert && match)
1443     {
1444     int ellength;
1445     char *endmatch = ptr + offsets[1];
1446     t = ptr;
1447     while (t < endmatch)
1448     {
1449     t = end_of_line(t, endptr, &ellength);
1450     if (t <= endmatch) linenumber++; else break;
1451     }
1452     endmatch = end_of_line(endmatch, endptr, &ellength);
1453     linelength = endmatch - ptr - ellength;
1454     }
1455    
1456 ph10 286 /* Advance to after the newline and increment the line number. The file
1457 ph10 280 offset to the current line is maintained in filepos. */
1458 nigel 77
1459 nigel 93 ptr += linelength + endlinelength;
1460 ph10 530 filepos += (int)(linelength + endlinelength);
1461 nigel 77 linenumber++;
1462 ph10 535
1463     /* If input is line buffered, and the buffer is not yet full, read another
1464 ph10 519 line and add it into the buffer. */
1465 ph10 535
1466 ph10 718 if (input_line_buffered && bufflength < (size_t)bufsize)
1467 ph10 519 {
1468 ph10 836 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
1469 ph10 519 bufflength += add;
1470 ph10 535 endptr += add;
1471     }
1472 nigel 77
1473     /* If we haven't yet reached the end of the file (the buffer is full), and
1474     the current point is in the top 1/3 of the buffer, slide the buffer down by
1475     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1476     about to be lost, print them. */
1477    
1478 ph10 718 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
1479 nigel 77 {
1480     if (after_context > 0 &&
1481     lastmatchnumber > 0 &&
1482 ph10 644 lastmatchrestart < main_buffer + bufthird)
1483 nigel 77 {
1484     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1485     lastmatchnumber = 0;
1486     }
1487    
1488     /* Now do the shuffle */
1489    
1490 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1491     ptr -= bufthird;
1492 ph10 286
1493     #ifdef SUPPORT_LIBZ
1494     if (frtype == FR_LIBZ)
1495 ph10 644 bufflength = 2*bufthird +
1496     gzread (ingz, main_buffer + 2*bufthird, bufthird);
1497 ph10 286 else
1498     #endif
1499    
1500     #ifdef SUPPORT_LIBBZ2
1501     if (frtype == FR_LIBBZ2)
1502 ph10 644 bufflength = 2*bufthird +
1503     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1504 ph10 286 else
1505     #endif
1506    
1507 ph10 644 bufflength = 2*bufthird +
1508 ph10 535 (input_line_buffered?
1509 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1510     fread(main_buffer + 2*bufthird, 1, bufthird, in));
1511     endptr = main_buffer + bufflength;
1512 nigel 77
1513     /* Adjust any last match point */
1514    
1515 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1516 nigel 77 }
1517     } /* Loop through the whole file */
1518    
1519     /* End of file; print final "after" lines if wanted; do_after_lines sets
1520     hyphenpending if it prints something. */
1521    
1522 ph10 565 if (only_matching < 0 && !count_only)
1523 nigel 87 {
1524     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1525     hyphenpending |= endhyphenpending;
1526     }
1527 nigel 77
1528     /* Print the file name if we are looking for those without matches and there
1529     were none. If we found a match, we won't have got this far. */
1530    
1531 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1532 nigel 77 {
1533     fprintf(stdout, "%s\n", printname);
1534     return 0;
1535 nigel 49 }
1536    
1537 nigel 77 /* Print the match count if wanted */
1538    
1539 nigel 49 if (count_only)
1540     {
1541 ph10 420 if (count > 0 || !omit_zero_count)
1542 ph10 461 {
1543     if (printname != NULL && filenames != FN_NONE)
1544 ph10 420 fprintf(stdout, "%s:", printname);
1545     fprintf(stdout, "%d\n", count);
1546 ph10 461 }
1547 nigel 49 }
1548    
1549     return rc;
1550     }
1551    
1552    
1553    
1554     /*************************************************
1555 nigel 53 * Grep a file or recurse into a directory *
1556     *************************************************/
1557    
1558 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1559     recursing; if it's a file, grep it.
1560    
1561     Arguments:
1562     pathname the path to investigate
1563 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1564 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1565    
1566     Returns: 0 if there was at least one match
1567     1 if there were no matches
1568     2 there was some kind of error
1569    
1570     However, file opening failures are suppressed if "silent" is set.
1571     */
1572    
1573 nigel 53 static int
1574 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1575 nigel 53 {
1576     int rc = 1;
1577     int sep;
1578 ph10 286 int frtype;
1579     void *handle;
1580     FILE *in = NULL; /* Ensure initialized */
1581 nigel 53
1582 ph10 286 #ifdef SUPPORT_LIBZ
1583     gzFile ingz = NULL;
1584     #endif
1585    
1586     #ifdef SUPPORT_LIBBZ2
1587     BZFILE *inbz2 = NULL;
1588     #endif
1589    
1590 ph10 879 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1591     int pathlen;
1592     #endif
1593    
1594 nigel 77 /* If the file name is "-" we scan stdin */
1595 nigel 53
1596 nigel 77 if (strcmp(pathname, "-") == 0)
1597 nigel 53 {
1598 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
1599 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1600 nigel 77 stdin_name : NULL);
1601     }
1602    
1603 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1604 ph10 325 each file and directory within it, subject to any include or exclude patterns
1605     that were set. The scanning code is localized so it can be made
1606     system-specific. */
1607 nigel 87
1608     if ((sep = isdirectory(pathname)) != 0)
1609 nigel 77 {
1610 nigel 87 if (dee_action == dee_SKIP) return 1;
1611     if (dee_action == dee_RECURSE)
1612 nigel 53 {
1613 nigel 87 char buffer[1024];
1614     char *nextfile;
1615     directory_type *dir = opendirectory(pathname);
1616 nigel 53
1617 nigel 87 if (dir == NULL)
1618     {
1619     if (!silent)
1620     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1621     strerror(errno));
1622     return 2;
1623     }
1624 nigel 77
1625 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1626     {
1627 ph10 324 int frc, nflen;
1628 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1629 ph10 530 nflen = (int)(strlen(nextfile));
1630 ph10 345
1631 ph10 325 if (isdirectory(buffer))
1632     {
1633     if (exclude_dir_compiled != NULL &&
1634     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1635     continue;
1636 ph10 345
1637 ph10 325 if (include_dir_compiled != NULL &&
1638     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1639     continue;
1640     }
1641 ph10 345 else
1642     {
1643 ph10 324 if (exclude_compiled != NULL &&
1644     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1645     continue;
1646 ph10 345
1647 ph10 324 if (include_compiled != NULL &&
1648     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1649     continue;
1650 ph10 345 }
1651 nigel 77
1652 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1653     if (frc > 1) rc = frc;
1654     else if (frc == 0 && rc == 1) rc = 0;
1655     }
1656    
1657     closedirectory(dir);
1658     return rc;
1659 nigel 53 }
1660     }
1661    
1662 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1663     been requested. */
1664 nigel 53
1665 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1666    
1667     /* Control reaches here if we have a regular file, or if we have a directory
1668     and recursion or skipping was not requested, or if we have anything else and
1669     skipping was not requested. The scan proceeds. If this is the first and only
1670     argument at top level, we don't show the file name, unless we are only showing
1671     the file name, or the filename was forced (-H). */
1672    
1673 ph10 879 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBZ2
1674 ph10 530 pathlen = (int)(strlen(pathname));
1675 ph10 879 #endif
1676 ph10 286
1677     /* Open using zlib if it is supported and the file name ends with .gz. */
1678    
1679     #ifdef SUPPORT_LIBZ
1680     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1681 nigel 53 {
1682 ph10 286 ingz = gzopen(pathname, "rb");
1683     if (ingz == NULL)
1684     {
1685     if (!silent)
1686     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1687     strerror(errno));
1688     return 2;
1689     }
1690     handle = (void *)ingz;
1691     frtype = FR_LIBZ;
1692     }
1693     else
1694     #endif
1695    
1696     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1697    
1698     #ifdef SUPPORT_LIBBZ2
1699     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1700     {
1701     inbz2 = BZ2_bzopen(pathname, "rb");
1702     handle = (void *)inbz2;
1703     frtype = FR_LIBBZ2;
1704     }
1705     else
1706     #endif
1707    
1708     /* Otherwise use plain fopen(). The label is so that we can come back here if
1709     an attempt to read a .bz2 file indicates that it really is a plain file. */
1710    
1711     #ifdef SUPPORT_LIBBZ2
1712     PLAIN_FILE:
1713     #endif
1714     {
1715 ph10 419 in = fopen(pathname, "rb");
1716 ph10 286 handle = (void *)in;
1717     frtype = FR_PLAIN;
1718     }
1719    
1720     /* All the opening methods return errno when they fail. */
1721    
1722     if (handle == NULL)
1723     {
1724 nigel 77 if (!silent)
1725     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1726     strerror(errno));
1727 nigel 53 return 2;
1728     }
1729    
1730 ph10 286 /* Now grep the file */
1731    
1732 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1733 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1734 nigel 77
1735 ph10 286 /* Close in an appropriate manner. */
1736    
1737     #ifdef SUPPORT_LIBZ
1738     if (frtype == FR_LIBZ)
1739     gzclose(ingz);
1740     else
1741     #endif
1742    
1743 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1744 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
1745     again as a normal file. */
1746    
1747     #ifdef SUPPORT_LIBBZ2
1748     if (frtype == FR_LIBBZ2)
1749     {
1750 ph10 644 if (rc == 3)
1751 ph10 286 {
1752     int errnum;
1753     const char *err = BZ2_bzerror(inbz2, &errnum);
1754     if (errnum == BZ_DATA_ERROR_MAGIC)
1755     {
1756     BZ2_bzclose(inbz2);
1757     goto PLAIN_FILE;
1758     }
1759     else if (!silent)
1760     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1761     pathname, err);
1762 ph10 654 rc = 2; /* The normal "something went wrong" code */
1763 ph10 286 }
1764     BZ2_bzclose(inbz2);
1765     }
1766     else
1767     #endif
1768    
1769     /* Normal file close */
1770    
1771 nigel 53 fclose(in);
1772 ph10 286
1773     /* Pass back the yield from pcregrep(). */
1774    
1775 nigel 53 return rc;
1776     }
1777    
1778    
1779    
1780    
1781     /*************************************************
1782 nigel 49 * Usage function *
1783     *************************************************/
1784    
1785     static int
1786     usage(int rc)
1787     {
1788 nigel 87 option_item *op;
1789     fprintf(stderr, "Usage: pcregrep [-");
1790     for (op = optionlist; op->one_char != 0; op++)
1791     {
1792     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1793     }
1794     fprintf(stderr, "] [long options] [pattern] [files]\n");
1795 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1796     "options.\n");
1797 nigel 49 return rc;
1798     }
1799    
1800    
1801    
1802    
1803     /*************************************************
1804 nigel 53 * Help function *
1805     *************************************************/
1806    
1807     static void
1808     help(void)
1809     {
1810     option_item *op;
1811    
1812 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1813 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1814 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1815 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1816    
1817     #ifdef SUPPORT_LIBZ
1818     printf("Files whose names end in .gz are read using zlib.\n");
1819     #endif
1820    
1821     #ifdef SUPPORT_LIBBZ2
1822     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1823     #endif
1824    
1825     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1826     printf("Other files and the standard input are read as plain files.\n\n");
1827     #else
1828     printf("All files are read as plain files, without any interpretation.\n\n");
1829     #endif
1830    
1831 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1832     printf("Options:\n");
1833    
1834     for (op = optionlist; op->one_char != 0; op++)
1835     {
1836     int n;
1837     char s[4];
1838 ph10 579
1839 ph10 571 /* Two options were accidentally implemented and documented with underscores
1840     instead of hyphens in their names, something that was not noticed for quite a
1841 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1842     in case people were using them. However, we don't want to display them in the
1843     help data. There are no other options that contain underscores, and we do not
1844     expect ever to implement such options. Therefore, just omit any option that
1845 ph10 571 contains an underscore. */
1846 ph10 579
1847     if (strchr(op->long_name, '_') != NULL) continue;
1848    
1849 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1850 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1851 nigel 53 if (n < 1) n = 1;
1852 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1853 nigel 53 }
1854    
1855 ph10 654 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1856 ph10 644 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1857 ph10 944 printf("When reading patterns or file names from a file, trailing white\n");
1858     printf("space is removed and blank lines are ignored.\n");
1859 ph10 654 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1860 ph10 644 MAX_PATTERN_COUNT, PATBUFSIZE);
1861 nigel 53
1862 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1863 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1864     }
1865    
1866    
1867    
1868    
1869     /*************************************************
1870 nigel 77 * Handle a single-letter, no data option *
1871 nigel 53 *************************************************/
1872    
1873     static int
1874     handle_option(int letter, int options)
1875     {
1876     switch(letter)
1877     {
1878 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1879 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1880 ph10 685 case N_LBUFFER: line_buffered = TRUE; break;
1881 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1882 ph10 691 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
1883 nigel 53 case 'c': count_only = TRUE; break;
1884 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1885     case 'H': filenames = FN_FORCE; break;
1886     case 'h': filenames = FN_NONE; break;
1887 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1888 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1889 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1890 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1891 nigel 53 case 'n': number = TRUE; break;
1892 ph10 565 case 'o': only_matching = 0; break;
1893 nigel 77 case 'q': quiet = TRUE; break;
1894 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1895 nigel 53 case 's': silent = TRUE; break;
1896 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1897 nigel 53 case 'v': invert = TRUE; break;
1898 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1899     case 'x': process_options |= PO_LINE_MATCH; break;
1900 nigel 53
1901     case 'V':
1902 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1903 ph10 561 pcregrep_exit(0);
1904 nigel 53 break;
1905    
1906     default:
1907     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1908 ph10 561 pcregrep_exit(usage(2));
1909 nigel 53 }
1910    
1911     return options;
1912     }
1913    
1914    
1915    
1916    
1917     /*************************************************
1918 nigel 87 * Construct printed ordinal *
1919     *************************************************/
1920    
1921     /* This turns a number into "1st", "3rd", etc. */
1922    
/* Format an integer as an English ordinal: "1st", "2nd", "3rd", "4th", ...
Numbers whose last two digits are 11, 12 or 13 take "th" ("11th", "112th")
rather than the suffix that their final digit alone would select.

Argument:   n    the number to format
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so the result must be used (or
            copied) before ordin() is called again
*/

static char *
ordin(int n)
{
/* Large enough for any 32-bit int (up to 11 characters including a minus
sign) plus the two-letter suffix and the terminating zero. */

static char buffer[16];
const char *suffix = "th";
int last_two = n % 100;

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1939    
1940    
1941    
1942     /*************************************************
1943     * Compile a single pattern *
1944     *************************************************/
1945    
1946     /* When the -F option has been used, this is called for each substring.
1947     Otherwise it's called for each supplied pattern.
1948    
1949     Arguments:
1950     pattern the pattern string
1951     options the PCRE options
1952     filename the file name, or NULL for a command-line pattern
1953     count 0 if this is the only command line pattern, or
1954     number of the command line pattern, or
1955     linenumber for a pattern from a file
1956    
1957     Returns: TRUE on success, FALSE after an error
1958     */
1959    
1960     static BOOL
1961     compile_single_pattern(char *pattern, int options, char *filename, int count)
1962     {
1963 ph10 644 char buffer[PATBUFSIZE];
1964 nigel 87 const char *error;
1965     int errptr;
1966    
1967     if (pattern_count >= MAX_PATTERN_COUNT)
1968     {
1969     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1970     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1971     return FALSE;
1972     }
1973    
1974 ph10 644 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1975 nigel 87 suffix[process_options]);
1976     pattern_list[pattern_count] =
1977     pcre_compile(buffer, options, &error, &errptr, pcretables);
1978 ph10 142 if (pattern_list[pattern_count] != NULL)
1979 ph10 141 {
1980 ph10 142 pattern_count++;
1981 ph10 141 return TRUE;
1982 ph10 142 }
1983 nigel 87
1984     /* Handle compile errors */
1985    
1986     errptr -= (int)strlen(prefix[process_options]);
1987     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1988    
1989     if (filename == NULL)
1990     {
1991     if (count == 0)
1992     fprintf(stderr, "pcregrep: Error in command-line regex "
1993     "at offset %d: %s\n", errptr, error);
1994     else
1995     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1996     "at offset %d: %s\n", ordin(count), errptr, error);
1997     }
1998     else
1999     {
2000     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2001     "at offset %d: %s\n", count, filename, errptr, error);
2002     }
2003    
2004     return FALSE;
2005     }
2006    
2007    
2008    
2009     /*************************************************
2010     * Compile one supplied pattern *
2011     *************************************************/
2012    
2013     /* When the -F option has been used, each string may be a list of strings,
2014 nigel 91 separated by line breaks. They will be matched literally.
2015 nigel 87
2016     Arguments:
2017     pattern the pattern string
2018     options the PCRE options
2019     filename the file name, or NULL for a command-line pattern
2020     count 0 if this is the only command line pattern, or
2021     number of the command line pattern, or
2022     linenumber for a pattern from a file
2023    
2024     Returns: TRUE on success, FALSE after an error
2025     */
2026    
2027     static BOOL
2028     compile_pattern(char *pattern, int options, char *filename, int count)
2029     {
2030     if ((process_options & PO_FIXED_STRINGS) != 0)
2031     {
2032 nigel 93 char *eop = pattern + strlen(pattern);
2033 ph10 644 char buffer[PATBUFSIZE];
2034 nigel 87 for(;;)
2035     {
2036 nigel 93 int ellength;
2037     char *p = end_of_line(pattern, eop, &ellength);
2038     if (ellength == 0)
2039 nigel 87 return compile_single_pattern(pattern, options, filename, count);
2040 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2041 nigel 93 pattern = p;
2042 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
2043     return FALSE;
2044     }
2045     }
2046     else return compile_single_pattern(pattern, options, filename, count);
2047     }
2048    
2049    
2050    
2051     /*************************************************
2052 nigel 49 * Main program *
2053     *************************************************/
2054    
2055 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2056    
2057 nigel 49 int
2058     main(int argc, char **argv)
2059     {
2060 nigel 53 int i, j;
2061 nigel 49 int rc = 1;
2062 nigel 87 int pcre_options = 0;
2063     int cmd_pattern_count = 0;
2064 ph10 141 int hint_count = 0;
2065 nigel 49 int errptr;
2066 nigel 87 BOOL only_one_at_top;
2067     char *patterns[MAX_PATTERN_COUNT];
2068     const char *locale_from = "--locale";
2069 nigel 49 const char *error;
2070    
2071 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2072     pcre_jit_stack *jit_stack = NULL;
2073     #endif
2074    
2075 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2076     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2077 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2078 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2079 ph10 391 rather than escapes such as '\r'. */
2080 nigel 91
2081     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2082     switch(i)
2083     {
2084 ph10 391 default: newline = (char *)"lf"; break;
2085     case 13: newline = (char *)"cr"; break;
2086     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2087     case -1: newline = (char *)"any"; break;
2088     case -2: newline = (char *)"anycrlf"; break;
2089 nigel 91 }
2090    
2091 nigel 49 /* Process the options */
2092    
2093     for (i = 1; i < argc; i++)
2094     {
2095 nigel 77 option_item *op = NULL;
2096     char *option_data = (char *)""; /* default to keep compiler happy */
2097     BOOL longop;
2098     BOOL longopwasequals = FALSE;
2099    
2100 nigel 49 if (argv[i][0] != '-') break;
2101 nigel 53
2102 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2103 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2104 nigel 63
2105 nigel 77 if (argv[i][1] == 0)
2106     {
2107 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2108 ph10 561 else pcregrep_exit(usage(2));
2109 nigel 77 }
2110 nigel 63
2111 nigel 77 /* Handle a long name option, or -- to terminate the options */
2112 nigel 53
2113     if (argv[i][1] == '-')
2114 nigel 49 {
2115 nigel 77 char *arg = argv[i] + 2;
2116     char *argequals = strchr(arg, '=');
2117 nigel 53
2118 nigel 77 if (*arg == 0) /* -- terminates options */
2119 nigel 49 {
2120 nigel 77 i++;
2121     break; /* out of the options-handling loop */
2122 nigel 53 }
2123 nigel 49
2124 nigel 77 longop = TRUE;
2125    
2126     /* Some long options have data that follows after =, for example file=name.
2127     Some options have variations in the long name spelling: specifically, we
2128     allow "regexp" because GNU grep allows it, though I personally go along
2129 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2130 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2131     both these categories. */
2132 nigel 77
2133 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2134     {
2135 nigel 77 char *opbra = strchr(op->long_name, '(');
2136     char *equals = strchr(op->long_name, '=');
2137 ph10 461
2138 ph10 422 /* Handle options with only one spelling of the name */
2139 ph10 461
2140 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2141 nigel 53 {
2142 nigel 77 if (equals == NULL) /* Not thing=data case */
2143     {
2144     if (strcmp(arg, op->long_name) == 0) break;
2145     }
2146     else /* Special case xxx=data */
2147     {
2148 ph10 530 int oplen = (int)(equals - op->long_name);
2149 ph10 535 int arglen = (argequals == NULL)?
2150 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2151 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2152     {
2153     option_data = arg + arglen;
2154     if (*option_data == '=')
2155     {
2156     option_data++;
2157     longopwasequals = TRUE;
2158     }
2159     break;
2160     }
2161     }
2162 nigel 53 }
2163 ph10 461
2164 ph10 422 /* Handle options with an alternate spelling of the name */
2165 ph10 461
2166     else
2167 nigel 77 {
2168     char buff1[24];
2169     char buff2[24];
2170 ph10 461
2171 ph10 530 int baselen = (int)(opbra - op->long_name);
2172     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2173 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2174 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2175 ph10 461
2176 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2177 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2178 ph10 461
2179     if (strncmp(arg, buff1, arglen) == 0 ||
2180 ph10 422 strncmp(arg, buff2, arglen) == 0)
2181     {
2182     if (equals != NULL && argequals != NULL)
2183     {
2184 ph10 461 option_data = argequals;
2185 ph10 422 if (*option_data == '=')
2186     {
2187 ph10 461 option_data++;
2188 ph10 422 longopwasequals = TRUE;
2189 ph10 461 }
2190     }
2191 nigel 77 break;
2192 ph10 461 }
2193 nigel 77 }
2194 nigel 53 }
2195 nigel 77
2196 nigel 53 if (op->one_char == 0)
2197     {
2198     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2199 ph10 561 pcregrep_exit(usage(2));
2200 nigel 53 }
2201     }
2202 nigel 49
2203 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2204     are not in the right form for putting in the option table because they use
2205     only one hyphen, yet are more than one character long. By putting them
2206     separately here, they will not get displayed as part of the help() output,
2207     but I don't think Jeffrey will care about that. */
2208    
2209     #ifdef JFRIEDL_DEBUG
2210     else if (strcmp(argv[i], "-pre") == 0) {
2211     jfriedl_prefix = argv[++i];
2212     continue;
2213     } else if (strcmp(argv[i], "-post") == 0) {
2214     jfriedl_postfix = argv[++i];
2215     continue;
2216     } else if (strcmp(argv[i], "-XT") == 0) {
2217     sscanf(argv[++i], "%d", &jfriedl_XT);
2218     continue;
2219     } else if (strcmp(argv[i], "-XR") == 0) {
2220     sscanf(argv[++i], "%d", &jfriedl_XR);
2221     continue;
2222     }
2223     #endif
2224    
2225    
2226 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2227     continue till we hit the last one or one that needs data. */
2228 nigel 53
2229     else
2230     {
2231     char *s = argv[i] + 1;
2232 nigel 77 longop = FALSE;
2233 nigel 53 while (*s != 0)
2234     {
2235 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2236 ph10 579 {
2237     if (*s == op->one_char) break;
2238 ph10 565 }
2239 nigel 77 if (op->one_char == 0)
2240 nigel 53 {
2241 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2242     *s, argv[i]);
2243 ph10 561 pcregrep_exit(usage(2));
2244 nigel 77 }
2245 ph10 579
2246 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2247 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2248 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2249     with other single-character options in the same string. */
2250 ph10 579
2251 ph10 565 option_data = s+1;
2252     if (op->type == OP_OP_NUMBER)
2253 ph10 579 {
2254     if (isdigit((unsigned char)s[1])) break;
2255 nigel 53 }
2256 ph10 565 else /* Check for end or a dataless option */
2257 ph10 579 {
2258 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2259 ph10 579 }
2260    
2261     /* Handle a single-character option with no data, then loop for the
2262 ph10 565 next character in the string. */
2263    
2264 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2265 nigel 49 }
2266     }
2267 nigel 77
2268 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2269     is OP_NODATA, it means that there is no data, and the option might set
2270     something in the PCRE options. */
2271 nigel 77
2272     if (op->type == OP_NODATA)
2273     {
2274 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2275     continue;
2276     }
2277    
2278     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2279     either has a value or defaults to something. It cannot have data in a
2280 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2281 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2282 nigel 87
2283     if (*option_data == 0 &&
2284     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2285     {
2286     switch (op->one_char)
2287 nigel 77 {
2288 nigel 87 case N_COLOUR:
2289     colour_option = (char *)"auto";
2290     break;
2291 ph10 579
2292 ph10 565 case 'o':
2293     only_matching = 0;
2294 ph10 579 break;
2295    
2296 nigel 87 #ifdef JFRIEDL_DEBUG
2297     case 'S':
2298     S_arg = 0;
2299     break;
2300     #endif
2301 nigel 77 }
2302 nigel 87 continue;
2303     }
2304 nigel 77
2305 nigel 87 /* Otherwise, find the data string for the option. */
2306    
2307     if (*option_data == 0)
2308     {
2309     if (i >= argc - 1 || longopwasequals)
2310 nigel 77 {
2311 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2312 ph10 561 pcregrep_exit(usage(2));
2313 nigel 87 }
2314     option_data = argv[++i];
2315     }
2316    
2317     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2318     multiple times to create a list of patterns. */
2319    
2320     if (op->type == OP_PATLIST)
2321     {
2322     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2323     {
2324     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2325     MAX_PATTERN_COUNT);
2326     return 2;
2327     }
2328     patterns[cmd_pattern_count++] = option_data;
2329     }
2330    
2331     /* Otherwise, deal with single string or numeric data values. */
2332    
2333 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2334     op->type != OP_OP_NUMBER)
2335 nigel 87 {
2336     *((char **)op->dataptr) = option_data;
2337     }
2338 ph10 558
2339     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2340     only for unpicking arguments, so just keep it simple. */
2341    
2342 nigel 87 else
2343     {
2344 ph10 561 unsigned long int n = 0;
2345 ph10 558 char *endptr = option_data;
2346     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2347     while (isdigit((unsigned char)(*endptr)))
2348     n = n * 10 + (int)(*endptr++ - '0');
2349 ph10 644 if (toupper(*endptr) == 'K')
2350     {
2351 ph10 654 n *= 1024;
2352     endptr++;
2353     }
2354 ph10 644 else if (toupper(*endptr) == 'M')
2355     {
2356 ph10 654 n *= 1024*1024;
2357     endptr++;
2358     }
2359 nigel 87 if (*endptr != 0)
2360     {
2361     if (longop)
2362 nigel 77 {
2363 nigel 87 char *equals = strchr(op->long_name, '=');
2364     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2365 ph10 530 (int)(equals - op->long_name);
2366 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2367     option_data, nlen, op->long_name);
2368 nigel 77 }
2369 nigel 87 else
2370     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2371     option_data, op->one_char);
2372 ph10 561 pcregrep_exit(usage(2));
2373 nigel 77 }
2374 ph10 584 if (op->type == OP_LONGNUMBER)
2375     *((unsigned long int *)op->dataptr) = n;
2376     else
2377     *((int *)op->dataptr) = n;
2378 nigel 77 }
2379 nigel 49 }
2380    
2381 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2382     for -A and -B. */
2383    
2384     if (both_context > 0)
2385     {
2386     if (after_context == 0) after_context = both_context;
2387     if (before_context == 0) before_context = both_context;
2388     }
2389 ph10 286
2390     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2391 ph10 565 However, the latter two set only_matching. */
2392 nigel 77
2393 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2394 ph10 286 (file_offsets && line_offsets))
2395 ph10 280 {
2396     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2397     "and/or --line-offsets\n");
2398 ph10 561 pcregrep_exit(usage(2));
2399 ph10 280 }
2400    
2401 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2402 ph10 286
2403 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2404     LC_ALL environment variable is set, and if so, use it. */
2405 nigel 49
2406 nigel 87 if (locale == NULL)
2407 nigel 53 {
2408 nigel 87 locale = getenv("LC_ALL");
2409     locale_from = "LCC_ALL";
2410 nigel 53 }
2411 nigel 49
2412 nigel 87 if (locale == NULL)
2413     {
2414     locale = getenv("LC_CTYPE");
2415     locale_from = "LC_CTYPE";
2416     }
2417 nigel 49
2418 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2419     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2420    
2421     if (locale != NULL)
2422 nigel 49 {
2423 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2424 nigel 53 {
2425 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2426     locale, locale_from);
2427 nigel 53 return 2;
2428     }
2429 nigel 87 pcretables = pcre_maketables();
2430     }
2431 nigel 77
2432 nigel 87 /* Sort out colouring */
2433    
2434     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2435     {
2436     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2437     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2438     else
2439 nigel 53 {
2440 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2441     colour_option);
2442     return 2;
2443 nigel 77 }
2444 nigel 87 if (do_colour)
2445 nigel 77 {
2446 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2447     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2448     if (cs != NULL) colour_string = cs;
2449 nigel 77 }
2450 nigel 87 }
2451 ph10 535
2452 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2453    
2454     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2455     {
2456     pcre_options |= PCRE_NEWLINE_CR;
2457 nigel 93 endlinetype = EL_CR;
2458 nigel 91 }
2459     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2460     {
2461     pcre_options |= PCRE_NEWLINE_LF;
2462 nigel 93 endlinetype = EL_LF;
2463 nigel 91 }
2464     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2465     {
2466     pcre_options |= PCRE_NEWLINE_CRLF;
2467 nigel 93 endlinetype = EL_CRLF;
2468 nigel 91 }
2469 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2470     {
2471     pcre_options |= PCRE_NEWLINE_ANY;
2472     endlinetype = EL_ANY;
2473     }
2474 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2475     {
2476     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2477     endlinetype = EL_ANYCRLF;
2478     }
2479 nigel 91 else
2480     {
2481     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2482     return 2;
2483     }
2484    
2485 nigel 87 /* Interpret the text values for -d and -D */
2486    
2487     if (dee_option != NULL)
2488     {
2489     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2490     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2491     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2492     else
2493 nigel 77 {
2494 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2495     return 2;
2496 nigel 53 }
2497 nigel 49 }
2498    
2499 nigel 87 if (DEE_option != NULL)
2500     {
2501     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2502     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2503     else
2504     {
2505     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2506     return 2;
2507     }
2508     }
2509 nigel 49
2510 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2511 nigel 87
2512     #ifdef JFRIEDL_DEBUG
2513     if (S_arg > 9)
2514 nigel 49 {
2515 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2516     return 2;
2517     }
2518 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2519     {
2520     if (jfriedl_XT == 0) jfriedl_XT = 1;
2521     if (jfriedl_XR == 0) jfriedl_XR = 1;
2522     }
2523 nigel 87 #endif
2524 nigel 77
2525 ph10 644 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2526 nigel 87
2527 ph10 644 bufsize = 3*bufthird;
2528     main_buffer = (char *)malloc(bufsize);
2529 nigel 87 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2530     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2531    
2532 ph10 644 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2533 nigel 87 {
2534     fprintf(stderr, "pcregrep: malloc failed\n");
2535 ph10 123 goto EXIT2;
2536 nigel 87 }
2537    
2538     /* If no patterns were provided by -e, and there is no file provided by -f,
2539     the first argument is the one and only pattern, and it must exist. */
2540    
2541     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2542     {
2543 nigel 63 if (i >= argc) return usage(2);
2544 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2545     }
2546 nigel 77
2547 nigel 87 /* Compile the patterns that were provided on the command line, either by
2548     multiple uses of -e or as a single unkeyed pattern. */
2549    
2550     for (j = 0; j < cmd_pattern_count; j++)
2551     {
2552     if (!compile_pattern(patterns[j], pcre_options, NULL,
2553     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2554 ph10 123 goto EXIT2;
2555 nigel 87 }
2556    
2557     /* Compile the regular expressions that are provided in a file. */
2558    
2559     if (pattern_filename != NULL)
2560     {
2561     int linenumber = 0;
2562     FILE *f;
2563     char *filename;
2564 ph10 644 char buffer[PATBUFSIZE];
2565 nigel 87
2566     if (strcmp(pattern_filename, "-") == 0)
2567 nigel 77 {
2568 nigel 87 f = stdin;
2569     filename = stdin_name;
2570 nigel 77 }
2571 nigel 87 else
2572 nigel 77 {
2573 nigel 87 f = fopen(pattern_filename, "r");
2574     if (f == NULL)
2575     {
2576     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2577     strerror(errno));
2578 ph10 123 goto EXIT2;
2579 nigel 87 }
2580     filename = pattern_filename;
2581 nigel 77 }
2582    
2583 ph10 644 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2584 nigel 53 {
2585 nigel 87 char *s = buffer + (int)strlen(buffer);
2586     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2587     *s = 0;
2588     linenumber++;
2589     if (buffer[0] == 0) continue; /* Skip blank lines */
2590     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2591 ph10 121 goto EXIT2;
2592 nigel 53 }
2593 nigel 87
2594     if (f != stdin) fclose(f);
2595 nigel 49 }
2596    
2597 ph10 691 /* Study the regular expressions, as we will be running them many times. Unless
2598 ph10 685 JIT has been explicitly disabled, arrange a stack for it to use. */
2599 nigel 53
2600 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2601     if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2602     jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
2603 ph10 691 #endif
2604    
2605 nigel 53 for (j = 0; j < pattern_count; j++)
2606     {
2607 ph10 667 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2608 nigel 53 if (error != NULL)
2609     {
2610     char s[16];
2611     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2612     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2613 ph10 121 goto EXIT2;
2614 nigel 53 }
2615 ph10 142 hint_count++;
2616 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2617 ph10 691 if (jit_stack != NULL && hints_list[j] != NULL)
2618 ph10 685 pcre_assign_jit_stack(hints_list[j], NULL, jit_stack);
2619     #endif
2620 nigel 53 }
2621 ph10 579
2622 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2623     pcre_extra block for each pattern. */
2624 nigel 53
2625 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2626     {
2627     for (j = 0; j < pattern_count; j++)
2628     {
2629     if (hints_list[j] == NULL)
2630     {
2631     hints_list[j] = malloc(sizeof(pcre_extra));
2632 ph10 579 if (hints_list[j] == NULL)
2633 ph10 561 {
2634     fprintf(stderr, "pcregrep: malloc failed\n");
2635     pcregrep_exit(2);
2636     }
2637     }
2638     if (match_limit > 0)
2639 ph10 579 {
2640 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2641     hints_list[j]->match_limit = match_limit;
2642 ph10 579 }
2643 ph10 561 if (match_limit_recursion > 0)
2644 ph10 579 {
2645 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2646     hints_list[j]->match_limit_recursion = match_limit_recursion;
2647 ph10 579 }
2648 ph10 561 }
2649 ph10 579 }
2650 ph10 561
2651 nigel 77 /* If there are include or exclude patterns, compile them. */
2652    
2653     if (exclude_pattern != NULL)
2654     {
2655 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2656     pcretables);
2657 nigel 77 if (exclude_compiled == NULL)
2658     {
2659     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2660     errptr, error);
2661 ph10 121 goto EXIT2;
2662 nigel 77 }
2663     }
2664    
2665     if (include_pattern != NULL)
2666     {
2667 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2668     pcretables);
2669 nigel 77 if (include_compiled == NULL)
2670     {
2671     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2672     errptr, error);
2673 ph10 121 goto EXIT2;
2674 nigel 77 }
2675     }
2676    
2677 ph10 325 if (exclude_dir_pattern != NULL)
2678     {
2679     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2680     pcretables);
2681     if (exclude_dir_compiled == NULL)
2682     {
2683     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2684     errptr, error);
2685     goto EXIT2;
2686     }
2687     }
2688    
2689     if (include_dir_pattern != NULL)
2690     {
2691     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2692     pcretables);
2693     if (include_dir_compiled == NULL)
2694     {
2695     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2696     errptr, error);
2697     goto EXIT2;
2698     }
2699     }
2700 ph10 944
2701     /* If a file that contains a list of files to search has been specified, read
2702     it line by line and search the given files. Otherwise, if there are no further
2703     arguments, do the business on stdin and exit. */
2704 ph10 325
2705 ph10 944 if (file_list != NULL)
2706     {
2707     char buffer[PATBUFSIZE];
2708     FILE *fl;
2709     if (strcmp(file_list, "-") == 0) fl = stdin; else
2710     {
2711     fl = fopen(file_list, "rb");
2712     if (fl == NULL)
2713     {
2714     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", file_list,
2715     strerror(errno));
2716     goto EXIT2;
2717     }
2718     }
2719     while (fgets(buffer, PATBUFSIZE, fl) != NULL)
2720     {
2721     int frc;
2722     char *end = buffer + (int)strlen(buffer);
2723     while (end > buffer && isspace(end[-1])) end--;
2724     *end = 0;
2725     if (*buffer != 0)
2726     {
2727     frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
2728     if (frc > 1) rc = frc;
2729     else if (frc == 0 && rc == 1) rc = 0;
2730     }
2731     }
2732     if (fl != stdin) fclose (fl);
2733     }
2734 nigel 49
2735 ph10 944 /* Do this only if there was no file list (and no file arguments). */
2736    
2737     else if (i >= argc)
2738 ph10 121 {
2739 ph10 654 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2740 ph10 644 (filenames > FN_DEFAULT)? stdin_name : NULL);
2741 ph10 121 goto EXIT;
2742 ph10 123 }
2743 nigel 49
2744 ph10 944 /* After handling file-list or if there are remaining arguments, work through
2745     them as files or directories. Pass in the fact that there is only one argument
2746     at top level - this suppresses the file name if the argument is not a directory
2747     and filenames are not otherwise forced. */
2748 nigel 49
2749 ph10 944 only_one_at_top = i == argc - 1 && file_list == NULL;
2750 nigel 49
2751     for (; i < argc; i++)
2752     {
2753 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2754     only_one_at_top);
2755 nigel 77 if (frc > 1) rc = frc;
2756     else if (frc == 0 && rc == 1) rc = 0;
2757 nigel 49 }
2758    
2759 ph10 121 EXIT:
2760 ph10 685 #ifdef SUPPORT_PCREGREP_JIT
2761     if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2762     #endif
2763 ph10 644 if (main_buffer != NULL) free(main_buffer);
2764 ph10 121 if (pattern_list != NULL)
2765     {
2766 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2767 ph10 121 free(pattern_list);
2768 ph10 123 }
2769 ph10 121 if (hints_list != NULL)
2770     {
2771 ph10 579 for (i = 0; i < hint_count; i++)
2772 ph10 561 {
2773 ph10 667 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2774 ph10 579 }
2775 ph10 121 free(hints_list);
2776 ph10 123 }
2777 ph10 561 pcregrep_exit(rc);
2778 ph10 121
2779     EXIT2:
2780     rc = 2;
2781     goto EXIT;
2782 nigel 49 }
2783    
2784 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12