/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (hide annotations) (download)
Mon Aug 22 14:57:32 2011 UTC (3 years ago) by ph10
File MIME type: text/plain
File size: 78994 byte(s)
Commit all the changes for JIT support, but without any documentation yet.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 584 Copyright (c) 1997-2011 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77 ph10 644 #define PATBUFSIZE BUFSIZ
78 nigel 77 #else
79 ph10 644 #define PATBUFSIZE 8192
80 nigel 77 #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that tests the result and then does nothing with it. The
test is wrapped in do { } while (0) so that "FWRITE(...);" is always exactly
one statement; a bare "if" would not compile when the macro is used as the
unbraced body of an if that has an else. Oddly, the warning does not also seem
to apply to fprintf(). */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 ph10 644 static char *main_buffer = NULL;
139 nigel 91 static char *newline = NULL;
140 nigel 53 static char *pattern_filename = NULL;
141 nigel 77 static char *stdin_name = (char *)"(standard input)";
142 nigel 87 static char *locale = NULL;
143    
144     static const unsigned char *pcretables = NULL;
145    
146 nigel 53 static int pattern_count = 0;
147 ph10 121 static pcre **pattern_list = NULL;
148     static pcre_extra **hints_list = NULL;
149 nigel 49
150 nigel 77 static char *include_pattern = NULL;
151     static char *exclude_pattern = NULL;
152 ph10 325 static char *include_dir_pattern = NULL;
153     static char *exclude_dir_pattern = NULL;
154 nigel 77
155     static pcre *include_compiled = NULL;
156     static pcre *exclude_compiled = NULL;
157 ph10 325 static pcre *include_dir_compiled = NULL;
158     static pcre *exclude_dir_compiled = NULL;
159 nigel 77
160     static int after_context = 0;
161     static int before_context = 0;
162     static int both_context = 0;
163 ph10 644 static int bufthird = PCREGREP_BUFSIZE;
164     static int bufsize = 3*PCREGREP_BUFSIZE;
165 nigel 87 static int dee_action = dee_READ;
166     static int DEE_action = DEE_READ;
167     static int error_count = 0;
168     static int filenames = FN_DEFAULT;
169 ph10 565 static int only_matching = -1;
170 nigel 87 static int process_options = 0;
171 ph10 667 static int study_options = 0;
172 nigel 77
173 ph10 561 static unsigned long int match_limit = 0;
174     static unsigned long int match_limit_recursion = 0;
175    
176 nigel 49 static BOOL count_only = FALSE;
177 nigel 87 static BOOL do_colour = FALSE;
178 ph10 280 static BOOL file_offsets = FALSE;
179 nigel 77 static BOOL hyphenpending = FALSE;
180 nigel 49 static BOOL invert = FALSE;
181 ph10 519 static BOOL line_buffered = FALSE;
182 ph10 280 static BOOL line_offsets = FALSE;
183 nigel 77 static BOOL multiline = FALSE;
184 nigel 49 static BOOL number = FALSE;
185 ph10 420 static BOOL omit_zero_count = FALSE;
186 ph10 561 static BOOL resource_error = FALSE;
187 nigel 77 static BOOL quiet = FALSE;
188 nigel 49 static BOOL silent = FALSE;
189 nigel 93 static BOOL utf8 = FALSE;
190 nigel 49
191 nigel 53 /* Structure for options and list of them */
192 nigel 49
193 ph10 584 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
194     OP_OP_NUMBER, OP_PATLIST };
195 nigel 77
/* One entry in the command-line option table (optionlist). */
196 nigel 53 typedef struct option_item {
197 nigel 77 int type; /* one of the OP_xxx values */
198 nigel 53 int one_char; /* single-letter option, or a negative N_xxx value if there is none */
199 nigel 77 void *dataptr; /* address of the variable the option sets, or NULL */
200 nigel 67 const char *long_name; /* long option name, with any "=value" description; NULL ends the table */
201     const char *help_text; /* descriptive text for the option */
202 nigel 53 } option_item;
203 nigel 49
204 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
205     used to identify them. */
206    
207 ph10 325 #define N_COLOUR (-1)
208     #define N_EXCLUDE (-2)
209     #define N_EXCLUDE_DIR (-3)
210     #define N_HELP (-4)
211     #define N_INCLUDE (-5)
212     #define N_INCLUDE_DIR (-6)
213     #define N_LABEL (-7)
214     #define N_LOCALE (-8)
215     #define N_NULL (-9)
216     #define N_LOFFSETS (-10)
217     #define N_FOFFSETS (-11)
218 ph10 519 #define N_LBUFFER (-12)
219 ph10 561 #define N_M_LIMIT (-13)
220     #define N_M_LIMIT_REC (-14)
221 ph10 644 #define N_BUFSIZE (-15)
222 nigel 87
223 nigel 53 static option_item optionlist[] = {
224 ph10 584 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
225     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
226     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
227     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
228 ph10 644 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
229 ph10 584 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
230     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
231     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
232     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
233     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
234     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
235     { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
236     { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
237     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
238     { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
239     { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
240     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
241     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
242 ph10 667 { OP_NODATA, 'j', NULL, "jit", "use JIT compiler if available" },
243 ph10 584 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
244     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
245     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
246     { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
247     { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
248     { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
249     { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
250     { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
251     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
252     { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
253     { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
254     { OP_OP_NUMBER, 'o', &only_matching, "only-matching=n", "show only the part of the line that matched" },
255     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
256     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
257     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
258     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
259     { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude-dir=pattern","exclude matching directories when recursing" },
260     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include-dir=pattern","include matching directories when recursing" },
261 ph10 571
262     /* These two were accidentally implemented with underscores instead of
263     hyphens in the option names. As this was not discovered for several releases,
264     the incorrect versions are left in the table for compatibility. However, the
265     --help function misses out any option that has an underscore in its name. */
266 ph10 579
267 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
268     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
269 ph10 571
270 nigel 87 #ifdef JFRIEDL_DEBUG
271     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
272     #endif
273     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
274     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
275     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
276     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
277     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
278     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
279     { OP_NODATA, 0, NULL, NULL, NULL }
280 nigel 53 };
281    
282 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
283     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
284     that the combination of -w and -x has the same effect as -x on its own, so we
285     can treat them as the same. */
286 nigel 53
287 nigel 87 static const char *prefix[] = {
288     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
289    
290     static const char *suffix[] = {
291     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
292    
293 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
294 nigel 87
295 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
296 nigel 87
297 nigel 93 const char utf8_table4[] = {
298     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
299     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
300     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
301     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
302    
303    
304    
305 nigel 53 /*************************************************
306 ph10 586 * Exit from the program *
307     *************************************************/
308    
309     /* If there has been a resource error, give a suitable message.
310    
311     Argument: the return code
312     Returns: does not return
313     */
314    
315     static void
316     pcregrep_exit(int rc)
317     {
318     if (resource_error)
319     {
320     fprintf(stderr, "pcregrep: Error %d or %d means that a resource limit "
321     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT);
322     fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
323     }
324    
325     exit(rc);
326     }
327    
328    
329     /*************************************************
330 nigel 87 * OS-specific functions *
331 nigel 53 *************************************************/
332    
333     /* These functions are defined so that they can be made system specific,
334 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
335 nigel 53
336    
337     /************* Directory scanning in Unix ***********/
338    
339 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
340 nigel 53 #include <sys/types.h>
341     #include <sys/stat.h>
342     #include <dirent.h>
343    
344     typedef DIR directory_type;
345    
/* Test whether a path names a directory.

Argument:   filename   the path to examine
Returns:    '/' if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */

/* S_ISDIR() is the POSIX test macro; the S_IFMT/S_IFDIR masks are only
guaranteed on XSI systems. */

return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
354    
355 nigel 67 static directory_type *
356 nigel 53 opendirectory(char *filename)
357     {
358     return opendir(filename);
359     }
360    
361 nigel 67 static char *
362 nigel 53 readdirectory(directory_type *dir)
363     {
364     for (;;)
365     {
366     struct dirent *dent = readdir(dir);
367     if (dent == NULL) return NULL;
368     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
369     return dent->d_name;
370     }
371 ph10 151 /* Control never reaches here */
372 nigel 53 }
373    
374 nigel 67 static void
375 nigel 53 closedirectory(directory_type *dir)
376     {
377     closedir(dir);
378     }
379    
380    
381 nigel 87 /************* Test for regular file in Unix **********/
382    
/* Test whether a path names a regular file.

Argument:   filename   the path to examine
Returns:    1 if stat() reports a regular file, or if stat() fails;
            0 for anything else
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the POSIX test macro; the S_IFMT/S_IFREG masks are only
guaranteed on XSI systems. The comparison keeps the result at exactly 0 or 1. */

return S_ISREG(statbuf.st_mode) != 0;
}
391    
392    
393 ph10 519 /************* Test for a terminal in Unix **********/
394 nigel 87
395     static BOOL
396     is_stdout_tty(void)
397     {
398     return isatty(fileno(stdout));
399     }
400    
401 ph10 519 static BOOL
402     is_file_tty(FILE *f)
403     {
404     return isatty(fileno(f));
405     }
406 nigel 87
407 ph10 519
408 nigel 63 /************* Directory scanning in Win32 ***********/
409 nigel 53
410 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
411 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
412 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
413     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
414 ph10 558 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
415     undefined when it is indeed undefined. */
416 nigel 53
417 ph10 558 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
418 nigel 63
419     #ifndef STRICT
420     # define STRICT
421     #endif
422     #ifndef WIN32_LEAN_AND_MEAN
423     # define WIN32_LEAN_AND_MEAN
424     #endif
425 ph10 283
426     #include <windows.h>
427    
428 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
429     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
430     #endif
431    
432 nigel 63 typedef struct directory_type
433     {
434     HANDLE handle;
435     BOOL first;
436     WIN32_FIND_DATA data;
437     } directory_type;
438    
439     int
440     isdirectory(char *filename)
441     {
442     DWORD attr = GetFileAttributes(filename);
443     if (attr == INVALID_FILE_ATTRIBUTES)
444     return 0;
445     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
446     }
447    
448     directory_type *
449     opendirectory(char *filename)
450     {
451     size_t len;
452     char *pattern;
453     directory_type *dir;
454     DWORD err;
455     len = strlen(filename);
456     pattern = (char *) malloc(len + 3);
457     dir = (directory_type *) malloc(sizeof(*dir));
458     if ((pattern == NULL) || (dir == NULL))
459     {
460     fprintf(stderr, "pcregrep: malloc failed\n");
461 ph10 561 pcregrep_exit(2);
462 nigel 63 }
463     memcpy(pattern, filename, len);
464     memcpy(&(pattern[len]), "\\*", 3);
465     dir->handle = FindFirstFile(pattern, &(dir->data));
466     if (dir->handle != INVALID_HANDLE_VALUE)
467     {
468     free(pattern);
469     dir->first = TRUE;
470     return dir;
471     }
472     err = GetLastError();
473     free(pattern);
474     free(dir);
475     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
476     return NULL;
477     }
478    
479     char *
480     readdirectory(directory_type *dir)
481     {
482     for (;;)
483     {
484     if (!dir->first)
485     {
486     if (!FindNextFile(dir->handle, &(dir->data)))
487     return NULL;
488     }
489     else
490     {
491     dir->first = FALSE;
492     }
493     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
494     return dir->data.cFileName;
495     }
496     #ifndef _MSC_VER
497     return NULL; /* Keep compiler happy; never executed */
498     #endif
499     }
500    
501     void
502     closedirectory(directory_type *dir)
503     {
504     FindClose(dir->handle);
505     free(dir);
506     }
507    
508    
509 nigel 87 /************* Test for regular file in Win32 **********/
510    
/* I don't know how to test for a regular file here, or if it can be done, so
every path that isdirectory() does not report as a directory is treated as
regular. */

int isregfile(char *filename)
{
if (isdirectory(filename)) return 0;
return 1;
}
518    
519    
520 ph10 519 /************* Test for a terminal in Win32 **********/
521 nigel 87
522     /* I don't know how to do this; assume never */
523    
/* Win32 variant: no terminal test is implemented, so the answer is always FALSE. */
524     static BOOL
525     is_stdout_tty(void)
526     {
527 ph10 283 return FALSE; /* stdout is never treated as a terminal */
528 nigel 87 }
529    
530 ph10 519 static BOOL
531     is_file_tty(FILE *f)
532     {
533     return FALSE;
534     }
535 nigel 87
536 ph10 519
537 nigel 53 /************* Directory scanning when we can't do it ***********/
538    
539     /* The type is void, and apart from isdirectory(), the functions do nothing. */
540    
541 nigel 63 #else
542    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always fails. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
549    
550 nigel 87
551     /************* Test for regular when we can't do it **********/
552    
553     /* Assume all files are regular. */
554    
int
isregfile(char *filename)
{
(void)filename;   /* Not examined: every path counts as a regular file */
return 1;
}
556    
557    
558 ph10 519 /************* Test for a terminal when we can't do it **********/
559 nigel 87
/* Fallback variant for systems with no terminal test: always answers FALSE. */
560     static BOOL
561     is_stdout_tty(void)
562     {
563     return FALSE; /* stdout is never treated as a terminal */
564     }
565    
566 ph10 519 static BOOL
567     is_file_tty(FILE *f)
568     {
569     return FALSE;
570     }
571 nigel 87
572 nigel 53 #endif
573    
574    
575    
576 ph10 137 #ifndef HAVE_STRERROR
577 nigel 49 /*************************************************
578     * Provide strerror() for non-ANSI libraries *
579     *************************************************/
580    
581     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
582     in their libraries, but can provide the same facility by this simple
583     alternative function. */
584    
585     extern int sys_nerr;
586     extern char *sys_errlist[];
587    
588     char *
589     strerror(int n)
590     {
591     if (n < 0 || n >= sys_nerr) return "unknown error number";
592     return sys_errlist[n];
593     }
594     #endif /* HAVE_STRERROR */
595    
596    
597    
598     /*************************************************
599 ph10 519 * Read one line of input *
600     *************************************************/
601    
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. However, doing this for tty input means that no output appears
until a lot of input has been typed. Instead, tty input is handled line by
line. We cannot use fgets() for this, because it does not stop at a binary
zero, and therefore there is no way of telling how many characters it has read,
because there may be binary zeros embedded in the data.

Reading stops after a '\n' has been stored, when "length" bytes have been
stored, or at end of file. The buffer is not zero-terminated. The bound is
tested before each read, so nothing is read or stored if length is not
positive.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static int
read_one_line(char *buffer, int length, FILE *f)
{
int yield = 0;
while (yield < length)
  {
  int c = fgetc(f);
  if (c == EOF) break;
  buffer[yield++] = (char)c;
  if (c == '\n') break;
  }
return yield;
}
629    
630    
631    
632     /*************************************************
633 nigel 93 * Find end of line *
634     *************************************************/
635    
636     /* The length of the endline sequence that is found is set via lenptr. This may
637     be zero at the very end of the file if there is no line-ending sequence there.
638    
639     Arguments:
640     p current position in line
641     endptr end of available data
642     lenptr where to put the length of the eol sequence
643    
644 ph10 654 Returns: pointer after the last byte of the line,
645 ph10 644 including the newline byte(s)
646 nigel 93 */
647    
648     static char *
649     end_of_line(char *p, char *endptr, int *lenptr)
650     {
651     switch(endlinetype)
652     {
653     default: /* Just in case */
654     case EL_LF:
655     while (p < endptr && *p != '\n') p++;
656     if (p < endptr)
657     {
658     *lenptr = 1;
659     return p + 1;
660     }
661     *lenptr = 0;
662     return endptr;
663    
664     case EL_CR:
665     while (p < endptr && *p != '\r') p++;
666     if (p < endptr)
667     {
668     *lenptr = 1;
669     return p + 1;
670     }
671     *lenptr = 0;
672     return endptr;
673    
674     case EL_CRLF:
675     for (;;)
676     {
677     while (p < endptr && *p != '\r') p++;
678     if (++p >= endptr)
679     {
680     *lenptr = 0;
681     return endptr;
682     }
683     if (*p == '\n')
684     {
685     *lenptr = 2;
686     return p + 1;
687     }
688     }
689     break;
690    
691 ph10 149 case EL_ANYCRLF:
692     while (p < endptr)
693     {
694     int extra = 0;
695     register int c = *((unsigned char *)p);
696    
697     if (utf8 && c >= 0xc0)
698     {
699     int gcii, gcss;
700     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
701     gcss = 6*extra;
702     c = (c & utf8_table3[extra]) << gcss;
703     for (gcii = 1; gcii <= extra; gcii++)
704     {
705     gcss -= 6;
706     c |= (p[gcii] & 0x3f) << gcss;
707     }
708     }
709    
710     p += 1 + extra;
711    
712     switch (c)
713     {
714     case 0x0a: /* LF */
715     *lenptr = 1;
716     return p;
717    
718     case 0x0d: /* CR */
719     if (p < endptr && *p == 0x0a)
720     {
721     *lenptr = 2;
722     p++;
723     }
724     else *lenptr = 1;
725     return p;
726 ph10 150
727 ph10 149 default:
728     break;
729     }
730     } /* End of loop for ANYCRLF case */
731 ph10 150
732 ph10 149 *lenptr = 0; /* Must have hit the end */
733     return endptr;
734    
735 nigel 93 case EL_ANY:
736     while (p < endptr)
737     {
738     int extra = 0;
739     register int c = *((unsigned char *)p);
740    
741     if (utf8 && c >= 0xc0)
742     {
743     int gcii, gcss;
744     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
745     gcss = 6*extra;
746     c = (c & utf8_table3[extra]) << gcss;
747     for (gcii = 1; gcii <= extra; gcii++)
748     {
749     gcss -= 6;
750     c |= (p[gcii] & 0x3f) << gcss;
751     }
752     }
753    
754     p += 1 + extra;
755    
756     switch (c)
757     {
758     case 0x0a: /* LF */
759     case 0x0b: /* VT */
760     case 0x0c: /* FF */
761     *lenptr = 1;
762     return p;
763    
764     case 0x0d: /* CR */
765     if (p < endptr && *p == 0x0a)
766     {
767     *lenptr = 2;
768     p++;
769     }
770     else *lenptr = 1;
771     return p;
772    
773     case 0x85: /* NEL */
774     *lenptr = utf8? 2 : 1;
775     return p;
776    
777     case 0x2028: /* LS */
778     case 0x2029: /* PS */
779     *lenptr = 3;
780     return p;
781    
782     default:
783     break;
784     }
785     } /* End of loop for ANY case */
786    
787     *lenptr = 0; /* Must have hit the end */
788     return endptr;
789     } /* End of overall switch */
790     }
791    
792    
793    
794     /*************************************************
795     * Find start of previous line *
796     *************************************************/
797    
798     /* This is called when looking back for before lines to print.
799    
800     Arguments:
801     p start of the subsequent line
802     startptr start of available data
803    
804     Returns: pointer to the start of the previous line
805     */
806    
807     static char *
808     previous_line(char *p, char *startptr)
809     {
810     switch(endlinetype)
811     {
812     default: /* Just in case */
813     case EL_LF:
814     p--;
815     while (p > startptr && p[-1] != '\n') p--;
816     return p;
817    
818     case EL_CR:
819     p--;
820     while (p > startptr && p[-1] != '\n') p--;
821     return p;
822    
823     case EL_CRLF:
824     for (;;)
825     {
826     p -= 2;
827     while (p > startptr && p[-1] != '\n') p--;
828     if (p <= startptr + 1 || p[-2] == '\r') return p;
829     }
830     return p; /* But control should never get here */
831    
832     case EL_ANY:
833 ph10 150 case EL_ANYCRLF:
834 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
835     if (utf8) while ((*p & 0xc0) == 0x80) p--;
836    
837     while (p > startptr)
838     {
839     register int c;
840     char *pp = p - 1;
841    
842     if (utf8)
843     {
844     int extra = 0;
845     while ((*pp & 0xc0) == 0x80) pp--;
846     c = *((unsigned char *)pp);
847     if (c >= 0xc0)
848     {
849     int gcii, gcss;
850     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
851     gcss = 6*extra;
852     c = (c & utf8_table3[extra]) << gcss;
853     for (gcii = 1; gcii <= extra; gcii++)
854     {
855     gcss -= 6;
856     c |= (pp[gcii] & 0x3f) << gcss;
857     }
858     }
859     }
860     else c = *((unsigned char *)pp);
861    
862 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
863 nigel 93 {
864     case 0x0a: /* LF */
865 ph10 149 case 0x0d: /* CR */
866     return p;
867 ph10 150
868 ph10 149 default:
869     break;
870 ph10 150 }
871 ph10 149
872     else switch (c)
873     {
874     case 0x0a: /* LF */
875 nigel 93 case 0x0b: /* VT */
876     case 0x0c: /* FF */
877     case 0x0d: /* CR */
878     case 0x85: /* NEL */
879     case 0x2028: /* LS */
880     case 0x2029: /* PS */
881     return p;
882    
883     default:
884     break;
885     }
886    
887     p = pp; /* Back one character */
888     } /* End of loop for ANY case */
889    
890     return startptr; /* Hit start of data */
891     } /* End of overall switch */
892     }
893    
894    
895    
896    
897    
898     /*************************************************
899 nigel 77 * Print the previous "after" lines *
900 nigel 49 *************************************************/
901    
902 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
903 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
904     that a binary zero does not terminate it.
905 nigel 77
906     Arguments:
907     lastmatchnumber the number of the last matching line, plus one
908     lastmatchrestart where we restarted after the last match
909     endptr end of available data
910     printname filename for printing
911    
912     Returns: nothing
913     */
914    
915     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
916     char *endptr, char *printname)
917     {
918     if (after_context > 0 && lastmatchnumber > 0)
919     {
920     int count = 0;
921     while (lastmatchrestart < endptr && count++ < after_context)
922     {
923 nigel 93 int ellength;
924 nigel 77 char *pp = lastmatchrestart;
925     if (printname != NULL) fprintf(stdout, "%s-", printname);
926     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
927 nigel 93 pp = end_of_line(pp, endptr, &ellength);
928 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
929 nigel 93 lastmatchrestart = pp;
930 nigel 77 }
931     hyphenpending = TRUE;
932     }
933     }
934    
935    
936    
937     /*************************************************
938 ph10 378 * Apply patterns to subject till one matches *
939     *************************************************/
940    
941 ph10 392 /* This function is called to run through all patterns, looking for a match. It
942     is used multiple times for the same subject when colouring is enabled, in order
943 ph10 378 to find all possible matches.
944    
945     Arguments:
946 ph10 632 matchptr the start of the subject
947     length the length of the subject to match
948     startoffset where to start matching
949     offsets the offets vector to fill in
950     mrc address of where to put the result of pcre_exec()
951 ph10 392
952     Returns: TRUE if there was a match
953 ph10 378 FALSE if there was no match
954     invert if there was a non-fatal error
955 ph10 392 */
956 ph10 378
957     static BOOL
958 ph10 654 match_patterns(char *matchptr, size_t length, int startoffset, int *offsets,
959 ph10 632 int *mrc)
960 ph10 378 {
961     int i;
962 ph10 561 size_t slen = length;
963     const char *msg = "this text:\n\n";
964     if (slen > 200)
965     {
966     slen = 200;
967     msg = "text that starts:\n\n";
968 ph10 579 }
969 ph10 378 for (i = 0; i < pattern_count; i++)
970     {
971 ph10 632 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length,
972     startoffset, PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
973 ph10 378 if (*mrc >= 0) return TRUE;
974     if (*mrc == PCRE_ERROR_NOMATCH) continue;
975 ph10 561 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
976 ph10 378 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
977 ph10 561 fprintf(stderr, "%s", msg);
978     FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
979     fprintf(stderr, "\n\n");
980     if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT)
981     resource_error = TRUE;
982 ph10 378 if (error_count++ > 20)
983     {
984 ph10 561 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
985     pcregrep_exit(2);
986 ph10 378 }
987     return invert; /* No more matching; don't show the line again */
988     }
989    
990     return FALSE; /* No match, no errors */
991     }
992    
993    
994    
995     /*************************************************
996 nigel 77 * Grep an individual file *
997     *************************************************/
998    
999     /* This is called from grep_or_recurse() below. It uses a buffer that is three
1000 ph10 644 times the value of bufthird. The matching point is never allowed to stray into
1001 nigel 77 the top third of the buffer, thus keeping more of the file available for
1002     context printing or for multiline scanning. For large files, the pointer will
1003     be in the middle third most of the time, so the bottom third is available for
1004     "before" context printing.
1005    
1006     Arguments:
1007 ph10 286 handle the fopened FILE stream for a normal file
1008     the gzFile pointer when reading is via libz
1009     the BZFILE pointer when reading is via libbz2
1010     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1011 ph10 644 filename the file name or NULL (for errors)
1012 nigel 77 printname the file name if it is to be printed for each match
1013     or NULL if the file name is not to be printed
1014     it cannot be NULL if filenames[_nomatch]_only is set
1015    
1016     Returns: 0 if there was at least one match
1017     1 otherwise (no matches)
1018 ph10 654 2 if an overlong line is encountered
1019 ph10 644 3 if there is a read error on a .bz2 file
1020 nigel 77 */
1021    
1022 nigel 49 static int
1023 ph10 644 pcregrep(void *handle, int frtype, char *filename, char *printname)
1024 nigel 49 {
1025     int rc = 1;
1026 nigel 77 int linenumber = 1;
1027     int lastmatchnumber = 0;
1028 nigel 49 int count = 0;
1029 ph10 280 int filepos = 0;
1030 ph10 378 int offsets[OFFSET_SIZE];
1031 nigel 77 char *lastmatchrestart = NULL;
1032 ph10 644 char *ptr = main_buffer;
1033 nigel 77 char *endptr;
1034     size_t bufflength;
1035     BOOL endhyphenpending = FALSE;
1036 ph10 519 BOOL input_line_buffered = line_buffered;
1037 ph10 286 FILE *in = NULL; /* Ensure initialized */
1038 nigel 49
1039 ph10 286 #ifdef SUPPORT_LIBZ
1040     gzFile ingz = NULL;
1041     #endif
1042 nigel 77
1043 ph10 286 #ifdef SUPPORT_LIBBZ2
1044     BZFILE *inbz2 = NULL;
1045     #endif
1046    
1047    
1048     /* Do the first read into the start of the buffer and set up the pointer to end
1049     of what we have. In the case of libz, a non-zipped .gz file will be read as a
1050     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1051     fail. */
1052    
1053     #ifdef SUPPORT_LIBZ
1054     if (frtype == FR_LIBZ)
1055     {
1056     ingz = (gzFile)handle;
1057 ph10 644 bufflength = gzread (ingz, main_buffer, bufsize);
1058 ph10 286 }
1059     else
1060     #endif
1061    
1062     #ifdef SUPPORT_LIBBZ2
1063     if (frtype == FR_LIBBZ2)
1064     {
1065     inbz2 = (BZFILE *)handle;
1066 ph10 644 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1067 ph10 286 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1068     } /* without the cast it is unsigned. */
1069     else
1070     #endif
1071    
1072     {
1073     in = (FILE *)handle;
1074 ph10 519 if (is_file_tty(in)) input_line_buffered = TRUE;
1075 ph10 535 bufflength = input_line_buffered?
1076 ph10 644 read_one_line(main_buffer, bufsize, in) :
1077     fread(main_buffer, 1, bufsize, in);
1078 ph10 286 }
1079 ph10 535
1080 ph10 644 endptr = main_buffer + bufflength;
1081 nigel 77
1082     /* Loop while the current pointer is not at the end of the file. For large
1083     files, endptr will be at the end of the buffer when we are in the middle of the
1084     file, but ptr will never get there, because as soon as it gets over 2/3 of the
1085     way, the buffer is shifted left and re-filled. */
1086    
1087     while (ptr < endptr)
1088 nigel 49 {
1089 ph10 378 int endlinelength;
1090 nigel 87 int mrc = 0;
1091 ph10 654 int startoffset = 0;
1092 ph10 378 BOOL match;
1093 ph10 286 char *matchptr = ptr;
1094 nigel 77 char *t = ptr;
1095     size_t length, linelength;
1096 nigel 49
1097 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
1098     of the subject string to pass to pcre_exec(). In multiline mode, it is the
1099     length remainder of the data in the buffer. Otherwise, it is the length of
1100 ph10 378 the next line, excluding the terminating newline. After matching, we always
1101     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1102     option is used for compiling, so that any match is constrained to be in the
1103     first line. */
1104 nigel 77
1105 nigel 93 t = end_of_line(t, endptr, &endlinelength);
1106     linelength = t - ptr - endlinelength;
1107 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
1108 ph10 654
1109     /* Check to see if the line we are looking at extends right to the very end
1110     of the buffer without a line terminator. This means the line is too long to
1111 ph10 644 handle. */
1112 ph10 654
1113 ph10 644 if (endlinelength == 0 && t == main_buffer + bufsize)
1114     {
1115     fprintf(stderr, "pcregrep: line %d%s%s is too long for the internal buffer\n"
1116 ph10 646 "pcregrep: check the --buffer-size option\n",
1117 ph10 654 linenumber,
1118 ph10 644 (filename == NULL)? "" : " of file ",
1119     (filename == NULL)? "" : filename);
1120     return 2;
1121 ph10 654 }
1122 nigel 77
1123 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
1124    
1125     #ifdef JFRIEDL_DEBUG
1126     if (jfriedl_XT || jfriedl_XR)
1127     {
1128     #include <sys/time.h>
1129     #include <time.h>
1130     struct timeval start_time, end_time;
1131     struct timezone dummy;
1132 ph10 392 int i;
1133 nigel 89
1134     if (jfriedl_XT)
1135     {
1136     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1137     const char *orig = ptr;
1138     ptr = malloc(newlen + 1);
1139     if (!ptr) {
1140     printf("out of memory");
1141 ph10 561 pcregrep_exit(2);
1142 nigel 89 }
1143     endptr = ptr;
1144     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1145     for (i = 0; i < jfriedl_XT; i++) {
1146     strncpy(endptr, orig, length);
1147     endptr += length;
1148     }
1149     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1150     length = newlen;
1151     }
1152    
1153     if (gettimeofday(&start_time, &dummy) != 0)
1154     perror("bad gettimeofday");
1155    
1156    
1157     for (i = 0; i < jfriedl_XR; i++)
1158 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1159 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1160 nigel 89
1161     if (gettimeofday(&end_time, &dummy) != 0)
1162     perror("bad gettimeofday");
1163    
1164     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1165     -
1166     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1167    
1168     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1169     return 0;
1170     }
1171     #endif
1172    
1173 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1174 ph10 279 in order to find any further matches in the same line. */
1175 nigel 89
1176 ph10 286 ONLY_MATCHING_RESTART:
1177    
1178 ph10 392 /* Run through all the patterns until one matches or there is an error other
1179 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1180     finding subsequent matches when colouring matched lines. */
1181 ph10 392
1182 ph10 632 match = match_patterns(matchptr, length, startoffset, offsets, &mrc);
1183 nigel 77
1184 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1185 nigel 77
1186 nigel 49 if (match != invert)
1187     {
1188 nigel 77 BOOL hyphenprinted = FALSE;
1189    
1190 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1191 nigel 77
1192 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1193    
1194     /* Just count if just counting is wanted. */
1195    
1196 nigel 49 if (count_only) count++;
1197    
1198 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1199     in the file. */
1200    
1201 ph10 420 else if (filenames == FN_MATCH_ONLY)
1202 nigel 49 {
1203 nigel 77 fprintf(stdout, "%s\n", printname);
1204 nigel 49 return 0;
1205     }
1206    
1207 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1208    
1209 nigel 77 else if (quiet) return 0;
1210 nigel 49
1211 ph10 579 /* The --only-matching option prints just the substring that matched, or a
1212 ph10 565 captured portion of it, as long as this string is not empty, and the
1213     --file-offsets and --line-offsets options output offsets for the matching
1214     substring (they both force --only-matching = 0). None of these options
1215 ph10 636 prints any context. Afterwards, adjust the start and then jump back to look
1216     for further matches in the same line. If we are in invert mode, however,
1217     nothing is printed and we do not restart - this could still be useful
1218     because the return code is set. */
1219 nigel 87
1220 ph10 565 else if (only_matching >= 0)
1221 nigel 87 {
1222 ph10 279 if (!invert)
1223 ph10 286 {
1224 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1225     if (number) fprintf(stdout, "%d:", linenumber);
1226 ph10 280 if (line_offsets)
1227 ph10 565 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1228 ph10 286 offsets[1] - offsets[0]);
1229 ph10 280 else if (file_offsets)
1230 ph10 579 fprintf(stdout, "%d,%d\n",
1231 ph10 565 (int)(filepos + matchptr + offsets[0] - ptr),
1232 ph10 286 offsets[1] - offsets[0]);
1233 ph10 565 else if (only_matching < mrc)
1234 ph10 377 {
1235 ph10 565 int plen = offsets[2*only_matching + 1] - offsets[2*only_matching];
1236     if (plen > 0)
1237 ph10 579 {
1238 ph10 565 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1239     FWRITE(matchptr + offsets[only_matching*2], 1, plen, stdout);
1240     if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1241     fprintf(stdout, "\n");
1242 ph10 579 }
1243 ph10 392 }
1244 ph10 565 else if (printname != NULL || number) fprintf(stdout, "\n");
1245 ph10 286 match = FALSE;
1246 ph10 564 if (line_buffered) fflush(stdout);
1247 ph10 636 rc = 0; /* Had some success */
1248     startoffset = offsets[1]; /* Restart after the match */
1249 ph10 286 goto ONLY_MATCHING_RESTART;
1250     }
1251 nigel 87 }
1252    
1253     /* This is the default case when none of the above options is set. We print
1254     the matching lines(s), possibly preceded and/or followed by other lines of
1255     context. */
1256    
1257 nigel 49 else
1258     {
1259 nigel 77 /* See if there is a requirement to print some "after" lines from a
1260     previous match. We never print any overlaps. */
1261    
1262     if (after_context > 0 && lastmatchnumber > 0)
1263     {
1264 nigel 93 int ellength;
1265 nigel 77 int linecount = 0;
1266     char *p = lastmatchrestart;
1267    
1268     while (p < ptr && linecount < after_context)
1269     {
1270 nigel 93 p = end_of_line(p, ptr, &ellength);
1271 nigel 77 linecount++;
1272     }
1273    
1274     /* It is important to advance lastmatchrestart during this printing so
1275 nigel 87 that it interacts correctly with any "before" printing below. Print
1276     each line's data using fwrite() in case there are binary zeroes. */
1277 nigel 77
1278     while (lastmatchrestart < p)
1279     {
1280     char *pp = lastmatchrestart;
1281     if (printname != NULL) fprintf(stdout, "%s-", printname);
1282     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1283 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1284 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1285 nigel 93 lastmatchrestart = pp;
1286 nigel 77 }
1287     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1288     }
1289    
1290     /* If there were non-contiguous lines printed above, insert hyphens. */
1291    
1292     if (hyphenpending)
1293     {
1294     fprintf(stdout, "--\n");
1295     hyphenpending = FALSE;
1296     hyphenprinted = TRUE;
1297     }
1298    
1299     /* See if there is a requirement to print some "before" lines for this
1300     match. Again, don't print overlaps. */
1301    
1302     if (before_context > 0)
1303     {
1304     int linecount = 0;
1305     char *p = ptr;
1306    
1307 ph10 644 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1308 nigel 87 linecount < before_context)
1309 nigel 77 {
1310 nigel 87 linecount++;
1311 ph10 644 p = previous_line(p, main_buffer);
1312 nigel 77 }
1313    
1314     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1315     fprintf(stdout, "--\n");
1316    
1317     while (p < ptr)
1318     {
1319 nigel 93 int ellength;
1320 nigel 77 char *pp = p;
1321     if (printname != NULL) fprintf(stdout, "%s-", printname);
1322     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1323 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1324 ph10 515 FWRITE(p, 1, pp - p, stdout);
1325 nigel 93 p = pp;
1326 nigel 77 }
1327     }
1328    
1329     /* Now print the matching line(s); ensure we set hyphenpending at the end
1330 nigel 85 of the file if any context lines are being output. */
1331 nigel 77
1332 nigel 85 if (after_context > 0 || before_context > 0)
1333     endhyphenpending = TRUE;
1334    
1335 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1336 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1337 nigel 77
1338     /* In multiline mode, we want to print to the end of the line in which
1339     the end of the matched string is found, so we adjust linelength and the
1340 ph10 222 line number appropriately, but only when there actually was a match
1341     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1342     the match will always be before the first newline sequence. */
1343 nigel 77
1344 ph10 587 if (multiline & !invert)
1345 nigel 77 {
1346 ph10 587 char *endmatch = ptr + offsets[1];
1347     t = ptr;
1348     while (t < endmatch)
1349 nigel 93 {
1350 ph10 587 t = end_of_line(t, endptr, &endlinelength);
1351     if (t < endmatch) linenumber++; else break;
1352 nigel 93 }
1353 ph10 587 linelength = t - ptr - endlinelength;
1354 nigel 77 }
1355    
1356 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1357     zeroes are treated as just another data character. */
1358    
1359     /* This extra option, for Jeffrey Friedl's debugging requirements,
1360     replaces the matched string, or a specific captured string if it exists,
1361     with X. When this happens, colouring is ignored. */
1362    
1363     #ifdef JFRIEDL_DEBUG
1364     if (S_arg >= 0 && S_arg < mrc)
1365     {
1366     int first = S_arg * 2;
1367     int last = first + 1;
1368 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1369 nigel 87 fprintf(stdout, "X");
1370 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1371 nigel 87 }
1372     else
1373     #endif
1374    
1375 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1376 ph10 585 matches, but not of course if the line is a non-match. */
1377 ph10 589
1378 ph10 585 if (do_colour && !invert)
1379 nigel 87 {
1380 ph10 589 int plength;
1381 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1382 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1383 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1384 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1385 ph10 378 for (;;)
1386     {
1387 ph10 632 startoffset = offsets[1];
1388 ph10 636 if (startoffset >= linelength + endlinelength ||
1389 ph10 654 !match_patterns(matchptr, length, startoffset, offsets, &mrc))
1390 ph10 632 break;
1391     FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1392 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1393 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1394 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1395     }
1396 ph10 587
1397     /* In multiline mode, we may have already printed the complete line
1398 ph10 589 and its line-ending characters (if they matched the pattern), so there
1399 ph10 587 may be no more to print. */
1400 ph10 589
1401 ph10 636 plength = (linelength + endlinelength) - startoffset;
1402     if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1403 nigel 87 }
1404 ph10 392
1405 ph10 378 /* Not colouring; no need to search for further matches */
1406 ph10 392
1407 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1408 nigel 49 }
1409    
1410 ph10 519 /* End of doing what has to be done for a match. If --line-buffered was
1411     given, flush the output. */
1412 nigel 87
1413 ph10 519 if (line_buffered) fflush(stdout);
1414 nigel 77 rc = 0; /* Had some success */
1415    
1416     /* Remember where the last match happened for after_context. We remember
1417     where we are about to restart, and that line's number. */
1418    
1419 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1420 nigel 77 lastmatchnumber = linenumber + 1;
1421 nigel 49 }
1422 nigel 77
1423 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1424     anything to be printed), we have to move on to the end of the match before
1425     proceeding. */
1426    
1427     if (multiline && invert && match)
1428     {
1429     int ellength;
1430     char *endmatch = ptr + offsets[1];
1431     t = ptr;
1432     while (t < endmatch)
1433     {
1434     t = end_of_line(t, endptr, &ellength);
1435     if (t <= endmatch) linenumber++; else break;
1436     }
1437     endmatch = end_of_line(endmatch, endptr, &ellength);
1438     linelength = endmatch - ptr - ellength;
1439     }
1440    
1441 ph10 286 /* Advance to after the newline and increment the line number. The file
1442 ph10 280 offset to the current line is maintained in filepos. */
1443 nigel 77
1444 nigel 93 ptr += linelength + endlinelength;
1445 ph10 530 filepos += (int)(linelength + endlinelength);
1446 nigel 77 linenumber++;
1447 ph10 535
1448     /* If input is line buffered, and the buffer is not yet full, read another
1449 ph10 519 line and add it into the buffer. */
1450 ph10 535
1451 ph10 644 if (input_line_buffered && bufflength < bufsize)
1452 ph10 519 {
1453 ph10 644 int add = read_one_line(ptr, bufsize - (ptr - main_buffer), in);
1454 ph10 519 bufflength += add;
1455 ph10 535 endptr += add;
1456     }
1457 nigel 77
1458     /* If we haven't yet reached the end of the file (the buffer is full), and
1459     the current point is in the top 1/3 of the buffer, slide the buffer down by
1460     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1461     about to be lost, print them. */
1462    
1463 ph10 644 if (bufflength >= bufsize && ptr > main_buffer + 2*bufthird)
1464 nigel 77 {
1465     if (after_context > 0 &&
1466     lastmatchnumber > 0 &&
1467 ph10 644 lastmatchrestart < main_buffer + bufthird)
1468 nigel 77 {
1469     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1470     lastmatchnumber = 0;
1471     }
1472    
1473     /* Now do the shuffle */
1474    
1475 ph10 644 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
1476     ptr -= bufthird;
1477 ph10 286
1478     #ifdef SUPPORT_LIBZ
1479     if (frtype == FR_LIBZ)
1480 ph10 644 bufflength = 2*bufthird +
1481     gzread (ingz, main_buffer + 2*bufthird, bufthird);
1482 ph10 286 else
1483     #endif
1484    
1485     #ifdef SUPPORT_LIBBZ2
1486     if (frtype == FR_LIBBZ2)
1487 ph10 644 bufflength = 2*bufthird +
1488     BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
1489 ph10 286 else
1490     #endif
1491    
1492 ph10 644 bufflength = 2*bufthird +
1493 ph10 535 (input_line_buffered?
1494 ph10 644 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
1495     fread(main_buffer + 2*bufthird, 1, bufthird, in));
1496     endptr = main_buffer + bufflength;
1497 nigel 77
1498     /* Adjust any last match point */
1499    
1500 ph10 644 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
1501 nigel 77 }
1502     } /* Loop through the whole file */
1503    
1504     /* End of file; print final "after" lines if wanted; do_after_lines sets
1505     hyphenpending if it prints something. */
1506    
1507 ph10 565 if (only_matching < 0 && !count_only)
1508 nigel 87 {
1509     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1510     hyphenpending |= endhyphenpending;
1511     }
1512 nigel 77
1513     /* Print the file name if we are looking for those without matches and there
1514     were none. If we found a match, we won't have got this far. */
1515    
1516 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1517 nigel 77 {
1518     fprintf(stdout, "%s\n", printname);
1519     return 0;
1520 nigel 49 }
1521    
1522 nigel 77 /* Print the match count if wanted */
1523    
1524 nigel 49 if (count_only)
1525     {
1526 ph10 420 if (count > 0 || !omit_zero_count)
1527 ph10 461 {
1528     if (printname != NULL && filenames != FN_NONE)
1529 ph10 420 fprintf(stdout, "%s:", printname);
1530     fprintf(stdout, "%d\n", count);
1531 ph10 461 }
1532 nigel 49 }
1533    
1534     return rc;
1535     }
1536    
1537    
1538    
1539     /*************************************************
1540 nigel 53 * Grep a file or recurse into a directory *
1541     *************************************************/
1542    
1543 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1544     recursing; if it's a file, grep it.
1545    
1546     Arguments:
1547     pathname the path to investigate
1548 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1549 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1550    
1551     Returns: 0 if there was at least one match
1552     1 if there were no matches
1553     2 there was some kind of error
1554    
1555     However, file opening failures are suppressed if "silent" is set.
1556     */
1557    
1558 nigel 53 static int
1559 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1560 nigel 53 {
1561     int rc = 1;
1562     int sep;
1563 ph10 286 int frtype;
1564     int pathlen;
1565     void *handle;
1566     FILE *in = NULL; /* Ensure initialized */
1567 nigel 53
1568 ph10 286 #ifdef SUPPORT_LIBZ
1569     gzFile ingz = NULL;
1570     #endif
1571    
1572     #ifdef SUPPORT_LIBBZ2
1573     BZFILE *inbz2 = NULL;
1574     #endif
1575    
1576 nigel 77 /* If the file name is "-" we scan stdin */
1577 nigel 53
1578 nigel 77 if (strcmp(pathname, "-") == 0)
1579 nigel 53 {
1580 ph10 644 return pcregrep(stdin, FR_PLAIN, stdin_name,
1581 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1582 nigel 77 stdin_name : NULL);
1583     }
1584    
1585 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1586 ph10 325 each file and directory within it, subject to any include or exclude patterns
1587     that were set. The scanning code is localized so it can be made
1588     system-specific. */
1589 nigel 87
1590     if ((sep = isdirectory(pathname)) != 0)
1591 nigel 77 {
1592 nigel 87 if (dee_action == dee_SKIP) return 1;
1593     if (dee_action == dee_RECURSE)
1594 nigel 53 {
1595 nigel 87 char buffer[1024];
1596     char *nextfile;
1597     directory_type *dir = opendirectory(pathname);
1598 nigel 53
1599 nigel 87 if (dir == NULL)
1600     {
1601     if (!silent)
1602     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1603     strerror(errno));
1604     return 2;
1605     }
1606 nigel 77
1607 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1608     {
1609 ph10 324 int frc, nflen;
1610 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1611 ph10 530 nflen = (int)(strlen(nextfile));
1612 ph10 345
1613 ph10 325 if (isdirectory(buffer))
1614     {
1615     if (exclude_dir_compiled != NULL &&
1616     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1617     continue;
1618 ph10 345
1619 ph10 325 if (include_dir_compiled != NULL &&
1620     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1621     continue;
1622     }
1623 ph10 345 else
1624     {
1625 ph10 324 if (exclude_compiled != NULL &&
1626     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1627     continue;
1628 ph10 345
1629 ph10 324 if (include_compiled != NULL &&
1630     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1631     continue;
1632 ph10 345 }
1633 nigel 77
1634 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1635     if (frc > 1) rc = frc;
1636     else if (frc == 0 && rc == 1) rc = 0;
1637     }
1638    
1639     closedirectory(dir);
1640     return rc;
1641 nigel 53 }
1642     }
1643    
1644 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1645     been requested. */
1646 nigel 53
1647 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1648    
1649     /* Control reaches here if we have a regular file, or if we have a directory
1650     and recursion or skipping was not requested, or if we have anything else and
1651     skipping was not requested. The scan proceeds. If this is the first and only
1652     argument at top level, we don't show the file name, unless we are only showing
1653     the file name, or the filename was forced (-H). */
1654    
1655 ph10 530 pathlen = (int)(strlen(pathname));
1656 ph10 286
1657     /* Open using zlib if it is supported and the file name ends with .gz. */
1658    
1659     #ifdef SUPPORT_LIBZ
1660     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1661 nigel 53 {
1662 ph10 286 ingz = gzopen(pathname, "rb");
1663     if (ingz == NULL)
1664     {
1665     if (!silent)
1666     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1667     strerror(errno));
1668     return 2;
1669     }
1670     handle = (void *)ingz;
1671     frtype = FR_LIBZ;
1672     }
1673     else
1674     #endif
1675    
1676     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1677    
1678     #ifdef SUPPORT_LIBBZ2
1679     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1680     {
1681     inbz2 = BZ2_bzopen(pathname, "rb");
1682     handle = (void *)inbz2;
1683     frtype = FR_LIBBZ2;
1684     }
1685     else
1686     #endif
1687    
1688     /* Otherwise use plain fopen(). The label is so that we can come back here if
1689     an attempt to read a .bz2 file indicates that it really is a plain file. */
1690    
1691     #ifdef SUPPORT_LIBBZ2
1692     PLAIN_FILE:
1693     #endif
1694     {
1695 ph10 419 in = fopen(pathname, "rb");
1696 ph10 286 handle = (void *)in;
1697     frtype = FR_PLAIN;
1698     }
1699    
1700     /* All the opening methods return errno when they fail. */
1701    
1702     if (handle == NULL)
1703     {
1704 nigel 77 if (!silent)
1705     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1706     strerror(errno));
1707 nigel 53 return 2;
1708     }
1709    
1710 ph10 286 /* Now grep the file */
1711    
1712 ph10 644 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
1713 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1714 nigel 77
1715 ph10 286 /* Close in an appropriate manner. */
1716    
1717     #ifdef SUPPORT_LIBZ
1718     if (frtype == FR_LIBZ)
1719     gzclose(ingz);
1720     else
1721     #endif
1722    
1723 ph10 644 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
1724 ph10 286 read failed. If the error indicates that the file isn't in fact bzipped, try
1725     again as a normal file. */
1726    
1727     #ifdef SUPPORT_LIBBZ2
1728     if (frtype == FR_LIBBZ2)
1729     {
1730 ph10 644 if (rc == 3)
1731 ph10 286 {
1732     int errnum;
1733     const char *err = BZ2_bzerror(inbz2, &errnum);
1734     if (errnum == BZ_DATA_ERROR_MAGIC)
1735     {
1736     BZ2_bzclose(inbz2);
1737     goto PLAIN_FILE;
1738     }
1739     else if (!silent)
1740     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1741     pathname, err);
1742 ph10 654 rc = 2; /* The normal "something went wrong" code */
1743 ph10 286 }
1744     BZ2_bzclose(inbz2);
1745     }
1746     else
1747     #endif
1748    
1749     /* Normal file close */
1750    
1751 nigel 53 fclose(in);
1752 ph10 286
1753     /* Pass back the yield from pcregrep(). */
1754    
1755 nigel 53 return rc;
1756     }
1757    
1758    
1759    
1760    
1761     /*************************************************
1762 nigel 49 * Usage function *
1763     *************************************************/
1764    
1765     static int
1766     usage(int rc)
1767     {
1768 nigel 87 option_item *op;
1769     fprintf(stderr, "Usage: pcregrep [-");
1770     for (op = optionlist; op->one_char != 0; op++)
1771     {
1772     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1773     }
1774     fprintf(stderr, "] [long options] [pattern] [files]\n");
1775 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1776     "options.\n");
1777 nigel 49 return rc;
1778     }
1779    
1780    
1781    
1782    
1783     /*************************************************
1784 nigel 53 * Help function *
1785     *************************************************/
1786    
1787     static void
1788     help(void)
1789     {
1790     option_item *op;
1791    
1792 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1793 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1794 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1795 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1796    
1797     #ifdef SUPPORT_LIBZ
1798     printf("Files whose names end in .gz are read using zlib.\n");
1799     #endif
1800    
1801     #ifdef SUPPORT_LIBBZ2
1802     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1803     #endif
1804    
1805     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1806     printf("Other files and the standard input are read as plain files.\n\n");
1807     #else
1808     printf("All files are read as plain files, without any interpretation.\n\n");
1809     #endif
1810    
1811 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1812     printf("Options:\n");
1813    
1814     for (op = optionlist; op->one_char != 0; op++)
1815     {
1816     int n;
1817     char s[4];
1818 ph10 579
1819 ph10 571 /* Two options were accidentally implemented and documented with underscores
1820     instead of hyphens in their names, something that was not noticed for quite a
1821 ph10 579 few releases. When fixing this, I left the underscored versions in the list
1822     in case people were using them. However, we don't want to display them in the
1823     help data. There are no other options that contain underscores, and we do not
1824     expect ever to implement such options. Therefore, just omit any option that
1825 ph10 571 contains an underscore. */
1826 ph10 579
1827     if (strchr(op->long_name, '_') != NULL) continue;
1828    
1829 nigel 53 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1830 ph10 571 n = 31 - printf(" %s --%s", s, op->long_name);
1831 nigel 53 if (n < 1) n = 1;
1832 ph10 571 printf("%.*s%s\n", n, " ", op->help_text);
1833 nigel 53 }
1834    
1835 ph10 654 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
1836 ph10 644 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
1837     printf("When reading patterns from a file instead of using a command line option,\n");
1838 nigel 77 printf("trailing white space is removed and blank lines are ignored.\n");
1839 ph10 654 printf("There is a maximum of %d patterns, each of maximum size %d bytes.\n",
1840 ph10 644 MAX_PATTERN_COUNT, PATBUFSIZE);
1841 nigel 53
1842 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1843 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1844     }
1845    
1846    
1847    
1848    
1849     /*************************************************
1850 nigel 77 * Handle a single-letter, no data option *
1851 nigel 53 *************************************************/
1852    
1853     static int
1854     handle_option(int letter, int options)
1855     {
1856     switch(letter)
1857     {
1858 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1859 ph10 561 case N_HELP: help(); pcregrep_exit(0);
1860 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1861 ph10 535 case N_LBUFFER: line_buffered = TRUE; break;
1862 nigel 53 case 'c': count_only = TRUE; break;
1863 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1864     case 'H': filenames = FN_FORCE; break;
1865     case 'h': filenames = FN_NONE; break;
1866 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1867 ph10 667 case 'j': study_options |= PCRE_STUDY_JIT_COMPILE; break;
1868 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1869 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1870 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1871 nigel 53 case 'n': number = TRUE; break;
1872 ph10 565 case 'o': only_matching = 0; break;
1873 nigel 77 case 'q': quiet = TRUE; break;
1874 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1875 nigel 53 case 's': silent = TRUE; break;
1876 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1877 nigel 53 case 'v': invert = TRUE; break;
1878 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1879     case 'x': process_options |= PO_LINE_MATCH; break;
1880 nigel 53
1881     case 'V':
1882 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1883 ph10 561 pcregrep_exit(0);
1884 nigel 53 break;
1885    
1886     default:
1887     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1888 ph10 561 pcregrep_exit(usage(2));
1889 nigel 53 }
1890    
1891     return options;
1892     }
1893    
1894    
1895    
1896    
1897     /*************************************************
1898 nigel 87 * Construct printed ordinal *
1899     *************************************************/
1900    
/* This turns a number into "1st", "3rd", "11th", etc.

Argument:   n   the number to format
Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so the result must be used or copied
            before calling ordin() again

The ending is chosen from the last digit, except that numbers whose last two
digits are 11, 12 or 13 always take "th". */

static char *
ordin(int n)
{
/* About three decimal digits per byte of int is generous; the extra four
bytes cover a minus sign, the two-letter ending and the terminating zero. */

static char buffer[3 * sizeof(int) + 4];
const char *ending = "th";
int last_two = n % 100;
char *p = buffer;

sprintf(p, "%d", n);
while (*p != 0) p++;

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1919    
1920    
1921    
1922     /*************************************************
1923     * Compile a single pattern *
1924     *************************************************/
1925    
1926     /* When the -F option has been used, this is called for each substring.
1927     Otherwise it's called for each supplied pattern.
1928    
1929     Arguments:
1930     pattern the pattern string
1931     options the PCRE options
1932     filename the file name, or NULL for a command-line pattern
1933     count 0 if this is the only command line pattern, or
1934     number of the command line pattern, or
1935     linenumber for a pattern from a file
1936    
1937     Returns: TRUE on success, FALSE after an error
1938     */
1939    
1940     static BOOL
1941     compile_single_pattern(char *pattern, int options, char *filename, int count)
1942     {
1943 ph10 644 char buffer[PATBUFSIZE];
1944 nigel 87 const char *error;
1945     int errptr;
1946    
1947     if (pattern_count >= MAX_PATTERN_COUNT)
1948     {
1949     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1950     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1951     return FALSE;
1952     }
1953    
1954 ph10 644 sprintf(buffer, "%s%.*s%s", prefix[process_options], bufthird, pattern,
1955 nigel 87 suffix[process_options]);
1956     pattern_list[pattern_count] =
1957     pcre_compile(buffer, options, &error, &errptr, pcretables);
1958 ph10 142 if (pattern_list[pattern_count] != NULL)
1959 ph10 141 {
1960 ph10 142 pattern_count++;
1961 ph10 141 return TRUE;
1962 ph10 142 }
1963 nigel 87
1964     /* Handle compile errors */
1965    
1966     errptr -= (int)strlen(prefix[process_options]);
1967     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1968    
1969     if (filename == NULL)
1970     {
1971     if (count == 0)
1972     fprintf(stderr, "pcregrep: Error in command-line regex "
1973     "at offset %d: %s\n", errptr, error);
1974     else
1975     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1976     "at offset %d: %s\n", ordin(count), errptr, error);
1977     }
1978     else
1979     {
1980     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1981     "at offset %d: %s\n", count, filename, errptr, error);
1982     }
1983    
1984     return FALSE;
1985     }
1986    
1987    
1988    
1989     /*************************************************
1990     * Compile one supplied pattern *
1991     *************************************************/
1992    
1993     /* When the -F option has been used, each string may be a list of strings,
1994 nigel 91 separated by line breaks. They will be matched literally.
1995 nigel 87
1996     Arguments:
1997     pattern the pattern string
1998     options the PCRE options
1999     filename the file name, or NULL for a command-line pattern
2000     count 0 if this is the only command line pattern, or
2001     number of the command line pattern, or
2002     linenumber for a pattern from a file
2003    
2004     Returns: TRUE on success, FALSE after an error
2005     */
2006    
2007     static BOOL
2008     compile_pattern(char *pattern, int options, char *filename, int count)
2009     {
2010     if ((process_options & PO_FIXED_STRINGS) != 0)
2011     {
2012 nigel 93 char *eop = pattern + strlen(pattern);
2013 ph10 644 char buffer[PATBUFSIZE];
2014 nigel 87 for(;;)
2015     {
2016 nigel 93 int ellength;
2017     char *p = end_of_line(pattern, eop, &ellength);
2018     if (ellength == 0)
2019 nigel 87 return compile_single_pattern(pattern, options, filename, count);
2020 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
2021 nigel 93 pattern = p;
2022 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
2023     return FALSE;
2024     }
2025     }
2026     else return compile_single_pattern(pattern, options, filename, count);
2027     }
2028    
2029    
2030    
2031     /*************************************************
2032 nigel 49 * Main program *
2033     *************************************************/
2034    
2035 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2036    
2037 nigel 49 int
2038     main(int argc, char **argv)
2039     {
2040 nigel 53 int i, j;
2041 nigel 49 int rc = 1;
2042 nigel 87 int pcre_options = 0;
2043     int cmd_pattern_count = 0;
2044 ph10 141 int hint_count = 0;
2045 nigel 49 int errptr;
2046 nigel 87 BOOL only_one_at_top;
2047     char *patterns[MAX_PATTERN_COUNT];
2048     const char *locale_from = "--locale";
2049 nigel 49 const char *error;
2050    
2051 nigel 93 /* Set the default line ending value from the default in the PCRE library;
2052     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2053 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
2054 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
2055 ph10 391 rather than escapes such as '\r'. */
2056 nigel 91
2057     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2058     switch(i)
2059     {
2060 ph10 391 default: newline = (char *)"lf"; break;
2061     case 13: newline = (char *)"cr"; break;
2062     case (13 << 8) | 10: newline = (char *)"crlf"; break;
2063     case -1: newline = (char *)"any"; break;
2064     case -2: newline = (char *)"anycrlf"; break;
2065 nigel 91 }
2066    
2067 nigel 49 /* Process the options */
2068    
2069     for (i = 1; i < argc; i++)
2070     {
2071 nigel 77 option_item *op = NULL;
2072     char *option_data = (char *)""; /* default to keep compiler happy */
2073     BOOL longop;
2074     BOOL longopwasequals = FALSE;
2075    
2076 nigel 49 if (argv[i][0] != '-') break;
2077 nigel 53
2078 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2079 nigel 87 but only if we have previously had -e or -f to define the patterns. */
2080 nigel 63
2081 nigel 77 if (argv[i][1] == 0)
2082     {
2083 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
2084 ph10 561 else pcregrep_exit(usage(2));
2085 nigel 77 }
2086 nigel 63
2087 nigel 77 /* Handle a long name option, or -- to terminate the options */
2088 nigel 53
2089     if (argv[i][1] == '-')
2090 nigel 49 {
2091 nigel 77 char *arg = argv[i] + 2;
2092     char *argequals = strchr(arg, '=');
2093 nigel 53
2094 nigel 77 if (*arg == 0) /* -- terminates options */
2095 nigel 49 {
2096 nigel 77 i++;
2097     break; /* out of the options-handling loop */
2098 nigel 53 }
2099 nigel 49
2100 nigel 77 longop = TRUE;
2101    
2102     /* Some long options have data that follows after =, for example file=name.
2103     Some options have variations in the long name spelling: specifically, we
2104     allow "regexp" because GNU grep allows it, though I personally go along
2105 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2106 ph10 422 These options are entered in the table as "regex(p)". Options can be in
2107     both these categories. */
2108 nigel 77
2109 nigel 53 for (op = optionlist; op->one_char != 0; op++)
2110     {
2111 nigel 77 char *opbra = strchr(op->long_name, '(');
2112     char *equals = strchr(op->long_name, '=');
2113 ph10 461
2114 ph10 422 /* Handle options with only one spelling of the name */
2115 ph10 461
2116 ph10 422 if (opbra == NULL) /* Does not contain '(' */
2117 nigel 53 {
2118 nigel 77 if (equals == NULL) /* Not thing=data case */
2119     {
2120     if (strcmp(arg, op->long_name) == 0) break;
2121     }
2122     else /* Special case xxx=data */
2123     {
2124 ph10 530 int oplen = (int)(equals - op->long_name);
2125 ph10 535 int arglen = (argequals == NULL)?
2126 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2127 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2128     {
2129     option_data = arg + arglen;
2130     if (*option_data == '=')
2131     {
2132     option_data++;
2133     longopwasequals = TRUE;
2134     }
2135     break;
2136     }
2137     }
2138 nigel 53 }
2139 ph10 461
2140 ph10 422 /* Handle options with an alternate spelling of the name */
2141 ph10 461
2142     else
2143 nigel 77 {
2144     char buff1[24];
2145     char buff2[24];
2146 ph10 461
2147 ph10 530 int baselen = (int)(opbra - op->long_name);
2148     int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2149 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
2150 ph10 530 (int)strlen(arg) : (int)(argequals - arg);
2151 ph10 461
2152 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
2153 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2154 ph10 461
2155     if (strncmp(arg, buff1, arglen) == 0 ||
2156 ph10 422 strncmp(arg, buff2, arglen) == 0)
2157     {
2158     if (equals != NULL && argequals != NULL)
2159     {
2160 ph10 461 option_data = argequals;
2161 ph10 422 if (*option_data == '=')
2162     {
2163 ph10 461 option_data++;
2164 ph10 422 longopwasequals = TRUE;
2165 ph10 461 }
2166     }
2167 nigel 77 break;
2168 ph10 461 }
2169 nigel 77 }
2170 nigel 53 }
2171 nigel 77
2172 nigel 53 if (op->one_char == 0)
2173     {
2174     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2175 ph10 561 pcregrep_exit(usage(2));
2176 nigel 53 }
2177     }
2178 nigel 49
2179 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2180     are not in the right form for putting in the option table because they use
2181     only one hyphen, yet are more than one character long. By putting them
2182     separately here, they will not get displayed as part of the help() output,
2183     but I don't think Jeffrey will care about that. */
2184    
2185     #ifdef JFRIEDL_DEBUG
2186     else if (strcmp(argv[i], "-pre") == 0) {
2187     jfriedl_prefix = argv[++i];
2188     continue;
2189     } else if (strcmp(argv[i], "-post") == 0) {
2190     jfriedl_postfix = argv[++i];
2191     continue;
2192     } else if (strcmp(argv[i], "-XT") == 0) {
2193     sscanf(argv[++i], "%d", &jfriedl_XT);
2194     continue;
2195     } else if (strcmp(argv[i], "-XR") == 0) {
2196     sscanf(argv[++i], "%d", &jfriedl_XR);
2197     continue;
2198     }
2199     #endif
2200    
2201    
2202 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2203     continue till we hit the last one or one that needs data. */
2204 nigel 53
2205     else
2206     {
2207     char *s = argv[i] + 1;
2208 nigel 77 longop = FALSE;
2209 nigel 53 while (*s != 0)
2210     {
2211 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2212 ph10 579 {
2213     if (*s == op->one_char) break;
2214 ph10 565 }
2215 nigel 77 if (op->one_char == 0)
2216 nigel 53 {
2217 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2218     *s, argv[i]);
2219 ph10 561 pcregrep_exit(usage(2));
2220 nigel 77 }
2221 ph10 579
2222 ph10 565 /* Check for a single-character option that has data: OP_OP_NUMBER
2223 ph10 579 is used for one that either has a numerical number or defaults, i.e. the
2224 ph10 565 data is optional. If a digit follows, there is data; if not, carry on
2225     with other single-character options in the same string. */
2226 ph10 579
2227 ph10 565 option_data = s+1;
2228     if (op->type == OP_OP_NUMBER)
2229 ph10 579 {
2230     if (isdigit((unsigned char)s[1])) break;
2231 nigel 53 }
2232 ph10 565 else /* Check for end or a dataless option */
2233 ph10 579 {
2234 ph10 565 if (op->type != OP_NODATA || s[1] == 0) break;
2235 ph10 579 }
2236    
2237     /* Handle a single-character option with no data, then loop for the
2238 ph10 565 next character in the string. */
2239    
2240 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2241 nigel 49 }
2242     }
2243 nigel 77
2244 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2245     is NO_DATA, it means that there is no data, and the option might set
2246     something in the PCRE options. */
2247 nigel 77
2248     if (op->type == OP_NODATA)
2249     {
2250 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2251     continue;
2252     }
2253    
2254     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2255     either has a value or defaults to something. It cannot have data in a
2256 ph10 579 separate item. At the moment, the only such options are "colo(u)r",
2257 ph10 565 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2258 nigel 87
2259     if (*option_data == 0 &&
2260     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2261     {
2262     switch (op->one_char)
2263 nigel 77 {
2264 nigel 87 case N_COLOUR:
2265     colour_option = (char *)"auto";
2266     break;
2267 ph10 579
2268 ph10 565 case 'o':
2269     only_matching = 0;
2270 ph10 579 break;
2271    
2272 nigel 87 #ifdef JFRIEDL_DEBUG
2273     case 'S':
2274     S_arg = 0;
2275     break;
2276     #endif
2277 nigel 77 }
2278 nigel 87 continue;
2279     }
2280 nigel 77
2281 nigel 87 /* Otherwise, find the data string for the option. */
2282    
2283     if (*option_data == 0)
2284     {
2285     if (i >= argc - 1 || longopwasequals)
2286 nigel 77 {
2287 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2288 ph10 561 pcregrep_exit(usage(2));
2289 nigel 87 }
2290     option_data = argv[++i];
2291     }
2292    
2293     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2294     multiple times to create a list of patterns. */
2295    
2296     if (op->type == OP_PATLIST)
2297     {
2298     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2299     {
2300     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2301     MAX_PATTERN_COUNT);
2302     return 2;
2303     }
2304     patterns[cmd_pattern_count++] = option_data;
2305     }
2306    
2307     /* Otherwise, deal with single string or numeric data values. */
2308    
2309 ph10 584 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2310     op->type != OP_OP_NUMBER)
2311 nigel 87 {
2312     *((char **)op->dataptr) = option_data;
2313     }
2314 ph10 558
2315     /* Avoid the use of strtoul() because SunOS4 doesn't have it. This is used
2316     only for unpicking arguments, so just keep it simple. */
2317    
2318 nigel 87 else
2319     {
2320 ph10 561 unsigned long int n = 0;
2321 ph10 558 char *endptr = option_data;
2322     while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
2323     while (isdigit((unsigned char)(*endptr)))
2324     n = n * 10 + (int)(*endptr++ - '0');
2325 ph10 644 if (toupper(*endptr) == 'K')
2326     {
2327 ph10 654 n *= 1024;
2328     endptr++;
2329     }
2330 ph10 644 else if (toupper(*endptr) == 'M')
2331     {
2332 ph10 654 n *= 1024*1024;
2333     endptr++;
2334     }
2335 nigel 87 if (*endptr != 0)
2336     {
2337     if (longop)
2338 nigel 77 {
2339 nigel 87 char *equals = strchr(op->long_name, '=');
2340     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2341 ph10 530 (int)(equals - op->long_name);
2342 nigel 87 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2343     option_data, nlen, op->long_name);
2344 nigel 77 }
2345 nigel 87 else
2346     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2347     option_data, op->one_char);
2348 ph10 561 pcregrep_exit(usage(2));
2349 nigel 77 }
2350 ph10 584 if (op->type == OP_LONGNUMBER)
2351     *((unsigned long int *)op->dataptr) = n;
2352     else
2353     *((int *)op->dataptr) = n;
2354 nigel 77 }
2355 nigel 49 }
2356    
2357 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2358     for -A and -B. */
2359    
2360     if (both_context > 0)
2361     {
2362     if (after_context == 0) after_context = both_context;
2363     if (before_context == 0) before_context = both_context;
2364     }
2365 ph10 286
2366     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2367 ph10 565 However, the latter two set only_matching. */
2368 nigel 77
2369 ph10 565 if ((only_matching >= 0 && (file_offsets || line_offsets)) ||
2370 ph10 286 (file_offsets && line_offsets))
2371 ph10 280 {
2372     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2373     "and/or --line-offsets\n");
2374 ph10 561 pcregrep_exit(usage(2));
2375 ph10 280 }
2376    
2377 ph10 565 if (file_offsets || line_offsets) only_matching = 0;
2378 ph10 286
2379 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2380     LC_ALL environment variable is set, and if so, use it. */
2381 nigel 49
2382 nigel 87 if (locale == NULL)
2383 nigel 53 {
2384 nigel 87 locale = getenv("LC_ALL");
2385     locale_from = "LCC_ALL";
2386 nigel 53 }
2387 nigel 49
2388 nigel 87 if (locale == NULL)
2389     {
2390     locale = getenv("LC_CTYPE");
2391     locale_from = "LC_CTYPE";
2392     }
2393 nigel 49
2394 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2395     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2396    
2397     if (locale != NULL)
2398 nigel 49 {
2399 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2400 nigel 53 {
2401 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2402     locale, locale_from);
2403 nigel 53 return 2;
2404     }
2405 nigel 87 pcretables = pcre_maketables();
2406     }
2407 nigel 77
2408 nigel 87 /* Sort out colouring */
2409    
2410     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2411     {
2412     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2413     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2414     else
2415 nigel 53 {
2416 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2417     colour_option);
2418     return 2;
2419 nigel 77 }
2420 nigel 87 if (do_colour)
2421 nigel 77 {
2422 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2423     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2424     if (cs != NULL) colour_string = cs;
2425 nigel 77 }
2426 nigel 87 }
2427 ph10 535
2428 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2429    
2430     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2431     {
2432     pcre_options |= PCRE_NEWLINE_CR;
2433 nigel 93 endlinetype = EL_CR;
2434 nigel 91 }
2435     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2436     {
2437     pcre_options |= PCRE_NEWLINE_LF;
2438 nigel 93 endlinetype = EL_LF;
2439 nigel 91 }
2440     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2441     {
2442     pcre_options |= PCRE_NEWLINE_CRLF;
2443 nigel 93 endlinetype = EL_CRLF;
2444 nigel 91 }
2445 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2446     {
2447     pcre_options |= PCRE_NEWLINE_ANY;
2448     endlinetype = EL_ANY;
2449     }
2450 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2451     {
2452     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2453     endlinetype = EL_ANYCRLF;
2454     }
2455 nigel 91 else
2456     {
2457     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2458     return 2;
2459     }
2460    
2461 nigel 87 /* Interpret the text values for -d and -D */
2462    
2463     if (dee_option != NULL)
2464     {
2465     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2466     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2467     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2468     else
2469 nigel 77 {
2470 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2471     return 2;
2472 nigel 53 }
2473 nigel 49 }
2474    
2475 nigel 87 if (DEE_option != NULL)
2476     {
2477     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2478     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2479     else
2480     {
2481     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2482     return 2;
2483     }
2484     }
2485 nigel 49
2486 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2487 nigel 87
2488     #ifdef JFRIEDL_DEBUG
2489     if (S_arg > 9)
2490 nigel 49 {
2491 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2492     return 2;
2493     }
2494 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2495     {
2496     if (jfriedl_XT == 0) jfriedl_XT = 1;
2497     if (jfriedl_XR == 0) jfriedl_XR = 1;
2498     }
2499 nigel 87 #endif
2500 nigel 77
2501 ph10 644 /* Get memory for the main buffer, and to store the pattern and hints lists. */
2502 nigel 87
2503 ph10 644 bufsize = 3*bufthird;
2504     main_buffer = (char *)malloc(bufsize);
2505 nigel 87 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2506     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2507    
2508 ph10 644 if (main_buffer == NULL || pattern_list == NULL || hints_list == NULL)
2509 nigel 87 {
2510     fprintf(stderr, "pcregrep: malloc failed\n");
2511 ph10 123 goto EXIT2;
2512 nigel 87 }
2513    
2514     /* If no patterns were provided by -e, and there is no file provided by -f,
2515     the first argument is the one and only pattern, and it must exist. */
2516    
2517     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2518     {
2519 nigel 63 if (i >= argc) return usage(2);
2520 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2521     }
2522 nigel 77
2523 nigel 87 /* Compile the patterns that were provided on the command line, either by
2524     multiple uses of -e or as a single unkeyed pattern. */
2525    
2526     for (j = 0; j < cmd_pattern_count; j++)
2527     {
2528     if (!compile_pattern(patterns[j], pcre_options, NULL,
2529     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2530 ph10 123 goto EXIT2;
2531 nigel 87 }
2532    
2533     /* Compile the regular expressions that are provided in a file. */
2534    
2535     if (pattern_filename != NULL)
2536     {
2537     int linenumber = 0;
2538     FILE *f;
2539     char *filename;
2540 ph10 644 char buffer[PATBUFSIZE];
2541 nigel 87
2542     if (strcmp(pattern_filename, "-") == 0)
2543 nigel 77 {
2544 nigel 87 f = stdin;
2545     filename = stdin_name;
2546 nigel 77 }
2547 nigel 87 else
2548 nigel 77 {
2549 nigel 87 f = fopen(pattern_filename, "r");
2550     if (f == NULL)
2551     {
2552     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2553     strerror(errno));
2554 ph10 123 goto EXIT2;
2555 nigel 87 }
2556     filename = pattern_filename;
2557 nigel 77 }
2558    
2559 ph10 644 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2560 nigel 53 {
2561 nigel 87 char *s = buffer + (int)strlen(buffer);
2562     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2563     *s = 0;
2564     linenumber++;
2565     if (buffer[0] == 0) continue; /* Skip blank lines */
2566     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2567 ph10 121 goto EXIT2;
2568 nigel 53 }
2569 nigel 87
2570     if (f != stdin) fclose(f);
2571 nigel 49 }
2572    
2573 nigel 77 /* Study the regular expressions, as we will be running them many times */
2574 nigel 53
2575     for (j = 0; j < pattern_count; j++)
2576     {
2577 ph10 667 hints_list[j] = pcre_study(pattern_list[j], study_options, &error);
2578 nigel 53 if (error != NULL)
2579     {
2580     char s[16];
2581     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2582     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2583 ph10 121 goto EXIT2;
2584 nigel 53 }
2585 ph10 142 hint_count++;
2586 nigel 53 }
2587 ph10 579
2588 ph10 561 /* If --match-limit or --recursion-limit was set, put the value(s) into the
2589     pcre_extra block for each pattern. */
2590 nigel 53
2591 ph10 561 if (match_limit > 0 || match_limit_recursion > 0)
2592     {
2593     for (j = 0; j < pattern_count; j++)
2594     {
2595     if (hints_list[j] == NULL)
2596     {
2597     hints_list[j] = malloc(sizeof(pcre_extra));
2598 ph10 579 if (hints_list[j] == NULL)
2599 ph10 561 {
2600     fprintf(stderr, "pcregrep: malloc failed\n");
2601     pcregrep_exit(2);
2602     }
2603     }
2604     if (match_limit > 0)
2605 ph10 579 {
2606 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT;
2607     hints_list[j]->match_limit = match_limit;
2608 ph10 579 }
2609 ph10 561 if (match_limit_recursion > 0)
2610 ph10 579 {
2611 ph10 561 hints_list[j]->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2612     hints_list[j]->match_limit_recursion = match_limit_recursion;
2613 ph10 579 }
2614 ph10 561 }
2615 ph10 579 }
2616 ph10 561
2617 nigel 77 /* If there are include or exclude patterns, compile them. */
2618    
2619     if (exclude_pattern != NULL)
2620     {
2621 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2622     pcretables);
2623 nigel 77 if (exclude_compiled == NULL)
2624     {
2625     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2626     errptr, error);
2627 ph10 121 goto EXIT2;
2628 nigel 77 }
2629     }
2630    
2631     if (include_pattern != NULL)
2632     {
2633 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2634     pcretables);
2635 nigel 77 if (include_compiled == NULL)
2636     {
2637     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2638     errptr, error);
2639 ph10 121 goto EXIT2;
2640 nigel 77 }
2641     }
2642    
2643 ph10 325 if (exclude_dir_pattern != NULL)
2644     {
2645     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2646     pcretables);
2647     if (exclude_dir_compiled == NULL)
2648     {
2649     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2650     errptr, error);
2651     goto EXIT2;
2652     }
2653     }
2654    
2655     if (include_dir_pattern != NULL)
2656     {
2657     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2658     pcretables);
2659     if (include_dir_compiled == NULL)
2660     {
2661     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2662     errptr, error);
2663     goto EXIT2;
2664     }
2665     }
2666    
2667 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2668 nigel 49
2669 nigel 87 if (i >= argc)
2670 ph10 121 {
2671 ph10 654 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
2672 ph10 644 (filenames > FN_DEFAULT)? stdin_name : NULL);
2673 ph10 121 goto EXIT;
2674 ph10 123 }
2675 nigel 49
2676 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2677     Pass in the fact that there is only one argument at top level - this suppresses
2678 nigel 87 the file name if the argument is not a directory and filenames are not
2679     otherwise forced. */
2680 nigel 49
2681 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2682 nigel 49
2683     for (; i < argc; i++)
2684     {
2685 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2686     only_one_at_top);
2687 nigel 77 if (frc > 1) rc = frc;
2688     else if (frc == 0 && rc == 1) rc = 0;
2689 nigel 49 }
2690    
2691 ph10 121 EXIT:
2692 ph10 644 if (main_buffer != NULL) free(main_buffer);
2693 ph10 121 if (pattern_list != NULL)
2694     {
2695 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2696 ph10 121 free(pattern_list);
2697 ph10 123 }
2698 ph10 121 if (hints_list != NULL)
2699     {
2700 ph10 579 for (i = 0; i < hint_count; i++)
2701 ph10 561 {
2702 ph10 667 if (hints_list[i] != NULL) pcre_free_study(hints_list[i]);
2703 ph10 579 }
2704 ph10 121 free(hints_list);
2705 ph10 123 }
2706 ph10 561 pcregrep_exit(rc);
2707 ph10 121
2708     EXIT2:
2709     rc = 2;
2710     goto EXIT;
2711 nigel 49 }
2712    
2713 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12