/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory | Revision Log


Revision 515 - (hide annotations) (download)
Tue May 4 09:12:25 2010 UTC (4 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 70154 byte(s)
Avoid warnings about fwrite in pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 515 Copyright (c) 1997-2010 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 ph10 378 #define OFFSET_SIZE 99
75 nigel 49
76 nigel 77 #if BUFSIZ > 8192
77     #define MBUFTHIRD BUFSIZ
78     #else
79     #define MBUFTHIRD 8192
80     #endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
86 ph10 420 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
87 nigel 87
88 ph10 286 /* File reading styles */
89    
90     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
91    
92 nigel 87 /* Actions for the -d and -D options */
93    
94     enum { dee_READ, dee_SKIP, dee_RECURSE };
95     enum { DEE_READ, DEE_SKIP };
96    
97     /* Actions for special processing options (flag bits) */
98    
99     #define PO_WORD_MATCH 0x0001
100     #define PO_LINE_MATCH 0x0002
101     #define PO_FIXED_STRINGS 0x0004
102    
103 nigel 93 /* Line ending types */
104 nigel 87
105 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
106 nigel 87
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf().

The fudge is wrapped in do { } while (0) so that the macro expands to exactly
one statement. A bare "if (...) {}" expansion followed by the caller's
semicolon is two statements, which breaks when FWRITE(...); is used as the
unbraced body of an "if" that has an "else". */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
114 nigel 93
115 ph10 515
116    
117 nigel 49 /*************************************************
118     * Global variables *
119     *************************************************/
120    
121 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
122     regular code. */
123    
124     #ifdef JFRIEDL_DEBUG
125     static int S_arg = -1;
126 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128     static const char *jfriedl_prefix = "";
129     static const char *jfriedl_postfix = "";
130 nigel 87 #endif
131    
132 nigel 93 static int endlinetype;
133 nigel 91
134 nigel 87 static char *colour_string = (char *)"1;31";
135     static char *colour_option = NULL;
136     static char *dee_option = NULL;
137     static char *DEE_option = NULL;
138 nigel 91 static char *newline = NULL;
139 nigel 53 static char *pattern_filename = NULL;
140 nigel 77 static char *stdin_name = (char *)"(standard input)";
141 nigel 87 static char *locale = NULL;
142    
143     static const unsigned char *pcretables = NULL;
144    
145 nigel 53 static int pattern_count = 0;
146 ph10 121 static pcre **pattern_list = NULL;
147     static pcre_extra **hints_list = NULL;
148 nigel 49
149 nigel 77 static char *include_pattern = NULL;
150     static char *exclude_pattern = NULL;
151 ph10 325 static char *include_dir_pattern = NULL;
152     static char *exclude_dir_pattern = NULL;
153 nigel 77
154     static pcre *include_compiled = NULL;
155     static pcre *exclude_compiled = NULL;
156 ph10 325 static pcre *include_dir_compiled = NULL;
157     static pcre *exclude_dir_compiled = NULL;
158 nigel 77
159     static int after_context = 0;
160     static int before_context = 0;
161     static int both_context = 0;
162 nigel 87 static int dee_action = dee_READ;
163     static int DEE_action = DEE_READ;
164     static int error_count = 0;
165     static int filenames = FN_DEFAULT;
166     static int process_options = 0;
167 nigel 77
168 nigel 49 static BOOL count_only = FALSE;
169 nigel 87 static BOOL do_colour = FALSE;
170 ph10 280 static BOOL file_offsets = FALSE;
171 nigel 77 static BOOL hyphenpending = FALSE;
172 nigel 49 static BOOL invert = FALSE;
173 ph10 280 static BOOL line_offsets = FALSE;
174 nigel 77 static BOOL multiline = FALSE;
175 nigel 49 static BOOL number = FALSE;
176 ph10 420 static BOOL omit_zero_count = FALSE;
177 nigel 87 static BOOL only_matching = FALSE;
178 nigel 77 static BOOL quiet = FALSE;
179 nigel 49 static BOOL silent = FALSE;
180 nigel 93 static BOOL utf8 = FALSE;
181 nigel 49
182 nigel 53 /* Structure for options and list of them */
183 nigel 49
184 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
185     OP_PATLIST };
186 nigel 77
187 nigel 53 typedef struct option_item {
188 nigel 77 int type;
189 nigel 53 int one_char;
190 nigel 77 void *dataptr;
191 nigel 67 const char *long_name;
192     const char *help_text;
193 nigel 53 } option_item;
194 nigel 49
195 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
196     used to identify them. */
197    
198 ph10 325 #define N_COLOUR (-1)
199     #define N_EXCLUDE (-2)
200     #define N_EXCLUDE_DIR (-3)
201     #define N_HELP (-4)
202     #define N_INCLUDE (-5)
203     #define N_INCLUDE_DIR (-6)
204     #define N_LABEL (-7)
205     #define N_LOCALE (-8)
206     #define N_NULL (-9)
207     #define N_LOFFSETS (-10)
208     #define N_FOFFSETS (-11)
209 nigel 87
210 nigel 53 static option_item optionlist[] = {
211 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
212     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
213     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
214     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
215     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
216     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
217     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
218     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
219     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
220     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
221 ph10 422 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
222 ph10 421 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
223 nigel 87 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
224 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
225 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
226     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
227     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
228     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
229     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
230     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
231 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
232 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
233     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
234 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
235 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
236     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
237     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
238     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
239     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
240     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
241 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
242     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
243 nigel 87 #ifdef JFRIEDL_DEBUG
244     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
245     #endif
246     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
247     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
248     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
249     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
250     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
251     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
252     { OP_NODATA, 0, NULL, NULL, NULL }
253 nigel 53 };
254    
255 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
256     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
257     that the combination of -w and -x has the same effect as -x on its own, so we
258     can treat them as the same. */
259 nigel 53
260 nigel 87 static const char *prefix[] = {
261     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
262    
263     static const char *suffix[] = {
264     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
265    
266 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
267 nigel 87
268 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
269 nigel 87
270 nigel 93 const char utf8_table4[] = {
271     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
272     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
273     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
274     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
275    
276    
277    
278 nigel 53 /*************************************************
279 nigel 87 * OS-specific functions *
280 nigel 53 *************************************************/
281    
282     /* These functions are defined so that they can be made system specific,
283 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
284 nigel 53
285    
286     /************* Directory scanning in Unix ***********/
287    
288 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
289 nigel 53 #include <sys/types.h>
290     #include <sys/stat.h>
291     #include <dirent.h>
292    
293     typedef DIR directory_type;
294    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' (non-zero) if stat() succeeds and reports a directory
            0 otherwise, including when stat() itself fails

The file type is tested with S_ISDIR() rather than by masking st_mode with
S_IFMT, because the S_IFxxx masks are an XSI extension that is not visible
under strict feature-test settings, whereas S_ISDIR() is base POSIX. */

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
303    
304 nigel 67 static directory_type *
305 nigel 53 opendirectory(char *filename)
306     {
307     return opendir(filename);
308     }
309    
310 nigel 67 static char *
311 nigel 53 readdirectory(directory_type *dir)
312     {
313     for (;;)
314     {
315     struct dirent *dent = readdir(dir);
316     if (dent == NULL) return NULL;
317     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
318     return dent->d_name;
319     }
320 ph10 151 /* Control never reaches here */
321 nigel 53 }
322    
323 nigel 67 static void
324 nigel 53 closedirectory(directory_type *dir)
325     {
326     closedir(dir);
327     }
328    
329    
330 nigel 87 /************* Test for regular file in Unix **********/
331    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    1 if stat() succeeds and reports a regular file
            1 also if stat() fails (see the comment in the code)
            0 if stat() succeeds and reports any other file type

The file type is tested with S_ISREG() rather than by masking st_mode with
S_IFMT, because the S_IFxxx masks are an XSI extension that is not visible
under strict feature-test settings, whereas S_ISREG() is base POSIX. */

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode) != 0;
}
340    
341    
342     /************* Test stdout for being a terminal in Unix **********/
343    
344     static BOOL
345     is_stdout_tty(void)
346     {
347     return isatty(fileno(stdout));
348     }
349    
350    
351 nigel 63 /************* Directory scanning in Win32 ***********/
352 nigel 53
353 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
354 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
355 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
356     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
357 ph10 283 */
358 nigel 53
359 ph10 97 #elif HAVE_WINDOWS_H
360 nigel 63
361     #ifndef STRICT
362     # define STRICT
363     #endif
364     #ifndef WIN32_LEAN_AND_MEAN
365     # define WIN32_LEAN_AND_MEAN
366     #endif
367 ph10 283
368     #include <windows.h>
369    
370 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
371     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
372     #endif
373    
374 nigel 63 typedef struct directory_type
375     {
376     HANDLE handle;
377     BOOL first;
378     WIN32_FIND_DATA data;
379     } directory_type;
380    
381     int
382     isdirectory(char *filename)
383     {
384     DWORD attr = GetFileAttributes(filename);
385     if (attr == INVALID_FILE_ATTRIBUTES)
386     return 0;
387     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
388     }
389    
390     directory_type *
391     opendirectory(char *filename)
392     {
393     size_t len;
394     char *pattern;
395     directory_type *dir;
396     DWORD err;
397     len = strlen(filename);
398     pattern = (char *) malloc(len + 3);
399     dir = (directory_type *) malloc(sizeof(*dir));
400     if ((pattern == NULL) || (dir == NULL))
401     {
402     fprintf(stderr, "pcregrep: malloc failed\n");
403     exit(2);
404     }
405     memcpy(pattern, filename, len);
406     memcpy(&(pattern[len]), "\\*", 3);
407     dir->handle = FindFirstFile(pattern, &(dir->data));
408     if (dir->handle != INVALID_HANDLE_VALUE)
409     {
410     free(pattern);
411     dir->first = TRUE;
412     return dir;
413     }
414     err = GetLastError();
415     free(pattern);
416     free(dir);
417     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
418     return NULL;
419     }
420    
421     char *
422     readdirectory(directory_type *dir)
423     {
424     for (;;)
425     {
426     if (!dir->first)
427     {
428     if (!FindNextFile(dir->handle, &(dir->data)))
429     return NULL;
430     }
431     else
432     {
433     dir->first = FALSE;
434     }
435     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
436     return dir->data.cFileName;
437     }
438     #ifndef _MSC_VER
439     return NULL; /* Keep compiler happy; never executed */
440     #endif
441     }
442    
443     void
444     closedirectory(directory_type *dir)
445     {
446     FindClose(dir->handle);
447     free(dir);
448     }
449    
450    
451 nigel 87 /************* Test for regular file in Win32 **********/
452    
453     /* I don't know how to do this, or if it can be done; assume all paths are
454     regular if they are not directories. */
455    
/* Win32 version: anything that isdirectory() does not recognize as a
directory is treated as a regular file. Returns 1 or 0. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
460    
461    
462     /************* Test stdout for being a terminal in Win32 **********/
463    
464     /* I don't know how to do this; assume never */
465    
466     static BOOL
467     is_stdout_tty(void)
468     {
469 ph10 283 return FALSE;
470 nigel 87 }
471    
472    
473 nigel 53 /************* Directory scanning when we can't do it ***********/
474    
475     /* The type is void, and apart from isdirectory(), the functions do nothing. */
476    
477 nigel 63 #else
478    
typedef void directory_type;

/* With no directory support, nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* Opening a directory always fails, yielding a null pointer. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* Nothing was opened, so there is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
485    
486 nigel 87
487     /************* Test for regular when we can't do it **********/
488    
489     /* Assume all files are regular. */
490    
int
isregfile(char *filename)
{
(void)filename;    /* No test is possible; every path counts as regular */
return 1;
}
492    
493    
494     /************* Test stdout for being a terminal when we can't do it **********/
495    
496     static BOOL
497     is_stdout_tty(void)
498     {
499     return FALSE;
500     }
501    
502    
503 nigel 53 #endif
504    
505    
506    
507 ph10 137 #ifndef HAVE_STRERROR
508 nigel 49 /*************************************************
509     * Provide strerror() for non-ANSI libraries *
510     *************************************************/
511    
512     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
513     in their libraries, but can provide the same facility by this simple
514     alternative function. */
515    
extern int   sys_nerr;
extern char *sys_errlist[];

/* Map an error number onto its text using the system's sys_errlist table.
Numbers outside the range 0 to sys_nerr-1 yield a fixed "unknown" string. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
525     #endif /* HAVE_STRERROR */
526    
527    
528    
529     /*************************************************
530 nigel 93 * Find end of line *
531     *************************************************/
532    
533     /* The length of the endline sequence that is found is set via lenptr. This may
534     be zero at the very end of the file if there is no line-ending sequence there.
535    
536     Arguments:
537     p current position in line
538     endptr end of available data
539     lenptr where to put the length of the eol sequence
540    
541     Returns: pointer to the last byte of the line
542     */
543    
544     static char *
545     end_of_line(char *p, char *endptr, int *lenptr)
546     {
547     switch(endlinetype)
548     {
549     default: /* Just in case */
550     case EL_LF:
551     while (p < endptr && *p != '\n') p++;
552     if (p < endptr)
553     {
554     *lenptr = 1;
555     return p + 1;
556     }
557     *lenptr = 0;
558     return endptr;
559    
560     case EL_CR:
561     while (p < endptr && *p != '\r') p++;
562     if (p < endptr)
563     {
564     *lenptr = 1;
565     return p + 1;
566     }
567     *lenptr = 0;
568     return endptr;
569    
570     case EL_CRLF:
571     for (;;)
572     {
573     while (p < endptr && *p != '\r') p++;
574     if (++p >= endptr)
575     {
576     *lenptr = 0;
577     return endptr;
578     }
579     if (*p == '\n')
580     {
581     *lenptr = 2;
582     return p + 1;
583     }
584     }
585     break;
586    
587 ph10 149 case EL_ANYCRLF:
588     while (p < endptr)
589     {
590     int extra = 0;
591     register int c = *((unsigned char *)p);
592    
593     if (utf8 && c >= 0xc0)
594     {
595     int gcii, gcss;
596     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
597     gcss = 6*extra;
598     c = (c & utf8_table3[extra]) << gcss;
599     for (gcii = 1; gcii <= extra; gcii++)
600     {
601     gcss -= 6;
602     c |= (p[gcii] & 0x3f) << gcss;
603     }
604     }
605    
606     p += 1 + extra;
607    
608     switch (c)
609     {
610     case 0x0a: /* LF */
611     *lenptr = 1;
612     return p;
613    
614     case 0x0d: /* CR */
615     if (p < endptr && *p == 0x0a)
616     {
617     *lenptr = 2;
618     p++;
619     }
620     else *lenptr = 1;
621     return p;
622 ph10 150
623 ph10 149 default:
624     break;
625     }
626     } /* End of loop for ANYCRLF case */
627 ph10 150
628 ph10 149 *lenptr = 0; /* Must have hit the end */
629     return endptr;
630    
631 nigel 93 case EL_ANY:
632     while (p < endptr)
633     {
634     int extra = 0;
635     register int c = *((unsigned char *)p);
636    
637     if (utf8 && c >= 0xc0)
638     {
639     int gcii, gcss;
640     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
641     gcss = 6*extra;
642     c = (c & utf8_table3[extra]) << gcss;
643     for (gcii = 1; gcii <= extra; gcii++)
644     {
645     gcss -= 6;
646     c |= (p[gcii] & 0x3f) << gcss;
647     }
648     }
649    
650     p += 1 + extra;
651    
652     switch (c)
653     {
654     case 0x0a: /* LF */
655     case 0x0b: /* VT */
656     case 0x0c: /* FF */
657     *lenptr = 1;
658     return p;
659    
660     case 0x0d: /* CR */
661     if (p < endptr && *p == 0x0a)
662     {
663     *lenptr = 2;
664     p++;
665     }
666     else *lenptr = 1;
667     return p;
668    
669     case 0x85: /* NEL */
670     *lenptr = utf8? 2 : 1;
671     return p;
672    
673     case 0x2028: /* LS */
674     case 0x2029: /* PS */
675     *lenptr = 3;
676     return p;
677    
678     default:
679     break;
680     }
681     } /* End of loop for ANY case */
682    
683     *lenptr = 0; /* Must have hit the end */
684     return endptr;
685     } /* End of overall switch */
686     }
687    
688    
689    
690     /*************************************************
691     * Find start of previous line *
692     *************************************************/
693    
694     /* This is called when looking back for before lines to print.
695    
696     Arguments:
697     p start of the subsequent line
698     startptr start of available data
699    
700     Returns: pointer to the start of the previous line
701     */
702    
703     static char *
704     previous_line(char *p, char *startptr)
705     {
706     switch(endlinetype)
707     {
708     default: /* Just in case */
709     case EL_LF:
710     p--;
711     while (p > startptr && p[-1] != '\n') p--;
712     return p;
713    
714     case EL_CR:
715     p--;
716     while (p > startptr && p[-1] != '\n') p--;
717     return p;
718    
719     case EL_CRLF:
720     for (;;)
721     {
722     p -= 2;
723     while (p > startptr && p[-1] != '\n') p--;
724     if (p <= startptr + 1 || p[-2] == '\r') return p;
725     }
726     return p; /* But control should never get here */
727    
728     case EL_ANY:
729 ph10 150 case EL_ANYCRLF:
730 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
731     if (utf8) while ((*p & 0xc0) == 0x80) p--;
732    
733     while (p > startptr)
734     {
735     register int c;
736     char *pp = p - 1;
737    
738     if (utf8)
739     {
740     int extra = 0;
741     while ((*pp & 0xc0) == 0x80) pp--;
742     c = *((unsigned char *)pp);
743     if (c >= 0xc0)
744     {
745     int gcii, gcss;
746     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
747     gcss = 6*extra;
748     c = (c & utf8_table3[extra]) << gcss;
749     for (gcii = 1; gcii <= extra; gcii++)
750     {
751     gcss -= 6;
752     c |= (pp[gcii] & 0x3f) << gcss;
753     }
754     }
755     }
756     else c = *((unsigned char *)pp);
757    
758 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
759 nigel 93 {
760     case 0x0a: /* LF */
761 ph10 149 case 0x0d: /* CR */
762     return p;
763 ph10 150
764 ph10 149 default:
765     break;
766 ph10 150 }
767 ph10 149
768     else switch (c)
769     {
770     case 0x0a: /* LF */
771 nigel 93 case 0x0b: /* VT */
772     case 0x0c: /* FF */
773     case 0x0d: /* CR */
774     case 0x85: /* NEL */
775     case 0x2028: /* LS */
776     case 0x2029: /* PS */
777     return p;
778    
779     default:
780     break;
781     }
782    
783     p = pp; /* Back one character */
784     } /* End of loop for ANY case */
785    
786     return startptr; /* Hit start of data */
787     } /* End of overall switch */
788     }
789    
790    
791    
792    
793    
794     /*************************************************
795 nigel 77 * Print the previous "after" lines *
796 nigel 49 *************************************************/
797    
798 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
799 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
800     that a binary zero does not terminate it.
801 nigel 77
802     Arguments:
803     lastmatchnumber the number of the last matching line, plus one
804     lastmatchrestart where we restarted after the last match
805     endptr end of available data
806     printname filename for printing
807    
808     Returns: nothing
809     */
810    
811     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
812     char *endptr, char *printname)
813     {
814     if (after_context > 0 && lastmatchnumber > 0)
815     {
816     int count = 0;
817     while (lastmatchrestart < endptr && count++ < after_context)
818     {
819 nigel 93 int ellength;
820 nigel 77 char *pp = lastmatchrestart;
821     if (printname != NULL) fprintf(stdout, "%s-", printname);
822     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
823 nigel 93 pp = end_of_line(pp, endptr, &ellength);
824 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
825 nigel 93 lastmatchrestart = pp;
826 nigel 77 }
827     hyphenpending = TRUE;
828     }
829     }
830    
831    
832    
833     /*************************************************
834 ph10 378 * Apply patterns to subject till one matches *
835     *************************************************/
836    
837 ph10 392 /* This function is called to run through all patterns, looking for a match. It
838     is used multiple times for the same subject when colouring is enabled, in order
839 ph10 378 to find all possible matches.
840    
841     Arguments:
842     matchptr the start of the subject
843     length the length of the subject to match
844     offsets the offets vector to fill in
845     mrc address of where to put the result of pcre_exec()
846 ph10 392
847     Returns: TRUE if there was a match
848 ph10 378 FALSE if there was no match
849     invert if there was a non-fatal error
850 ph10 392 */
851 ph10 378
852     static BOOL
853     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
854     {
855     int i;
856     for (i = 0; i < pattern_count; i++)
857     {
858 ph10 379 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
859     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
860 ph10 378 if (*mrc >= 0) return TRUE;
861     if (*mrc == PCRE_ERROR_NOMATCH) continue;
862     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
863     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864     fprintf(stderr, "this text:\n");
865 ph10 515 FWRITE(matchptr, 1, length, stderr); /* In case binary zero included */
866 ph10 378 fprintf(stderr, "\n");
867     if (error_count == 0 &&
868     (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
869     {
870     fprintf(stderr, "pcregrep: error %d means that a resource limit "
871     "was exceeded\n", *mrc);
872     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873     }
874     if (error_count++ > 20)
875     {
876     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877     exit(2);
878     }
879     return invert; /* No more matching; don't show the line again */
880     }
881    
882     return FALSE; /* No match, no errors */
883     }
884    
885    
886    
887     /*************************************************
888 nigel 77 * Grep an individual file *
889     *************************************************/
890    
891     /* This is called from grep_or_recurse() below. It uses a buffer that is three
892     times the value of MBUFTHIRD. The matching point is never allowed to stray into
893     the top third of the buffer, thus keeping more of the file available for
894     context printing or for multiline scanning. For large files, the pointer will
895     be in the middle third most of the time, so the bottom third is available for
896     "before" context printing.
897    
898     Arguments:
899 ph10 286 handle the fopened FILE stream for a normal file
900     the gzFile pointer when reading is via libz
901     the BZFILE pointer when reading is via libbz2
902     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
903 nigel 77 printname the file name if it is to be printed for each match
904     or NULL if the file name is not to be printed
905     it cannot be NULL if filenames[_nomatch]_only is set
906    
907     Returns: 0 if there was at least one match
908     1 otherwise (no matches)
909 ph10 286 2 if there is a read error on a .bz2 file
910 nigel 77 */
911    
912 nigel 49 static int
913 ph10 286 pcregrep(void *handle, int frtype, char *printname)
914 nigel 49 {
915     int rc = 1;
916 nigel 77 int linenumber = 1;
917     int lastmatchnumber = 0;
918 nigel 49 int count = 0;
919 ph10 280 int filepos = 0;
920 ph10 378 int offsets[OFFSET_SIZE];
921 nigel 77 char *lastmatchrestart = NULL;
922     char buffer[3*MBUFTHIRD];
923     char *ptr = buffer;
924     char *endptr;
925     size_t bufflength;
926     BOOL endhyphenpending = FALSE;
927 ph10 286 FILE *in = NULL; /* Ensure initialized */
928 nigel 49
929 ph10 286 #ifdef SUPPORT_LIBZ
930     gzFile ingz = NULL;
931     #endif
932 nigel 77
933 ph10 286 #ifdef SUPPORT_LIBBZ2
934     BZFILE *inbz2 = NULL;
935     #endif
936    
937    
938     /* Do the first read into the start of the buffer and set up the pointer to end
939     of what we have. In the case of libz, a non-zipped .gz file will be read as a
940     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
941     fail. */
942    
943     #ifdef SUPPORT_LIBZ
944     if (frtype == FR_LIBZ)
945     {
946     ingz = (gzFile)handle;
947     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
948     }
949     else
950     #endif
951    
952     #ifdef SUPPORT_LIBBZ2
953     if (frtype == FR_LIBBZ2)
954     {
955     inbz2 = (BZFILE *)handle;
956     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
957     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
958     } /* without the cast it is unsigned. */
959     else
960     #endif
961    
962     {
963     in = (FILE *)handle;
964     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
965     }
966    
967 nigel 77 endptr = buffer + bufflength;
968    
969     /* Loop while the current pointer is not at the end of the file. For large
970     files, endptr will be at the end of the buffer when we are in the middle of the
971     file, but ptr will never get there, because as soon as it gets over 2/3 of the
972     way, the buffer is shifted left and re-filled. */
973    
974     while (ptr < endptr)
975 nigel 49 {
976 ph10 378 int endlinelength;
977 nigel 87 int mrc = 0;
978 ph10 378 BOOL match;
979 ph10 286 char *matchptr = ptr;
980 nigel 77 char *t = ptr;
981     size_t length, linelength;
982 nigel 49
983 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
984     of the subject string to pass to pcre_exec(). In multiline mode, it is the
985     length remainder of the data in the buffer. Otherwise, it is the length of
986 ph10 378 the next line, excluding the terminating newline. After matching, we always
987     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
988     option is used for compiling, so that any match is constrained to be in the
989     first line. */
990 nigel 77
991 nigel 93 t = end_of_line(t, endptr, &endlinelength);
992     linelength = t - ptr - endlinelength;
993 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
994 nigel 77
995 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
996    
997     #ifdef JFRIEDL_DEBUG
998     if (jfriedl_XT || jfriedl_XR)
999     {
1000     #include <sys/time.h>
1001     #include <time.h>
1002     struct timeval start_time, end_time;
1003     struct timezone dummy;
1004 ph10 392 int i;
1005 nigel 89
1006     if (jfriedl_XT)
1007     {
1008     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1009     const char *orig = ptr;
1010     ptr = malloc(newlen + 1);
1011     if (!ptr) {
1012     printf("out of memory");
1013     exit(2);
1014     }
1015     endptr = ptr;
1016     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1017     for (i = 0; i < jfriedl_XT; i++) {
1018     strncpy(endptr, orig, length);
1019     endptr += length;
1020     }
1021     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1022     length = newlen;
1023     }
1024    
1025     if (gettimeofday(&start_time, &dummy) != 0)
1026     perror("bad gettimeofday");
1027    
1028    
1029     for (i = 0; i < jfriedl_XR; i++)
1030 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1031 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1032 nigel 89
1033     if (gettimeofday(&end_time, &dummy) != 0)
1034     perror("bad gettimeofday");
1035    
1036     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1037     -
1038     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1039    
1040     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1041     return 0;
1042     }
1043     #endif
1044    
1045 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1046 ph10 279 in order to find any further matches in the same line. */
1047 nigel 89
1048 ph10 286 ONLY_MATCHING_RESTART:
1049    
1050 ph10 392 /* Run through all the patterns until one matches or there is an error other
1051 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1052     finding subsequent matches when colouring matched lines. */
1053 ph10 392
1054 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1055 nigel 77
1056 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1057 nigel 77
1058 nigel 49 if (match != invert)
1059     {
1060 nigel 77 BOOL hyphenprinted = FALSE;
1061    
1062 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1063 nigel 77
1064 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1065    
1066     /* Just count if just counting is wanted. */
1067    
1068 nigel 49 if (count_only) count++;
1069    
1070 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1071     in the file. */
1072    
1073 ph10 420 else if (filenames == FN_MATCH_ONLY)
1074 nigel 49 {
1075 nigel 77 fprintf(stdout, "%s\n", printname);
1076 nigel 49 return 0;
1077     }
1078    
1079 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1080    
1081 nigel 77 else if (quiet) return 0;
1082 nigel 49
1083 nigel 87 /* The --only-matching option prints just the substring that matched, and
1084 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1085 ph10 280 matching substring (they both force --only-matching). None of these options
1086     prints any context. Afterwards, adjust the start and length, and then jump
1087     back to look for further matches in the same line. If we are in invert
1088     mode, however, nothing is printed - this could be still useful because the
1089     return code is set. */
1090 nigel 87
1091     else if (only_matching)
1092     {
1093 ph10 279 if (!invert)
1094 ph10 286 {
1095 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1096     if (number) fprintf(stdout, "%d:", linenumber);
1097 ph10 280 if (line_offsets)
1098 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1099 ph10 286 offsets[1] - offsets[0]);
1100 ph10 280 else if (file_offsets)
1101 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1102 ph10 286 offsets[1] - offsets[0]);
1103     else
1104 ph10 377 {
1105     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1106 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1107 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1108 ph10 392 }
1109 ph10 279 fprintf(stdout, "\n");
1110     matchptr += offsets[1];
1111     length -= offsets[1];
1112 ph10 286 match = FALSE;
1113     goto ONLY_MATCHING_RESTART;
1114     }
1115 nigel 87 }
1116    
1117     /* This is the default case when none of the above options is set. We print
1118     the matching lines(s), possibly preceded and/or followed by other lines of
1119     context. */
1120    
1121 nigel 49 else
1122     {
1123 nigel 77 /* See if there is a requirement to print some "after" lines from a
1124     previous match. We never print any overlaps. */
1125    
1126     if (after_context > 0 && lastmatchnumber > 0)
1127     {
1128 nigel 93 int ellength;
1129 nigel 77 int linecount = 0;
1130     char *p = lastmatchrestart;
1131    
1132     while (p < ptr && linecount < after_context)
1133     {
1134 nigel 93 p = end_of_line(p, ptr, &ellength);
1135 nigel 77 linecount++;
1136     }
1137    
1138     /* It is important to advance lastmatchrestart during this printing so
1139 nigel 87 that it interacts correctly with any "before" printing below. Print
1140     each line's data using fwrite() in case there are binary zeroes. */
1141 nigel 77
1142     while (lastmatchrestart < p)
1143     {
1144     char *pp = lastmatchrestart;
1145     if (printname != NULL) fprintf(stdout, "%s-", printname);
1146     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1147 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1148 ph10 515 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1149 nigel 93 lastmatchrestart = pp;
1150 nigel 77 }
1151     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1152     }
1153    
1154     /* If there were non-contiguous lines printed above, insert hyphens. */
1155    
1156     if (hyphenpending)
1157     {
1158     fprintf(stdout, "--\n");
1159     hyphenpending = FALSE;
1160     hyphenprinted = TRUE;
1161     }
1162    
1163     /* See if there is a requirement to print some "before" lines for this
1164     match. Again, don't print overlaps. */
1165    
1166     if (before_context > 0)
1167     {
1168     int linecount = 0;
1169     char *p = ptr;
1170    
1171     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1172 nigel 87 linecount < before_context)
1173 nigel 77 {
1174 nigel 87 linecount++;
1175 nigel 93 p = previous_line(p, buffer);
1176 nigel 77 }
1177    
1178     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1179     fprintf(stdout, "--\n");
1180    
1181     while (p < ptr)
1182     {
1183 nigel 93 int ellength;
1184 nigel 77 char *pp = p;
1185     if (printname != NULL) fprintf(stdout, "%s-", printname);
1186     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1187 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1188 ph10 515 FWRITE(p, 1, pp - p, stdout);
1189 nigel 93 p = pp;
1190 nigel 77 }
1191     }
1192    
1193     /* Now print the matching line(s); ensure we set hyphenpending at the end
1194 nigel 85 of the file if any context lines are being output. */
1195 nigel 77
1196 nigel 85 if (after_context > 0 || before_context > 0)
1197     endhyphenpending = TRUE;
1198    
1199 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1200 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1201 nigel 77
1202     /* In multiline mode, we want to print to the end of the line in which
1203     the end of the matched string is found, so we adjust linelength and the
1204 ph10 222 line number appropriately, but only when there actually was a match
1205     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1206     the match will always be before the first newline sequence. */
1207 nigel 77
1208     if (multiline)
1209     {
1210 nigel 93 int ellength;
1211 ph10 222 char *endmatch = ptr;
1212     if (!invert)
1213 nigel 93 {
1214 ph10 222 endmatch += offsets[1];
1215     t = ptr;
1216     while (t < endmatch)
1217     {
1218     t = end_of_line(t, endptr, &ellength);
1219     if (t <= endmatch) linenumber++; else break;
1220     }
1221 nigel 93 }
1222     endmatch = end_of_line(endmatch, endptr, &ellength);
1223     linelength = endmatch - ptr - ellength;
1224 nigel 77 }
1225    
1226 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1227     zeroes are treated as just another data character. */
1228    
1229     /* This extra option, for Jeffrey Friedl's debugging requirements,
1230     replaces the matched string, or a specific captured string if it exists,
1231     with X. When this happens, colouring is ignored. */
1232    
1233     #ifdef JFRIEDL_DEBUG
1234     if (S_arg >= 0 && S_arg < mrc)
1235     {
1236     int first = S_arg * 2;
1237     int last = first + 1;
1238 ph10 515 FWRITE(ptr, 1, offsets[first], stdout);
1239 nigel 87 fprintf(stdout, "X");
1240 ph10 515 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1241 nigel 87 }
1242     else
1243     #endif
1244    
1245 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1246 ph10 378 matches. */
1247 nigel 87
1248     if (do_colour)
1249     {
1250 ph10 392 int last_offset = 0;
1251 ph10 515 FWRITE(ptr, 1, offsets[0], stdout);
1252 nigel 87 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253 ph10 515 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254 nigel 87 fprintf(stdout, "%c[00m", 0x1b);
1255 ph10 378 for (;;)
1256     {
1257 ph10 392 last_offset += offsets[1];
1258 ph10 378 matchptr += offsets[1];
1259     length -= offsets[1];
1260     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1261 ph10 515 FWRITE(matchptr, 1, offsets[0], stdout);
1262 ph10 378 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1263 ph10 515 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1264 ph10 378 fprintf(stdout, "%c[00m", 0x1b);
1265     }
1266 ph10 515 FWRITE(ptr + last_offset, 1,
1267     (linelength + endlinelength) - last_offset, stdout);
1268 nigel 87 }
1269 ph10 392
1270 ph10 378 /* Not colouring; no need to search for further matches */
1271 ph10 392
1272 ph10 515 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1273 nigel 49 }
1274    
1275 nigel 87 /* End of doing what has to be done for a match */
1276    
1277 nigel 77 rc = 0; /* Had some success */
1278    
1279     /* Remember where the last match happened for after_context. We remember
1280     where we are about to restart, and that line's number. */
1281    
1282 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1283 nigel 77 lastmatchnumber = linenumber + 1;
1284 nigel 49 }
1285 nigel 77
1286 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1287     anything to be printed), we have to move on to the end of the match before
1288     proceeding. */
1289    
1290     if (multiline && invert && match)
1291     {
1292     int ellength;
1293     char *endmatch = ptr + offsets[1];
1294     t = ptr;
1295     while (t < endmatch)
1296     {
1297     t = end_of_line(t, endptr, &ellength);
1298     if (t <= endmatch) linenumber++; else break;
1299     }
1300     endmatch = end_of_line(endmatch, endptr, &ellength);
1301     linelength = endmatch - ptr - ellength;
1302     }
1303    
1304 ph10 286 /* Advance to after the newline and increment the line number. The file
1305 ph10 280 offset to the current line is maintained in filepos. */
1306 nigel 77
1307 nigel 93 ptr += linelength + endlinelength;
1308 ph10 280 filepos += linelength + endlinelength;
1309 nigel 77 linenumber++;
1310    
1311     /* If we haven't yet reached the end of the file (the buffer is full), and
1312     the current point is in the top 1/3 of the buffer, slide the buffer down by
1313     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1314     about to be lost, print them. */
1315    
1316     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1317     {
1318     if (after_context > 0 &&
1319     lastmatchnumber > 0 &&
1320     lastmatchrestart < buffer + MBUFTHIRD)
1321     {
1322     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1323     lastmatchnumber = 0;
1324     }
1325    
1326     /* Now do the shuffle */
1327    
1328     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1329     ptr -= MBUFTHIRD;
1330 ph10 286
1331     #ifdef SUPPORT_LIBZ
1332     if (frtype == FR_LIBZ)
1333     bufflength = 2*MBUFTHIRD +
1334     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1335     else
1336     #endif
1337    
1338     #ifdef SUPPORT_LIBBZ2
1339     if (frtype == FR_LIBBZ2)
1340     bufflength = 2*MBUFTHIRD +
1341     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1342     else
1343     #endif
1344    
1345 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1346 ph10 286
1347 nigel 77 endptr = buffer + bufflength;
1348    
1349     /* Adjust any last match point */
1350    
1351     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1352     }
1353     } /* Loop through the whole file */
1354    
1355     /* End of file; print final "after" lines if wanted; do_after_lines sets
1356     hyphenpending if it prints something. */
1357    
1358 nigel 87 if (!only_matching && !count_only)
1359     {
1360     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1361     hyphenpending |= endhyphenpending;
1362     }
1363 nigel 77
1364     /* Print the file name if we are looking for those without matches and there
1365     were none. If we found a match, we won't have got this far. */
1366    
1367 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1368 nigel 77 {
1369     fprintf(stdout, "%s\n", printname);
1370     return 0;
1371 nigel 49 }
1372    
1373 nigel 77 /* Print the match count if wanted */
1374    
1375 nigel 49 if (count_only)
1376     {
1377 ph10 420 if (count > 0 || !omit_zero_count)
1378 ph10 461 {
1379     if (printname != NULL && filenames != FN_NONE)
1380 ph10 420 fprintf(stdout, "%s:", printname);
1381     fprintf(stdout, "%d\n", count);
1382 ph10 461 }
1383 nigel 49 }
1384    
1385     return rc;
1386     }
1387    
1388    
1389    
1390     /*************************************************
1391 nigel 53 * Grep a file or recurse into a directory *
1392     *************************************************/
1393    
1394 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1395     recursing; if it's a file, grep it.
1396    
1397     Arguments:
1398     pathname the path to investigate
1399 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1400 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1401    
1402     Returns: 0 if there was at least one match
1403     1 if there were no matches
1404     2 there was some kind of error
1405    
1406     However, file opening failures are suppressed if "silent" is set.
1407     */
1408    
1409 nigel 53 static int
1410 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1411 nigel 53 {
1412     int rc = 1;
1413     int sep;
1414 ph10 286 int frtype;
1415     int pathlen;
1416     void *handle;
1417     FILE *in = NULL; /* Ensure initialized */
1418 nigel 53
1419 ph10 286 #ifdef SUPPORT_LIBZ
1420     gzFile ingz = NULL;
1421     #endif
1422    
1423     #ifdef SUPPORT_LIBBZ2
1424     BZFILE *inbz2 = NULL;
1425     #endif
1426    
1427 nigel 77 /* If the file name is "-" we scan stdin */
1428 nigel 53
1429 nigel 77 if (strcmp(pathname, "-") == 0)
1430 nigel 53 {
1431 ph10 286 return pcregrep(stdin, FR_PLAIN,
1432 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1433 nigel 77 stdin_name : NULL);
1434     }
1435    
1436 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1437 ph10 325 each file and directory within it, subject to any include or exclude patterns
1438     that were set. The scanning code is localized so it can be made
1439     system-specific. */
1440 nigel 87
1441     if ((sep = isdirectory(pathname)) != 0)
1442 nigel 77 {
1443 nigel 87 if (dee_action == dee_SKIP) return 1;
1444     if (dee_action == dee_RECURSE)
1445 nigel 53 {
1446 nigel 87 char buffer[1024];
1447     char *nextfile;
1448     directory_type *dir = opendirectory(pathname);
1449 nigel 53
1450 nigel 87 if (dir == NULL)
1451     {
1452     if (!silent)
1453     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1454     strerror(errno));
1455     return 2;
1456     }
1457 nigel 77
1458 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1459     {
1460 ph10 324 int frc, nflen;
1461 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1462 ph10 324 nflen = strlen(nextfile);
1463 ph10 345
1464 ph10 325 if (isdirectory(buffer))
1465     {
1466     if (exclude_dir_compiled != NULL &&
1467     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1468     continue;
1469 ph10 345
1470 ph10 325 if (include_dir_compiled != NULL &&
1471     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1472     continue;
1473     }
1474 ph10 345 else
1475     {
1476 ph10 324 if (exclude_compiled != NULL &&
1477     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1478     continue;
1479 ph10 345
1480 ph10 324 if (include_compiled != NULL &&
1481     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1482     continue;
1483 ph10 345 }
1484 nigel 77
1485 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1486     if (frc > 1) rc = frc;
1487     else if (frc == 0 && rc == 1) rc = 0;
1488     }
1489    
1490     closedirectory(dir);
1491     return rc;
1492 nigel 53 }
1493     }
1494    
1495 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1496     been requested. */
1497 nigel 53
1498 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1499    
1500     /* Control reaches here if we have a regular file, or if we have a directory
1501     and recursion or skipping was not requested, or if we have anything else and
1502     skipping was not requested. The scan proceeds. If this is the first and only
1503     argument at top level, we don't show the file name, unless we are only showing
1504     the file name, or the filename was forced (-H). */
1505    
1506 ph10 286 pathlen = strlen(pathname);
1507    
1508     /* Open using zlib if it is supported and the file name ends with .gz. */
1509    
1510     #ifdef SUPPORT_LIBZ
1511     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1512 nigel 53 {
1513 ph10 286 ingz = gzopen(pathname, "rb");
1514     if (ingz == NULL)
1515     {
1516     if (!silent)
1517     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1518     strerror(errno));
1519     return 2;
1520     }
1521     handle = (void *)ingz;
1522     frtype = FR_LIBZ;
1523     }
1524     else
1525     #endif
1526    
1527     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1528    
1529     #ifdef SUPPORT_LIBBZ2
1530     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1531     {
1532     inbz2 = BZ2_bzopen(pathname, "rb");
1533     handle = (void *)inbz2;
1534     frtype = FR_LIBBZ2;
1535     }
1536     else
1537     #endif
1538    
1539     /* Otherwise use plain fopen(). The label is so that we can come back here if
1540     an attempt to read a .bz2 file indicates that it really is a plain file. */
1541    
1542     #ifdef SUPPORT_LIBBZ2
1543     PLAIN_FILE:
1544     #endif
1545     {
1546 ph10 419 in = fopen(pathname, "rb");
1547 ph10 286 handle = (void *)in;
1548     frtype = FR_PLAIN;
1549     }
1550    
1551     /* All the opening methods return errno when they fail. */
1552    
1553     if (handle == NULL)
1554     {
1555 nigel 77 if (!silent)
1556     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1557     strerror(errno));
1558 nigel 53 return 2;
1559     }
1560    
1561 ph10 286 /* Now grep the file */
1562    
1563     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1564 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1565 nigel 77
1566 ph10 286 /* Close in an appropriate manner. */
1567    
1568     #ifdef SUPPORT_LIBZ
1569     if (frtype == FR_LIBZ)
1570     gzclose(ingz);
1571     else
1572     #endif
1573    
1574     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1575     read failed. If the error indicates that the file isn't in fact bzipped, try
1576     again as a normal file. */
1577    
1578     #ifdef SUPPORT_LIBBZ2
1579     if (frtype == FR_LIBBZ2)
1580     {
1581     if (rc == 2)
1582     {
1583     int errnum;
1584     const char *err = BZ2_bzerror(inbz2, &errnum);
1585     if (errnum == BZ_DATA_ERROR_MAGIC)
1586     {
1587     BZ2_bzclose(inbz2);
1588     goto PLAIN_FILE;
1589     }
1590     else if (!silent)
1591     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1592     pathname, err);
1593     }
1594     BZ2_bzclose(inbz2);
1595     }
1596     else
1597     #endif
1598    
1599     /* Normal file close */
1600    
1601 nigel 53 fclose(in);
1602 ph10 286
1603     /* Pass back the yield from pcregrep(). */
1604    
1605 nigel 53 return rc;
1606     }
1607    
1608    
1609    
1610    
1611     /*************************************************
1612 nigel 49 * Usage function *
1613     *************************************************/
1614    
1615     static int
1616     usage(int rc)
1617     {
1618 nigel 87 option_item *op;
1619     fprintf(stderr, "Usage: pcregrep [-");
1620     for (op = optionlist; op->one_char != 0; op++)
1621     {
1622     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1623     }
1624     fprintf(stderr, "] [long options] [pattern] [files]\n");
1625 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1626     "options.\n");
1627 nigel 49 return rc;
1628     }
1629    
1630    
1631    
1632    
1633     /*************************************************
1634 nigel 53 * Help function *
1635     *************************************************/
1636    
1637     static void
1638     help(void)
1639     {
1640     option_item *op;
1641    
1642 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1643 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1644 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1645 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1646    
1647     #ifdef SUPPORT_LIBZ
1648     printf("Files whose names end in .gz are read using zlib.\n");
1649     #endif
1650    
1651     #ifdef SUPPORT_LIBBZ2
1652     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1653     #endif
1654    
1655     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1656     printf("Other files and the standard input are read as plain files.\n\n");
1657     #else
1658     printf("All files are read as plain files, without any interpretation.\n\n");
1659     #endif
1660    
1661 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1662     printf("Options:\n");
1663    
1664     for (op = optionlist; op->one_char != 0; op++)
1665     {
1666     int n;
1667     char s[4];
1668     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1669 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1670 nigel 53 if (n < 1) n = 1;
1671     printf("%.*s%s\n", n, " ", op->help_text);
1672     }
1673    
1674 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1675     printf("trailing white space is removed and blank lines are ignored.\n");
1676     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1677 nigel 53
1678 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1679 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1680     }
1681    
1682    
1683    
1684    
1685     /*************************************************
1686 nigel 77 * Handle a single-letter, no data option *
1687 nigel 53 *************************************************/
1688    
1689     static int
1690     handle_option(int letter, int options)
1691     {
1692     switch(letter)
1693     {
1694 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1695 nigel 87 case N_HELP: help(); exit(0);
1696 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1697 nigel 53 case 'c': count_only = TRUE; break;
1698 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1699     case 'H': filenames = FN_FORCE; break;
1700     case 'h': filenames = FN_NONE; break;
1701 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1702 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1703 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1704 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1705 nigel 53 case 'n': number = TRUE; break;
1706 nigel 87 case 'o': only_matching = TRUE; break;
1707 nigel 77 case 'q': quiet = TRUE; break;
1708 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1709 nigel 53 case 's': silent = TRUE; break;
1710 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1711 nigel 53 case 'v': invert = TRUE; break;
1712 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1713     case 'x': process_options |= PO_LINE_MATCH; break;
1714 nigel 53
1715     case 'V':
1716 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1717 nigel 53 exit(0);
1718     break;
1719    
1720     default:
1721     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1722     exit(usage(2));
1723     }
1724    
1725     return options;
1726     }
1727    
1728    
1729    
1730    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The suffix is chosen from the
last digit, except that numbers ending in 11, 12, or 13 take "th" ("11th",
"112th"), not "st", "nd", or "rd".

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call
*/

static char *
ordin(int n)
{
static char buffer[16];     /* Holds any int, a two-letter ending, and zero */
const char *ending = "th";
int last_two = n%100;

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1753    
1754    
1755    
1756     /*************************************************
1757     * Compile a single pattern *
1758     *************************************************/
1759    
1760     /* When the -F option has been used, this is called for each substring.
1761     Otherwise it's called for each supplied pattern.
1762    
1763     Arguments:
1764     pattern the pattern string
1765     options the PCRE options
1766     filename the file name, or NULL for a command-line pattern
1767     count 0 if this is the only command line pattern, or
1768     number of the command line pattern, or
1769     linenumber for a pattern from a file
1770    
1771     Returns: TRUE on success, FALSE after an error
1772     */
1773    
1774     static BOOL
1775     compile_single_pattern(char *pattern, int options, char *filename, int count)
1776     {
1777     char buffer[MBUFTHIRD + 16];
1778     const char *error;
1779     int errptr;
1780    
1781     if (pattern_count >= MAX_PATTERN_COUNT)
1782     {
1783     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1784     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1785     return FALSE;
1786     }
1787    
1788     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1789     suffix[process_options]);
1790     pattern_list[pattern_count] =
1791     pcre_compile(buffer, options, &error, &errptr, pcretables);
1792 ph10 142 if (pattern_list[pattern_count] != NULL)
1793 ph10 141 {
1794 ph10 142 pattern_count++;
1795 ph10 141 return TRUE;
1796 ph10 142 }
1797 nigel 87
1798     /* Handle compile errors */
1799    
1800     errptr -= (int)strlen(prefix[process_options]);
1801     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1802    
1803     if (filename == NULL)
1804     {
1805     if (count == 0)
1806     fprintf(stderr, "pcregrep: Error in command-line regex "
1807     "at offset %d: %s\n", errptr, error);
1808     else
1809     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1810     "at offset %d: %s\n", ordin(count), errptr, error);
1811     }
1812     else
1813     {
1814     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1815     "at offset %d: %s\n", count, filename, errptr, error);
1816     }
1817    
1818     return FALSE;
1819     }
1820    
1821    
1822    
1823     /*************************************************
1824     * Compile one supplied pattern *
1825     *************************************************/
1826    
1827     /* When the -F option has been used, each string may be a list of strings,
1828 nigel 91 separated by line breaks. They will be matched literally.
1829 nigel 87
1830     Arguments:
1831     pattern the pattern string
1832     options the PCRE options
1833     filename the file name, or NULL for a command-line pattern
1834     count 0 if this is the only command line pattern, or
1835     number of the command line pattern, or
1836     linenumber for a pattern from a file
1837    
1838     Returns: TRUE on success, FALSE after an error
1839     */
1840    
1841     static BOOL
1842     compile_pattern(char *pattern, int options, char *filename, int count)
1843     {
1844     if ((process_options & PO_FIXED_STRINGS) != 0)
1845     {
1846 nigel 93 char *eop = pattern + strlen(pattern);
1847 nigel 87 char buffer[MBUFTHIRD];
1848     for(;;)
1849     {
1850 nigel 93 int ellength;
1851     char *p = end_of_line(pattern, eop, &ellength);
1852     if (ellength == 0)
1853 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1854 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1855 nigel 93 pattern = p;
1856 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1857     return FALSE;
1858     }
1859     }
1860     else return compile_single_pattern(pattern, options, filename, count);
1861     }
1862    
1863    
1864    
1865     /*************************************************
1866 nigel 49 * Main program *
1867     *************************************************/
1868    
1869 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1870    
1871 nigel 49 int
1872     main(int argc, char **argv)
1873     {
1874 nigel 53 int i, j;
1875 nigel 49 int rc = 1;
1876 nigel 87 int pcre_options = 0;
1877     int cmd_pattern_count = 0;
1878 ph10 141 int hint_count = 0;
1879 nigel 49 int errptr;
1880 nigel 87 BOOL only_one_at_top;
1881     char *patterns[MAX_PATTERN_COUNT];
1882     const char *locale_from = "--locale";
1883 nigel 49 const char *error;
1884    
1885 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1886     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1887 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
1888 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
1889 ph10 391 rather than escapes such as as '\r'. */
1890 nigel 91
1891     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1892     switch(i)
1893     {
1894 ph10 391 default: newline = (char *)"lf"; break;
1895     case 13: newline = (char *)"cr"; break;
1896     case (13 << 8) | 10: newline = (char *)"crlf"; break;
1897     case -1: newline = (char *)"any"; break;
1898     case -2: newline = (char *)"anycrlf"; break;
1899 nigel 91 }
1900    
1901 nigel 49 /* Process the options */
1902    
1903     for (i = 1; i < argc; i++)
1904     {
1905 nigel 77 option_item *op = NULL;
1906     char *option_data = (char *)""; /* default to keep compiler happy */
1907     BOOL longop;
1908     BOOL longopwasequals = FALSE;
1909    
1910 nigel 49 if (argv[i][0] != '-') break;
1911 nigel 53
1912 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1913 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1914 nigel 63
1915 nigel 77 if (argv[i][1] == 0)
1916     {
1917 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1918 nigel 77 else exit(usage(2));
1919     }
1920 nigel 63
1921 nigel 77 /* Handle a long name option, or -- to terminate the options */
1922 nigel 53
1923     if (argv[i][1] == '-')
1924 nigel 49 {
1925 nigel 77 char *arg = argv[i] + 2;
1926     char *argequals = strchr(arg, '=');
1927 nigel 53
1928 nigel 77 if (*arg == 0) /* -- terminates options */
1929 nigel 49 {
1930 nigel 77 i++;
1931     break; /* out of the options-handling loop */
1932 nigel 53 }
1933 nigel 49
1934 nigel 77 longop = TRUE;
1935    
1936     /* Some long options have data that follows after =, for example file=name.
1937     Some options have variations in the long name spelling: specifically, we
1938     allow "regexp" because GNU grep allows it, though I personally go along
1939 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1940 ph10 422 These options are entered in the table as "regex(p)". Options can be in
1941     both these categories. */
1942 nigel 77
1943 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1944     {
1945 nigel 77 char *opbra = strchr(op->long_name, '(');
1946     char *equals = strchr(op->long_name, '=');
1947 ph10 461
1948 ph10 422 /* Handle options with only one spelling of the name */
1949 ph10 461
1950 ph10 422 if (opbra == NULL) /* Does not contain '(' */
1951 nigel 53 {
1952 nigel 77 if (equals == NULL) /* Not thing=data case */
1953     {
1954     if (strcmp(arg, op->long_name) == 0) break;
1955     }
1956     else /* Special case xxx=data */
1957     {
1958     int oplen = equals - op->long_name;
1959 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1960 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1961     {
1962     option_data = arg + arglen;
1963     if (*option_data == '=')
1964     {
1965     option_data++;
1966     longopwasequals = TRUE;
1967     }
1968     break;
1969     }
1970     }
1971 nigel 53 }
1972 ph10 461
1973 ph10 422 /* Handle options with an alternate spelling of the name */
1974 ph10 461
1975     else
1976 nigel 77 {
1977     char buff1[24];
1978     char buff2[24];
1979 ph10 461
1980 nigel 77 int baselen = opbra - op->long_name;
1981 ph10 422 int fulllen = strchr(op->long_name, ')') - op->long_name + 1;
1982 ph10 461 int arglen = (argequals == NULL || equals == NULL)?
1983 ph10 422 (int)strlen(arg) : argequals - arg;
1984 ph10 461
1985 nigel 77 sprintf(buff1, "%.*s", baselen, op->long_name);
1986 ph10 422 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
1987 ph10 461
1988     if (strncmp(arg, buff1, arglen) == 0 ||
1989 ph10 422 strncmp(arg, buff2, arglen) == 0)
1990     {
1991     if (equals != NULL && argequals != NULL)
1992     {
1993 ph10 461 option_data = argequals;
1994 ph10 422 if (*option_data == '=')
1995     {
1996 ph10 461 option_data++;
1997 ph10 422 longopwasequals = TRUE;
1998 ph10 461 }
1999     }
2000 nigel 77 break;
2001 ph10 461 }
2002 nigel 77 }
2003 nigel 53 }
2004 nigel 77
2005 nigel 53 if (op->one_char == 0)
2006     {
2007     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2008     exit(usage(2));
2009     }
2010     }
2011 nigel 49
2012 nigel 89 /* Jeffrey Friedl's debugging harness uses these additional options which
2013     are not in the right form for putting in the option table because they use
2014     only one hyphen, yet are more than one character long. By putting them
2015     separately here, they will not get displayed as part of the help() output,
2016     but I don't think Jeffrey will care about that. */
2017    
2018     #ifdef JFRIEDL_DEBUG
2019     else if (strcmp(argv[i], "-pre") == 0) {
2020     jfriedl_prefix = argv[++i];
2021     continue;
2022     } else if (strcmp(argv[i], "-post") == 0) {
2023     jfriedl_postfix = argv[++i];
2024     continue;
2025     } else if (strcmp(argv[i], "-XT") == 0) {
2026     sscanf(argv[++i], "%d", &jfriedl_XT);
2027     continue;
2028     } else if (strcmp(argv[i], "-XR") == 0) {
2029     sscanf(argv[++i], "%d", &jfriedl_XR);
2030     continue;
2031     }
2032     #endif
2033    
2034    
2035 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2036     continue till we hit the last one or one that needs data. */
2037 nigel 53
2038     else
2039     {
2040     char *s = argv[i] + 1;
2041 nigel 77 longop = FALSE;
2042 nigel 53 while (*s != 0)
2043     {
2044 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2045     { if (*s == op->one_char) break; }
2046     if (op->one_char == 0)
2047 nigel 53 {
2048 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2049     *s, argv[i]);
2050     exit(usage(2));
2051     }
2052     if (op->type != OP_NODATA || s[1] == 0)
2053     {
2054     option_data = s+1;
2055 nigel 53 break;
2056     }
2057 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2058 nigel 49 }
2059     }
2060 nigel 77
2061 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2062     is NO_DATA, it means that there is no data, and the option might set
2063     something in the PCRE options. */
2064 nigel 77
2065     if (op->type == OP_NODATA)
2066     {
2067 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2068     continue;
2069     }
2070    
2071     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2072     either has a value or defaults to something. It cannot have data in a
2073     separate item. At the moment, the only such options are "colo(u)r" and
2074 nigel 89 Jeffrey Friedl's special -S debugging option. */
2075 nigel 87
2076     if (*option_data == 0 &&
2077     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2078     {
2079     switch (op->one_char)
2080 nigel 77 {
2081 nigel 87 case N_COLOUR:
2082     colour_option = (char *)"auto";
2083     break;
2084     #ifdef JFRIEDL_DEBUG
2085     case 'S':
2086     S_arg = 0;
2087     break;
2088     #endif
2089 nigel 77 }
2090 nigel 87 continue;
2091     }
2092 nigel 77
2093 nigel 87 /* Otherwise, find the data string for the option. */
2094    
2095     if (*option_data == 0)
2096     {
2097     if (i >= argc - 1 || longopwasequals)
2098 nigel 77 {
2099 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2100     exit(usage(2));
2101     }
2102     option_data = argv[++i];
2103     }
2104    
2105     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2106     multiple times to create a list of patterns. */
2107    
2108     if (op->type == OP_PATLIST)
2109     {
2110     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2111     {
2112     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2113     MAX_PATTERN_COUNT);
2114     return 2;
2115     }
2116     patterns[cmd_pattern_count++] = option_data;
2117     }
2118    
2119     /* Otherwise, deal with single string or numeric data values. */
2120    
2121     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2122     {
2123     *((char **)op->dataptr) = option_data;
2124     }
2125     else
2126     {
2127     char *endptr;
2128     int n = strtoul(option_data, &endptr, 10);
2129     if (*endptr != 0)
2130     {
2131     if (longop)
2132 nigel 77 {
2133 nigel 87 char *equals = strchr(op->long_name, '=');
2134     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2135     equals - op->long_name;
2136     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2137     option_data, nlen, op->long_name);
2138 nigel 77 }
2139 nigel 87 else
2140     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2141     option_data, op->one_char);
2142     exit(usage(2));
2143 nigel 77 }
2144 nigel 87 *((int *)op->dataptr) = n;
2145 nigel 77 }
2146 nigel 49 }
2147    
2148 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2149     for -A and -B. */
2150    
2151     if (both_context > 0)
2152     {
2153     if (after_context == 0) after_context = both_context;
2154     if (before_context == 0) before_context = both_context;
2155     }
2156 ph10 286
2157     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2158 ph10 280 However, the latter two set the only_matching flag. */
2159 nigel 77
2160 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2161 ph10 286 (file_offsets && line_offsets))
2162 ph10 280 {
2163     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2164     "and/or --line-offsets\n");
2165     exit(usage(2));
2166     }
2167    
2168 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2169    
2170 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2171     LC_ALL environment variable is set, and if so, use it. */
2172 nigel 49
2173 nigel 87 if (locale == NULL)
2174 nigel 53 {
2175 nigel 87 locale = getenv("LC_ALL");
2176     locale_from = "LCC_ALL";
2177 nigel 53 }
2178 nigel 49
2179 nigel 87 if (locale == NULL)
2180     {
2181     locale = getenv("LC_CTYPE");
2182     locale_from = "LC_CTYPE";
2183     }
2184 nigel 49
2185 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2186     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2187    
2188     if (locale != NULL)
2189 nigel 49 {
2190 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2191 nigel 53 {
2192 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2193     locale, locale_from);
2194 nigel 53 return 2;
2195     }
2196 nigel 87 pcretables = pcre_maketables();
2197     }
2198 nigel 77
2199 nigel 87 /* Sort out colouring */
2200    
2201     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2202     {
2203     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2204     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2205     else
2206 nigel 53 {
2207 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2208     colour_option);
2209     return 2;
2210 nigel 77 }
2211 nigel 87 if (do_colour)
2212 nigel 77 {
2213 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2214     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2215     if (cs != NULL) colour_string = cs;
2216 nigel 77 }
2217 nigel 87 }
2218 nigel 77
2219 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2220    
2221     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2222     {
2223     pcre_options |= PCRE_NEWLINE_CR;
2224 nigel 93 endlinetype = EL_CR;
2225 nigel 91 }
2226     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2227     {
2228     pcre_options |= PCRE_NEWLINE_LF;
2229 nigel 93 endlinetype = EL_LF;
2230 nigel 91 }
2231     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2232     {
2233     pcre_options |= PCRE_NEWLINE_CRLF;
2234 nigel 93 endlinetype = EL_CRLF;
2235 nigel 91 }
2236 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2237     {
2238     pcre_options |= PCRE_NEWLINE_ANY;
2239     endlinetype = EL_ANY;
2240     }
2241 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2242     {
2243     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2244     endlinetype = EL_ANYCRLF;
2245     }
2246 nigel 91 else
2247     {
2248     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2249     return 2;
2250     }
2251    
2252 nigel 87 /* Interpret the text values for -d and -D */
2253    
2254     if (dee_option != NULL)
2255     {
2256     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2257     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2258     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2259     else
2260 nigel 77 {
2261 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2262     return 2;
2263 nigel 53 }
2264 nigel 49 }
2265    
2266 nigel 87 if (DEE_option != NULL)
2267     {
2268     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2269     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2270     else
2271     {
2272     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2273     return 2;
2274     }
2275     }
2276 nigel 49
2277 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2278 nigel 87
2279     #ifdef JFRIEDL_DEBUG
2280     if (S_arg > 9)
2281 nigel 49 {
2282 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2283     return 2;
2284     }
2285 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2286     {
2287     if (jfriedl_XT == 0) jfriedl_XT = 1;
2288     if (jfriedl_XR == 0) jfriedl_XR = 1;
2289     }
2290 nigel 87 #endif
2291 nigel 77
2292 nigel 87 /* Get memory to store the pattern and hints lists. */
2293    
2294     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2295     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2296    
2297     if (pattern_list == NULL || hints_list == NULL)
2298     {
2299     fprintf(stderr, "pcregrep: malloc failed\n");
2300 ph10 123 goto EXIT2;
2301 nigel 87 }
2302    
2303     /* If no patterns were provided by -e, and there is no file provided by -f,
2304     the first argument is the one and only pattern, and it must exist. */
2305    
2306     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2307     {
2308 nigel 63 if (i >= argc) return usage(2);
2309 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2310     }
2311 nigel 77
2312 nigel 87 /* Compile the patterns that were provided on the command line, either by
2313     multiple uses of -e or as a single unkeyed pattern. */
2314    
2315     for (j = 0; j < cmd_pattern_count; j++)
2316     {
2317     if (!compile_pattern(patterns[j], pcre_options, NULL,
2318     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2319 ph10 123 goto EXIT2;
2320 nigel 87 }
2321    
2322     /* Compile the regular expressions that are provided in a file. */
2323    
2324     if (pattern_filename != NULL)
2325     {
2326     int linenumber = 0;
2327     FILE *f;
2328     char *filename;
2329     char buffer[MBUFTHIRD];
2330    
2331     if (strcmp(pattern_filename, "-") == 0)
2332 nigel 77 {
2333 nigel 87 f = stdin;
2334     filename = stdin_name;
2335 nigel 77 }
2336 nigel 87 else
2337 nigel 77 {
2338 nigel 87 f = fopen(pattern_filename, "r");
2339     if (f == NULL)
2340     {
2341     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2342     strerror(errno));
2343 ph10 123 goto EXIT2;
2344 nigel 87 }
2345     filename = pattern_filename;
2346 nigel 77 }
2347    
2348 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2349 nigel 53 {
2350 nigel 87 char *s = buffer + (int)strlen(buffer);
2351     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2352     *s = 0;
2353     linenumber++;
2354     if (buffer[0] == 0) continue; /* Skip blank lines */
2355     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2356 ph10 121 goto EXIT2;
2357 nigel 53 }
2358 nigel 87
2359     if (f != stdin) fclose(f);
2360 nigel 49 }
2361    
2362 nigel 77 /* Study the regular expressions, as we will be running them many times */
2363 nigel 53
2364     for (j = 0; j < pattern_count; j++)
2365     {
2366     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2367     if (error != NULL)
2368     {
2369     char s[16];
2370     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2371     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2372 ph10 121 goto EXIT2;
2373 nigel 53 }
2374 ph10 142 hint_count++;
2375 nigel 53 }
2376    
2377 nigel 77 /* If there are include or exclude patterns, compile them. */
2378    
2379     if (exclude_pattern != NULL)
2380     {
2381 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2382     pcretables);
2383 nigel 77 if (exclude_compiled == NULL)
2384     {
2385     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2386     errptr, error);
2387 ph10 121 goto EXIT2;
2388 nigel 77 }
2389     }
2390    
2391     if (include_pattern != NULL)
2392     {
2393 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2394     pcretables);
2395 nigel 77 if (include_compiled == NULL)
2396     {
2397     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2398     errptr, error);
2399 ph10 121 goto EXIT2;
2400 nigel 77 }
2401     }
2402    
2403 ph10 325 if (exclude_dir_pattern != NULL)
2404     {
2405     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2406     pcretables);
2407     if (exclude_dir_compiled == NULL)
2408     {
2409     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2410     errptr, error);
2411     goto EXIT2;
2412     }
2413     }
2414    
2415     if (include_dir_pattern != NULL)
2416     {
2417     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2418     pcretables);
2419     if (include_dir_compiled == NULL)
2420     {
2421     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2422     errptr, error);
2423     goto EXIT2;
2424     }
2425     }
2426    
2427 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2428 nigel 49
2429 nigel 87 if (i >= argc)
2430 ph10 121 {
2431 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2432 ph10 121 goto EXIT;
2433 ph10 123 }
2434 nigel 49
2435 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2436     Pass in the fact that there is only one argument at top level - this suppresses
2437 nigel 87 the file name if the argument is not a directory and filenames are not
2438     otherwise forced. */
2439 nigel 49
2440 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2441 nigel 49
2442     for (; i < argc; i++)
2443     {
2444 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2445     only_one_at_top);
2446 nigel 77 if (frc > 1) rc = frc;
2447     else if (frc == 0 && rc == 1) rc = 0;
2448 nigel 49 }
2449    
2450 ph10 121 EXIT:
2451     if (pattern_list != NULL)
2452     {
2453 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2454 ph10 121 free(pattern_list);
2455 ph10 123 }
2456 ph10 121 if (hints_list != NULL)
2457     {
2458 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2459 ph10 121 free(hints_list);
2460 ph10 123 }
2461 nigel 49 return rc;
2462 ph10 121
2463     EXIT2:
2464     rc = 2;
2465     goto EXIT;
2466 nigel 49 }
2467    
2468 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12