/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 421 - (hide annotations) (download)
Fri Aug 14 15:43:27 2009 UTC (5 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 69238 byte(s)
Fix --fixed-strings.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 377 Copyright (c) 1997-2009 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* Home-grown boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* Fixed limits: an upper bound for the number of patterns, and the number of
ints in the offsets vector that is handed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* One third of the size of the file buffer used by pcregrep(): BUFSIZ when
that is larger than 8192, and 8192 otherwise. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
81 nigel 49
/* Values for the "filenames" variable, which specifies options for file name
output. The order is important; it is assumed that a file name is wanted for
all values greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d and -D options */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106 nigel 87
107 nigel 93
108    
109 nigel 49 /*************************************************
110     * Global variables *
111     *************************************************/
112    
113 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
114     regular code. */
115    
116     #ifdef JFRIEDL_DEBUG
117     static int S_arg = -1;
118 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120     static const char *jfriedl_prefix = "";
121     static const char *jfriedl_postfix = "";
122 nigel 87 #endif
123    
124 nigel 93 static int endlinetype;
125 nigel 91
126 nigel 87 static char *colour_string = (char *)"1;31";
127     static char *colour_option = NULL;
128     static char *dee_option = NULL;
129     static char *DEE_option = NULL;
130 nigel 91 static char *newline = NULL;
131 nigel 53 static char *pattern_filename = NULL;
132 nigel 77 static char *stdin_name = (char *)"(standard input)";
133 nigel 87 static char *locale = NULL;
134    
135     static const unsigned char *pcretables = NULL;
136    
137 nigel 53 static int pattern_count = 0;
138 ph10 121 static pcre **pattern_list = NULL;
139     static pcre_extra **hints_list = NULL;
140 nigel 49
141 nigel 77 static char *include_pattern = NULL;
142     static char *exclude_pattern = NULL;
143 ph10 325 static char *include_dir_pattern = NULL;
144     static char *exclude_dir_pattern = NULL;
145 nigel 77
146     static pcre *include_compiled = NULL;
147     static pcre *exclude_compiled = NULL;
148 ph10 325 static pcre *include_dir_compiled = NULL;
149     static pcre *exclude_dir_compiled = NULL;
150 nigel 77
151     static int after_context = 0;
152     static int before_context = 0;
153     static int both_context = 0;
154 nigel 87 static int dee_action = dee_READ;
155     static int DEE_action = DEE_READ;
156     static int error_count = 0;
157     static int filenames = FN_DEFAULT;
158     static int process_options = 0;
159 nigel 77
160 nigel 49 static BOOL count_only = FALSE;
161 nigel 87 static BOOL do_colour = FALSE;
162 ph10 280 static BOOL file_offsets = FALSE;
163 nigel 77 static BOOL hyphenpending = FALSE;
164 nigel 49 static BOOL invert = FALSE;
165 ph10 280 static BOOL line_offsets = FALSE;
166 nigel 77 static BOOL multiline = FALSE;
167 nigel 49 static BOOL number = FALSE;
168 ph10 420 static BOOL omit_zero_count = FALSE;
169 nigel 87 static BOOL only_matching = FALSE;
170 nigel 77 static BOOL quiet = FALSE;
171 nigel 49 static BOOL silent = FALSE;
172 nigel 93 static BOOL utf8 = FALSE;
173 nigel 49
/* The kinds of option; one of these values is stored in the "type" field of
each option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* Structure that describes one command line option */

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* option letter, or a negative N_xxx code */
  void *dataptr;           /* address of the associated variable, or NULL */
  const char *long_name;   /* text of the long form of the option */
  const char *help_text;   /* description of the option */
} option_item;
186 nigel 49
187 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
188     used to identify them. */
189    
190 ph10 325 #define N_COLOUR (-1)
191     #define N_EXCLUDE (-2)
192     #define N_EXCLUDE_DIR (-3)
193     #define N_HELP (-4)
194     #define N_INCLUDE (-5)
195     #define N_INCLUDE_DIR (-6)
196     #define N_LABEL (-7)
197     #define N_LOCALE (-8)
198     #define N_NULL (-9)
199     #define N_LOFFSETS (-10)
200     #define N_FOFFSETS (-11)
201 nigel 87
202 nigel 53 static option_item optionlist[] = {
203 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
204     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
205     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
206     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
207     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
208     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
209     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
210     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
211     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
212     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
213     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
214 ph10 421 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
215 nigel 87 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
216 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
217 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
218     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
219     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
220     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
221     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
222     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
223 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
224 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
225     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
226 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
227 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
228     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
229     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
230     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
231     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
232     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
233 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
234     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
235 nigel 87 #ifdef JFRIEDL_DEBUG
236     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
237     #endif
238     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
239     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
240     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
241     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
242     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
243     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
244     { OP_NODATA, 0, NULL, NULL, NULL }
245 nigel 53 };
246    
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
options. These set the 1, 2, and 4 bits in process_options, respectively, so
each table has eight entries, one for each combination of those bits. Note that
the combination of -w and -x has the same effect as -x on its own, so we can
treat them as the same. */

static const char *prefix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: none */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
257    
/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */

/* Masks for the data bits in the first byte of a character, indexed by the
number of additional bytes. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* Number of additional bytes, indexed by the low-order six bits of a first
byte whose value is 0xc0 or more. */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,    /* 0x00 - 0x0f */
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,    /* 0x10 - 0x1f */
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,    /* 0x20 - 0x2f */
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5     /* 0x30 - 0x3f */
};
267    
268    
269    
270 nigel 53 /*************************************************
271 nigel 87 * OS-specific functions *
272 nigel 53 *************************************************/
273    
274     /* These functions are defined so that they can be made system specific,
275 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
276 nigel 53
277    
278     /************* Directory scanning in Unix ***********/
279    
280 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
281 nigel 53 #include <sys/types.h>
282     #include <sys/stat.h>
283     #include <dirent.h>
284    
285     typedef DIR directory_type;
286    
/* Test whether a path names a directory (Unix version). The POSIX S_ISDIR()
macro is used because S_IFMT and S_IFDIR are XSI extensions that are not
visible under strict feature-test settings.

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
295    
296 nigel 67 static directory_type *
297 nigel 53 opendirectory(char *filename)
298     {
299     return opendir(filename);
300     }
301    
302 nigel 67 static char *
303 nigel 53 readdirectory(directory_type *dir)
304     {
305     for (;;)
306     {
307     struct dirent *dent = readdir(dir);
308     if (dent == NULL) return NULL;
309     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
310     return dent->d_name;
311     }
312 ph10 151 /* Control never reaches here */
313 nigel 53 }
314    
315 nigel 67 static void
316 nigel 53 closedirectory(directory_type *dir)
317     {
318     closedir(dir);
319     }
320    
321    
322 nigel 87 /************* Test for regular file in Unix **********/
323    
/* Test whether a path names a regular file (Unix version). The POSIX
S_ISREG() macro is used because S_IFMT and S_IFREG are XSI extensions that are
not visible under strict feature-test settings.

Argument:   filename   the path to test
Returns:    1 if stat() reports a regular file, or if stat() fails
            0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
return S_ISREG(statbuf.st_mode)? 1 : 0;
}
332    
333    
334     /************* Test stdout for being a terminal in Unix **********/
335    
336     static BOOL
337     is_stdout_tty(void)
338     {
339     return isatty(fileno(stdout));
340     }
341    
342    
343 nigel 63 /************* Directory scanning in Win32 ***********/
344 nigel 53
345 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
346 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
347 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
348     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
349 ph10 283 */
350 nigel 53
351 ph10 97 #elif HAVE_WINDOWS_H
352 nigel 63
353     #ifndef STRICT
354     # define STRICT
355     #endif
356     #ifndef WIN32_LEAN_AND_MEAN
357     # define WIN32_LEAN_AND_MEAN
358     #endif
359 ph10 283
360     #include <windows.h>
361    
362 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
363     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
364     #endif
365    
/* State of a directory scan in the Win32 code. opendirectory() fills this in
from FindFirstFile(), readdirectory() steps through it, and closedirectory()
releases it. */
366 nigel 63 typedef struct directory_type
367     {
368     HANDLE handle;           /* search handle returned by FindFirstFile() */
369     BOOL first;              /* TRUE until the first entry has been handed out */
370     WIN32_FIND_DATA data;    /* the entry most recently found */
371     } directory_type;
372    
373     int
374     isdirectory(char *filename)
375     {
376     DWORD attr = GetFileAttributes(filename);
377     if (attr == INVALID_FILE_ATTRIBUTES)
378     return 0;
379     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
380     }
381    
382     directory_type *
383     opendirectory(char *filename)
384     {
385     size_t len;
386     char *pattern;
387     directory_type *dir;
388     DWORD err;
389     len = strlen(filename);
390     pattern = (char *) malloc(len + 3);
391     dir = (directory_type *) malloc(sizeof(*dir));
392     if ((pattern == NULL) || (dir == NULL))
393     {
394     fprintf(stderr, "pcregrep: malloc failed\n");
395     exit(2);
396     }
397     memcpy(pattern, filename, len);
398     memcpy(&(pattern[len]), "\\*", 3);
399     dir->handle = FindFirstFile(pattern, &(dir->data));
400     if (dir->handle != INVALID_HANDLE_VALUE)
401     {
402     free(pattern);
403     dir->first = TRUE;
404     return dir;
405     }
406     err = GetLastError();
407     free(pattern);
408     free(dir);
409     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
410     return NULL;
411     }
412    
413     char *
414     readdirectory(directory_type *dir)
415     {
416     for (;;)
417     {
418     if (!dir->first)
419     {
420     if (!FindNextFile(dir->handle, &(dir->data)))
421     return NULL;
422     }
423     else
424     {
425     dir->first = FALSE;
426     }
427     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
428     return dir->data.cFileName;
429     }
430     #ifndef _MSC_VER
431     return NULL; /* Keep compiler happy; never executed */
432     #endif
433     }
434    
435     void
436     closedirectory(directory_type *dir)
437     {
438     FindClose(dir->handle);
439     free(dir);
440     }
441    
442    
443 nigel 87 /************* Test for regular file in Win32 **********/
444    
/* I don't know how to do this, or if it can be done; assume all paths are
regular if they are not directories.

Argument:   filename   the path to test
Returns:    1 if isdirectory() returns zero for the path, 0 otherwise
*/

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
452    
453    
454     /************* Test stdout for being a terminal in Win32 **********/
455    
456     /* I don't know how to do this; assume never */
457    
458     static BOOL
459     is_stdout_tty(void)
460     {
461 ph10 283 return FALSE;
462 nigel 87 }
463    
464    
465 nigel 53 /************* Directory scanning when we can't do it ***********/
466    
467     /* The type is void, and apart from isdirectory(), the functions do nothing. */
468    
469 nigel 63 #else
470    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* A directory can never be opened. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
477    
478 nigel 87
479     /************* Test for regular when we can't do it **********/
480    
/* Assume all files are regular: the answer is always 1. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
484    
485    
486     /************* Test stdout for being a terminal when we can't do it **********/
487    
488     static BOOL
489     is_stdout_tty(void)
490     {
491     return FALSE;
492     }
493    
494    
495 nigel 53 #endif
496    
497    
498    
499 ph10 137 #ifndef HAVE_STRERROR
500 nigel 49 /*************************************************
501     * Provide strerror() for non-ANSI libraries *
502     *************************************************/
503    
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function, which looks the error number up in sys_errlist.

Argument:   n   the error number
Returns:    sys_errlist[n] when n is in the range 0 to sys_nerr-1;
            otherwise the fixed text "unknown error number"
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
517     #endif /* HAVE_STRERROR */
518    
519    
520    
521     /*************************************************
522 nigel 93 * Find end of line *
523     *************************************************/
524    
525     /* The length of the endline sequence that is found is set via lenptr. This may
526     be zero at the very end of the file if there is no line-ending sequence there.
527    
528     Arguments:
529     p current position in line
530     endptr end of available data
531     lenptr where to put the length of the eol sequence
532    
533     Returns: pointer to the last byte of the line
534     */
535    
536     static char *
537     end_of_line(char *p, char *endptr, int *lenptr)
538     {
539     switch(endlinetype)
540     {
541     default: /* Just in case */
542     case EL_LF:
543     while (p < endptr && *p != '\n') p++;
544     if (p < endptr)
545     {
546     *lenptr = 1;
547     return p + 1;
548     }
549     *lenptr = 0;
550     return endptr;
551    
552     case EL_CR:
553     while (p < endptr && *p != '\r') p++;
554     if (p < endptr)
555     {
556     *lenptr = 1;
557     return p + 1;
558     }
559     *lenptr = 0;
560     return endptr;
561    
562     case EL_CRLF:
563     for (;;)
564     {
565     while (p < endptr && *p != '\r') p++;
566     if (++p >= endptr)
567     {
568     *lenptr = 0;
569     return endptr;
570     }
571     if (*p == '\n')
572     {
573     *lenptr = 2;
574     return p + 1;
575     }
576     }
577     break;
578    
579 ph10 149 case EL_ANYCRLF:
580     while (p < endptr)
581     {
582     int extra = 0;
583     register int c = *((unsigned char *)p);
584    
585     if (utf8 && c >= 0xc0)
586     {
587     int gcii, gcss;
588     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
589     gcss = 6*extra;
590     c = (c & utf8_table3[extra]) << gcss;
591     for (gcii = 1; gcii <= extra; gcii++)
592     {
593     gcss -= 6;
594     c |= (p[gcii] & 0x3f) << gcss;
595     }
596     }
597    
598     p += 1 + extra;
599    
600     switch (c)
601     {
602     case 0x0a: /* LF */
603     *lenptr = 1;
604     return p;
605    
606     case 0x0d: /* CR */
607     if (p < endptr && *p == 0x0a)
608     {
609     *lenptr = 2;
610     p++;
611     }
612     else *lenptr = 1;
613     return p;
614 ph10 150
615 ph10 149 default:
616     break;
617     }
618     } /* End of loop for ANYCRLF case */
619 ph10 150
620 ph10 149 *lenptr = 0; /* Must have hit the end */
621     return endptr;
622    
623 nigel 93 case EL_ANY:
624     while (p < endptr)
625     {
626     int extra = 0;
627     register int c = *((unsigned char *)p);
628    
629     if (utf8 && c >= 0xc0)
630     {
631     int gcii, gcss;
632     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
633     gcss = 6*extra;
634     c = (c & utf8_table3[extra]) << gcss;
635     for (gcii = 1; gcii <= extra; gcii++)
636     {
637     gcss -= 6;
638     c |= (p[gcii] & 0x3f) << gcss;
639     }
640     }
641    
642     p += 1 + extra;
643    
644     switch (c)
645     {
646     case 0x0a: /* LF */
647     case 0x0b: /* VT */
648     case 0x0c: /* FF */
649     *lenptr = 1;
650     return p;
651    
652     case 0x0d: /* CR */
653     if (p < endptr && *p == 0x0a)
654     {
655     *lenptr = 2;
656     p++;
657     }
658     else *lenptr = 1;
659     return p;
660    
661     case 0x85: /* NEL */
662     *lenptr = utf8? 2 : 1;
663     return p;
664    
665     case 0x2028: /* LS */
666     case 0x2029: /* PS */
667     *lenptr = 3;
668     return p;
669    
670     default:
671     break;
672     }
673     } /* End of loop for ANY case */
674    
675     *lenptr = 0; /* Must have hit the end */
676     return endptr;
677     } /* End of overall switch */
678     }
679    
680    
681    
682     /*************************************************
683     * Find start of previous line *
684     *************************************************/
685    
686     /* This is called when looking back for before lines to print.
687    
688     Arguments:
689     p start of the subsequent line
690     startptr start of available data
691    
692     Returns: pointer to the start of the previous line
693     */
694    
695     static char *
696     previous_line(char *p, char *startptr)
697     {
698     switch(endlinetype)
699     {
700     default: /* Just in case */
701     case EL_LF:
702     p--;
703     while (p > startptr && p[-1] != '\n') p--;
704     return p;
705    
706     case EL_CR:
707     p--;
708     while (p > startptr && p[-1] != '\n') p--;
709     return p;
710    
711     case EL_CRLF:
712     for (;;)
713     {
714     p -= 2;
715     while (p > startptr && p[-1] != '\n') p--;
716     if (p <= startptr + 1 || p[-2] == '\r') return p;
717     }
718     return p; /* But control should never get here */
719    
720     case EL_ANY:
721 ph10 150 case EL_ANYCRLF:
722 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
723     if (utf8) while ((*p & 0xc0) == 0x80) p--;
724    
725     while (p > startptr)
726     {
727     register int c;
728     char *pp = p - 1;
729    
730     if (utf8)
731     {
732     int extra = 0;
733     while ((*pp & 0xc0) == 0x80) pp--;
734     c = *((unsigned char *)pp);
735     if (c >= 0xc0)
736     {
737     int gcii, gcss;
738     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
739     gcss = 6*extra;
740     c = (c & utf8_table3[extra]) << gcss;
741     for (gcii = 1; gcii <= extra; gcii++)
742     {
743     gcss -= 6;
744     c |= (pp[gcii] & 0x3f) << gcss;
745     }
746     }
747     }
748     else c = *((unsigned char *)pp);
749    
750 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
751 nigel 93 {
752     case 0x0a: /* LF */
753 ph10 149 case 0x0d: /* CR */
754     return p;
755 ph10 150
756 ph10 149 default:
757     break;
758 ph10 150 }
759 ph10 149
760     else switch (c)
761     {
762     case 0x0a: /* LF */
763 nigel 93 case 0x0b: /* VT */
764     case 0x0c: /* FF */
765     case 0x0d: /* CR */
766     case 0x85: /* NEL */
767     case 0x2028: /* LS */
768     case 0x2029: /* PS */
769     return p;
770    
771     default:
772     break;
773     }
774    
775     p = pp; /* Back one character */
776     } /* End of loop for ANY case */
777    
778     return startptr; /* Hit start of data */
779     } /* End of overall switch */
780     }
781    
782    
783    
784    
785    
786     /*************************************************
787 nigel 77 * Print the previous "after" lines *
788 nigel 49 *************************************************/
789    
790 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
791 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
792     that a binary zero does not terminate it.
793 nigel 77
794     Arguments:
795     lastmatchnumber the number of the last matching line, plus one
796     lastmatchrestart where we restarted after the last match
797     endptr end of available data
798     printname filename for printing
799    
800     Returns: nothing
801     */
802    
803     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
804     char *endptr, char *printname)
805     {
806     if (after_context > 0 && lastmatchnumber > 0)
807     {
808     int count = 0;
809     while (lastmatchrestart < endptr && count++ < after_context)
810     {
811 nigel 93 int ellength;
812 nigel 77 char *pp = lastmatchrestart;
813     if (printname != NULL) fprintf(stdout, "%s-", printname);
814     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
815 nigel 93 pp = end_of_line(pp, endptr, &ellength);
816     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
817     lastmatchrestart = pp;
818 nigel 77 }
819     hyphenpending = TRUE;
820     }
821     }
822    
823    
824    
825     /*************************************************
826 ph10 378 * Apply patterns to subject till one matches *
827     *************************************************/
828    
829 ph10 392 /* This function is called to run through all patterns, looking for a match. It
830     is used multiple times for the same subject when colouring is enabled, in order
831 ph10 378 to find all possible matches.
832    
833     Arguments:
834     matchptr the start of the subject
835     length the length of the subject to match
836     offsets the offets vector to fill in
837     mrc address of where to put the result of pcre_exec()
838 ph10 392
839     Returns: TRUE if there was a match
840 ph10 378 FALSE if there was no match
841     invert if there was a non-fatal error
842 ph10 392 */
843 ph10 378
844     static BOOL
845     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
846     {
847     int i;
848     for (i = 0; i < pattern_count; i++)
849     {
850 ph10 379 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
851     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
852 ph10 378 if (*mrc >= 0) return TRUE;
853     if (*mrc == PCRE_ERROR_NOMATCH) continue;
854     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
855     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
856     fprintf(stderr, "this text:\n");
857     fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
858     fprintf(stderr, "\n");
859     if (error_count == 0 &&
860     (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
861     {
862     fprintf(stderr, "pcregrep: error %d means that a resource limit "
863     "was exceeded\n", *mrc);
864     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
865     }
866     if (error_count++ > 20)
867     {
868     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
869     exit(2);
870     }
871     return invert; /* No more matching; don't show the line again */
872     }
873    
874     return FALSE; /* No match, no errors */
875     }
876    
877    
878    
879     /*************************************************
880 nigel 77 * Grep an individual file *
881     *************************************************/
882    
883     /* This is called from grep_or_recurse() below. It uses a buffer that is three
884     times the value of MBUFTHIRD. The matching point is never allowed to stray into
885     the top third of the buffer, thus keeping more of the file available for
886     context printing or for multiline scanning. For large files, the pointer will
887     be in the middle third most of the time, so the bottom third is available for
888     "before" context printing.
889    
890     Arguments:
891 ph10 286 handle the fopened FILE stream for a normal file
892     the gzFile pointer when reading is via libz
893     the BZFILE pointer when reading is via libbz2
894     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
895 nigel 77 printname the file name if it is to be printed for each match
896     or NULL if the file name is not to be printed
897     it cannot be NULL if filenames[_nomatch]_only is set
898    
899     Returns: 0 if there was at least one match
900     1 otherwise (no matches)
901 ph10 286 2 if there is a read error on a .bz2 file
902 nigel 77 */
903    
904 nigel 49 static int
905 ph10 286 pcregrep(void *handle, int frtype, char *printname)
906 nigel 49 {
907     int rc = 1;
908 nigel 77 int linenumber = 1;
909     int lastmatchnumber = 0;
910 nigel 49 int count = 0;
911 ph10 280 int filepos = 0;
912 ph10 378 int offsets[OFFSET_SIZE];
913 nigel 77 char *lastmatchrestart = NULL;
914     char buffer[3*MBUFTHIRD];
915     char *ptr = buffer;
916     char *endptr;
917     size_t bufflength;
918     BOOL endhyphenpending = FALSE;
919 ph10 286 FILE *in = NULL; /* Ensure initialized */
920 nigel 49
921 ph10 286 #ifdef SUPPORT_LIBZ
922     gzFile ingz = NULL;
923     #endif
924 nigel 77
925 ph10 286 #ifdef SUPPORT_LIBBZ2
926     BZFILE *inbz2 = NULL;
927     #endif
928    
929    
930     /* Do the first read into the start of the buffer and set up the pointer to end
931     of what we have. In the case of libz, a non-zipped .gz file will be read as a
932     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
933     fail. */
934    
935     #ifdef SUPPORT_LIBZ
936     if (frtype == FR_LIBZ)
937     {
938     ingz = (gzFile)handle;
939     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
940     }
941     else
942     #endif
943    
944     #ifdef SUPPORT_LIBBZ2
945     if (frtype == FR_LIBBZ2)
946     {
947     inbz2 = (BZFILE *)handle;
948     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
949     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
950     } /* without the cast it is unsigned. */
951     else
952     #endif
953    
954     {
955     in = (FILE *)handle;
956     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
957     }
958    
959 nigel 77 endptr = buffer + bufflength;
960    
961     /* Loop while the current pointer is not at the end of the file. For large
962     files, endptr will be at the end of the buffer when we are in the middle of the
963     file, but ptr will never get there, because as soon as it gets over 2/3 of the
964     way, the buffer is shifted left and re-filled. */
965    
966     while (ptr < endptr)
967 nigel 49 {
968 ph10 378 int endlinelength;
969 nigel 87 int mrc = 0;
970 ph10 378 BOOL match;
971 ph10 286 char *matchptr = ptr;
972 nigel 77 char *t = ptr;
973     size_t length, linelength;
974 nigel 49
975 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
976     of the subject string to pass to pcre_exec(). In multiline mode, it is the
977     length remainder of the data in the buffer. Otherwise, it is the length of
978 ph10 378 the next line, excluding the terminating newline. After matching, we always
979     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
980     option is used for compiling, so that any match is constrained to be in the
981     first line. */
982 nigel 77
983 nigel 93 t = end_of_line(t, endptr, &endlinelength);
984     linelength = t - ptr - endlinelength;
985 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
986 nigel 77
987 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
988    
989     #ifdef JFRIEDL_DEBUG
990     if (jfriedl_XT || jfriedl_XR)
991     {
992     #include <sys/time.h>
993     #include <time.h>
994     struct timeval start_time, end_time;
995     struct timezone dummy;
996 ph10 392 int i;
997 nigel 89
998     if (jfriedl_XT)
999     {
1000     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1001     const char *orig = ptr;
1002     ptr = malloc(newlen + 1);
1003     if (!ptr) {
1004     printf("out of memory");
1005     exit(2);
1006     }
1007     endptr = ptr;
1008     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1009     for (i = 0; i < jfriedl_XT; i++) {
1010     strncpy(endptr, orig, length);
1011     endptr += length;
1012     }
1013     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1014     length = newlen;
1015     }
1016    
1017     if (gettimeofday(&start_time, &dummy) != 0)
1018     perror("bad gettimeofday");
1019    
1020    
1021     for (i = 0; i < jfriedl_XR; i++)
1022 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1023 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1024 nigel 89
1025     if (gettimeofday(&end_time, &dummy) != 0)
1026     perror("bad gettimeofday");
1027    
1028     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1029     -
1030     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1031    
1032     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1033     return 0;
1034     }
1035     #endif
1036    
1037 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1038 ph10 279 in order to find any further matches in the same line. */
1039 nigel 89
1040 ph10 286 ONLY_MATCHING_RESTART:
1041    
1042 ph10 392 /* Run through all the patterns until one matches or there is an error other
1043 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1044     finding subsequent matches when colouring matched lines. */
1045 ph10 392
1046 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1047 nigel 77
1048 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1049 nigel 77
1050 nigel 49 if (match != invert)
1051     {
1052 nigel 77 BOOL hyphenprinted = FALSE;
1053    
1054 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1055 nigel 77
1056 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1057    
1058     /* Just count if just counting is wanted. */
1059    
1060 nigel 49 if (count_only) count++;
1061    
1062 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1063     in the file. */
1064    
1065 ph10 420 else if (filenames == FN_MATCH_ONLY)
1066 nigel 49 {
1067 nigel 77 fprintf(stdout, "%s\n", printname);
1068 nigel 49 return 0;
1069     }
1070    
1071 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1072    
1073 nigel 77 else if (quiet) return 0;
1074 nigel 49
1075 nigel 87 /* The --only-matching option prints just the substring that matched, and
1076 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1077 ph10 280 matching substring (they both force --only-matching). None of these options
1078     prints any context. Afterwards, adjust the start and length, and then jump
1079     back to look for further matches in the same line. If we are in invert
1080     mode, however, nothing is printed - this could be still useful because the
1081     return code is set. */
1082 nigel 87
1083     else if (only_matching)
1084     {
1085 ph10 279 if (!invert)
1086 ph10 286 {
1087 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1088     if (number) fprintf(stdout, "%d:", linenumber);
1089 ph10 280 if (line_offsets)
1090 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1091 ph10 286 offsets[1] - offsets[0]);
1092 ph10 280 else if (file_offsets)
1093 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1094 ph10 286 offsets[1] - offsets[0]);
1095     else
1096 ph10 377 {
1097     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1098 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1099 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1100 ph10 392 }
1101 ph10 279 fprintf(stdout, "\n");
1102     matchptr += offsets[1];
1103     length -= offsets[1];
1104 ph10 286 match = FALSE;
1105     goto ONLY_MATCHING_RESTART;
1106     }
1107 nigel 87 }
1108    
1109     /* This is the default case when none of the above options is set. We print
1110     the matching lines(s), possibly preceded and/or followed by other lines of
1111     context. */
1112    
1113 nigel 49 else
1114     {
1115 nigel 77 /* See if there is a requirement to print some "after" lines from a
1116     previous match. We never print any overlaps. */
1117    
1118     if (after_context > 0 && lastmatchnumber > 0)
1119     {
1120 nigel 93 int ellength;
1121 nigel 77 int linecount = 0;
1122     char *p = lastmatchrestart;
1123    
1124     while (p < ptr && linecount < after_context)
1125     {
1126 nigel 93 p = end_of_line(p, ptr, &ellength);
1127 nigel 77 linecount++;
1128     }
1129    
1130     /* It is important to advance lastmatchrestart during this printing so
1131 nigel 87 that it interacts correctly with any "before" printing below. Print
1132     each line's data using fwrite() in case there are binary zeroes. */
1133 nigel 77
1134     while (lastmatchrestart < p)
1135     {
1136     char *pp = lastmatchrestart;
1137     if (printname != NULL) fprintf(stdout, "%s-", printname);
1138     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1139 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1140     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1141     lastmatchrestart = pp;
1142 nigel 77 }
1143     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1144     }
1145    
1146     /* If there were non-contiguous lines printed above, insert hyphens. */
1147    
1148     if (hyphenpending)
1149     {
1150     fprintf(stdout, "--\n");
1151     hyphenpending = FALSE;
1152     hyphenprinted = TRUE;
1153     }
1154    
1155     /* See if there is a requirement to print some "before" lines for this
1156     match. Again, don't print overlaps. */
1157    
1158     if (before_context > 0)
1159     {
1160     int linecount = 0;
1161     char *p = ptr;
1162    
1163     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1164 nigel 87 linecount < before_context)
1165 nigel 77 {
1166 nigel 87 linecount++;
1167 nigel 93 p = previous_line(p, buffer);
1168 nigel 77 }
1169    
1170     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1171     fprintf(stdout, "--\n");
1172    
1173     while (p < ptr)
1174     {
1175 nigel 93 int ellength;
1176 nigel 77 char *pp = p;
1177     if (printname != NULL) fprintf(stdout, "%s-", printname);
1178     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1179 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1180     fwrite(p, 1, pp - p, stdout);
1181     p = pp;
1182 nigel 77 }
1183     }
1184    
1185     /* Now print the matching line(s); ensure we set hyphenpending at the end
1186 nigel 85 of the file if any context lines are being output. */
1187 nigel 77
1188 nigel 85 if (after_context > 0 || before_context > 0)
1189     endhyphenpending = TRUE;
1190    
1191 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1192 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1193 nigel 77
1194     /* In multiline mode, we want to print to the end of the line in which
1195     the end of the matched string is found, so we adjust linelength and the
1196 ph10 222 line number appropriately, but only when there actually was a match
1197     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1198     the match will always be before the first newline sequence. */
1199 nigel 77
1200     if (multiline)
1201     {
1202 nigel 93 int ellength;
1203 ph10 222 char *endmatch = ptr;
1204     if (!invert)
1205 nigel 93 {
1206 ph10 222 endmatch += offsets[1];
1207     t = ptr;
1208     while (t < endmatch)
1209     {
1210     t = end_of_line(t, endptr, &ellength);
1211     if (t <= endmatch) linenumber++; else break;
1212     }
1213 nigel 93 }
1214     endmatch = end_of_line(endmatch, endptr, &ellength);
1215     linelength = endmatch - ptr - ellength;
1216 nigel 77 }
1217    
1218 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1219     zeroes are treated as just another data character. */
1220    
1221     /* This extra option, for Jeffrey Friedl's debugging requirements,
1222     replaces the matched string, or a specific captured string if it exists,
1223     with X. When this happens, colouring is ignored. */
1224    
1225     #ifdef JFRIEDL_DEBUG
1226     if (S_arg >= 0 && S_arg < mrc)
1227     {
1228     int first = S_arg * 2;
1229     int last = first + 1;
1230     fwrite(ptr, 1, offsets[first], stdout);
1231     fprintf(stdout, "X");
1232     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1233     }
1234     else
1235     #endif
1236    
1237 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1238 ph10 378 matches. */
1239 nigel 87
1240     if (do_colour)
1241     {
1242 ph10 392 int last_offset = 0;
1243 nigel 87 fwrite(ptr, 1, offsets[0], stdout);
1244     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1245     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1246     fprintf(stdout, "%c[00m", 0x1b);
1247 ph10 378 for (;;)
1248     {
1249 ph10 392 last_offset += offsets[1];
1250 ph10 378 matchptr += offsets[1];
1251     length -= offsets[1];
1252     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1253     fwrite(matchptr, 1, offsets[0], stdout);
1254     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1255     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1256     fprintf(stdout, "%c[00m", 0x1b);
1257     }
1258     fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1259 ph10 239 stdout);
1260 nigel 87 }
1261 ph10 392
1262 ph10 378 /* Not colouring; no need to search for further matches */
1263 ph10 392
1264 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1265 nigel 49 }
1266    
1267 nigel 87 /* End of doing what has to be done for a match */
1268    
1269 nigel 77 rc = 0; /* Had some success */
1270    
1271     /* Remember where the last match happened for after_context. We remember
1272     where we are about to restart, and that line's number. */
1273    
1274 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1275 nigel 77 lastmatchnumber = linenumber + 1;
1276 nigel 49 }
1277 nigel 77
1278 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1279     anything to be printed), we have to move on to the end of the match before
1280     proceeding. */
1281    
1282     if (multiline && invert && match)
1283     {
1284     int ellength;
1285     char *endmatch = ptr + offsets[1];
1286     t = ptr;
1287     while (t < endmatch)
1288     {
1289     t = end_of_line(t, endptr, &ellength);
1290     if (t <= endmatch) linenumber++; else break;
1291     }
1292     endmatch = end_of_line(endmatch, endptr, &ellength);
1293     linelength = endmatch - ptr - ellength;
1294     }
1295    
1296 ph10 286 /* Advance to after the newline and increment the line number. The file
1297 ph10 280 offset to the current line is maintained in filepos. */
1298 nigel 77
1299 nigel 93 ptr += linelength + endlinelength;
1300 ph10 280 filepos += linelength + endlinelength;
1301 nigel 77 linenumber++;
1302    
1303     /* If we haven't yet reached the end of the file (the buffer is full), and
1304     the current point is in the top 1/3 of the buffer, slide the buffer down by
1305     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1306     about to be lost, print them. */
1307    
1308     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1309     {
1310     if (after_context > 0 &&
1311     lastmatchnumber > 0 &&
1312     lastmatchrestart < buffer + MBUFTHIRD)
1313     {
1314     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1315     lastmatchnumber = 0;
1316     }
1317    
1318     /* Now do the shuffle */
1319    
1320     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1321     ptr -= MBUFTHIRD;
1322 ph10 286
1323     #ifdef SUPPORT_LIBZ
1324     if (frtype == FR_LIBZ)
1325     bufflength = 2*MBUFTHIRD +
1326     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1327     else
1328     #endif
1329    
1330     #ifdef SUPPORT_LIBBZ2
1331     if (frtype == FR_LIBBZ2)
1332     bufflength = 2*MBUFTHIRD +
1333     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1334     else
1335     #endif
1336    
1337 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1338 ph10 286
1339 nigel 77 endptr = buffer + bufflength;
1340    
1341     /* Adjust any last match point */
1342    
1343     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1344     }
1345     } /* Loop through the whole file */
1346    
1347     /* End of file; print final "after" lines if wanted; do_after_lines sets
1348     hyphenpending if it prints something. */
1349    
1350 nigel 87 if (!only_matching && !count_only)
1351     {
1352     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1353     hyphenpending |= endhyphenpending;
1354     }
1355 nigel 77
1356     /* Print the file name if we are looking for those without matches and there
1357     were none. If we found a match, we won't have got this far. */
1358    
1359 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1360 nigel 77 {
1361     fprintf(stdout, "%s\n", printname);
1362     return 0;
1363 nigel 49 }
1364    
1365 nigel 77 /* Print the match count if wanted */
1366    
1367 nigel 49 if (count_only)
1368     {
1369 ph10 420 if (count > 0 || !omit_zero_count)
1370     {
1371     if (printname != NULL && filenames != FN_NONE)
1372     fprintf(stdout, "%s:", printname);
1373     fprintf(stdout, "%d\n", count);
1374     }
1375 nigel 49 }
1376    
1377     return rc;
1378     }
1379    
1380    
1381    
1382     /*************************************************
1383 nigel 53 * Grep a file or recurse into a directory *
1384     *************************************************/
1385    
1386 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1387     recursing; if it's a file, grep it.
1388    
1389     Arguments:
1390     pathname the path to investigate
1391 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1392 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1393    
1394     Returns: 0 if there was at least one match
1395     1 if there were no matches
1396     2 there was some kind of error
1397    
1398     However, file opening failures are suppressed if "silent" is set.
1399     */
1400    
1401 nigel 53 static int
1402 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1403 nigel 53 {
1404     int rc = 1;
1405     int sep;
1406 ph10 286 int frtype;
1407     int pathlen;
1408     void *handle;
1409     FILE *in = NULL; /* Ensure initialized */
1410 nigel 53
1411 ph10 286 #ifdef SUPPORT_LIBZ
1412     gzFile ingz = NULL;
1413     #endif
1414    
1415     #ifdef SUPPORT_LIBBZ2
1416     BZFILE *inbz2 = NULL;
1417     #endif
1418    
1419 nigel 77 /* If the file name is "-" we scan stdin */
1420 nigel 53
1421 nigel 77 if (strcmp(pathname, "-") == 0)
1422 nigel 53 {
1423 ph10 286 return pcregrep(stdin, FR_PLAIN,
1424 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1425 nigel 77 stdin_name : NULL);
1426     }
1427    
1428 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1429 ph10 325 each file and directory within it, subject to any include or exclude patterns
1430     that were set. The scanning code is localized so it can be made
1431     system-specific. */
1432 nigel 87
1433     if ((sep = isdirectory(pathname)) != 0)
1434 nigel 77 {
1435 nigel 87 if (dee_action == dee_SKIP) return 1;
1436     if (dee_action == dee_RECURSE)
1437 nigel 53 {
1438 nigel 87 char buffer[1024];
1439     char *nextfile;
1440     directory_type *dir = opendirectory(pathname);
1441 nigel 53
1442 nigel 87 if (dir == NULL)
1443     {
1444     if (!silent)
1445     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1446     strerror(errno));
1447     return 2;
1448     }
1449 nigel 77
1450 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1451     {
1452 ph10 324 int frc, nflen;
1453 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1454 ph10 324 nflen = strlen(nextfile);
1455 ph10 345
1456 ph10 325 if (isdirectory(buffer))
1457     {
1458     if (exclude_dir_compiled != NULL &&
1459     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1460     continue;
1461 ph10 345
1462 ph10 325 if (include_dir_compiled != NULL &&
1463     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1464     continue;
1465     }
1466 ph10 345 else
1467     {
1468 ph10 324 if (exclude_compiled != NULL &&
1469     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1470     continue;
1471 ph10 345
1472 ph10 324 if (include_compiled != NULL &&
1473     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1474     continue;
1475 ph10 345 }
1476 nigel 77
1477 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1478     if (frc > 1) rc = frc;
1479     else if (frc == 0 && rc == 1) rc = 0;
1480     }
1481    
1482     closedirectory(dir);
1483     return rc;
1484 nigel 53 }
1485     }
1486    
1487 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1488     been requested. */
1489 nigel 53
1490 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1491    
1492     /* Control reaches here if we have a regular file, or if we have a directory
1493     and recursion or skipping was not requested, or if we have anything else and
1494     skipping was not requested. The scan proceeds. If this is the first and only
1495     argument at top level, we don't show the file name, unless we are only showing
1496     the file name, or the filename was forced (-H). */
1497    
1498 ph10 286 pathlen = strlen(pathname);
1499    
1500     /* Open using zlib if it is supported and the file name ends with .gz. */
1501    
1502     #ifdef SUPPORT_LIBZ
1503     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1504 nigel 53 {
1505 ph10 286 ingz = gzopen(pathname, "rb");
1506     if (ingz == NULL)
1507     {
1508     if (!silent)
1509     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1510     strerror(errno));
1511     return 2;
1512     }
1513     handle = (void *)ingz;
1514     frtype = FR_LIBZ;
1515     }
1516     else
1517     #endif
1518    
1519     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1520    
1521     #ifdef SUPPORT_LIBBZ2
1522     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1523     {
1524     inbz2 = BZ2_bzopen(pathname, "rb");
1525     handle = (void *)inbz2;
1526     frtype = FR_LIBBZ2;
1527     }
1528     else
1529     #endif
1530    
1531     /* Otherwise use plain fopen(). The label is so that we can come back here if
1532     an attempt to read a .bz2 file indicates that it really is a plain file. */
1533    
1534     #ifdef SUPPORT_LIBBZ2
1535     PLAIN_FILE:
1536     #endif
1537     {
1538 ph10 419 in = fopen(pathname, "rb");
1539 ph10 286 handle = (void *)in;
1540     frtype = FR_PLAIN;
1541     }
1542    
1543     /* All the opening methods return errno when they fail. */
1544    
1545     if (handle == NULL)
1546     {
1547 nigel 77 if (!silent)
1548     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1549     strerror(errno));
1550 nigel 53 return 2;
1551     }
1552    
1553 ph10 286 /* Now grep the file */
1554    
1555     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1556 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1557 nigel 77
1558 ph10 286 /* Close in an appropriate manner. */
1559    
1560     #ifdef SUPPORT_LIBZ
1561     if (frtype == FR_LIBZ)
1562     gzclose(ingz);
1563     else
1564     #endif
1565    
1566     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1567     read failed. If the error indicates that the file isn't in fact bzipped, try
1568     again as a normal file. */
1569    
1570     #ifdef SUPPORT_LIBBZ2
1571     if (frtype == FR_LIBBZ2)
1572     {
1573     if (rc == 2)
1574     {
1575     int errnum;
1576     const char *err = BZ2_bzerror(inbz2, &errnum);
1577     if (errnum == BZ_DATA_ERROR_MAGIC)
1578     {
1579     BZ2_bzclose(inbz2);
1580     goto PLAIN_FILE;
1581     }
1582     else if (!silent)
1583     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1584     pathname, err);
1585     }
1586     BZ2_bzclose(inbz2);
1587     }
1588     else
1589     #endif
1590    
1591     /* Normal file close */
1592    
1593 nigel 53 fclose(in);
1594 ph10 286
1595     /* Pass back the yield from pcregrep(). */
1596    
1597 nigel 53 return rc;
1598     }
1599    
1600    
1601    
1602    
1603     /*************************************************
1604 nigel 49 * Usage function *
1605     *************************************************/
1606    
1607     static int
1608     usage(int rc)
1609     {
1610 nigel 87 option_item *op;
1611     fprintf(stderr, "Usage: pcregrep [-");
1612     for (op = optionlist; op->one_char != 0; op++)
1613     {
1614     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1615     }
1616     fprintf(stderr, "] [long options] [pattern] [files]\n");
1617 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1618     "options.\n");
1619 nigel 49 return rc;
1620     }
1621    
1622    
1623    
1624    
1625     /*************************************************
1626 nigel 53 * Help function *
1627     *************************************************/
1628    
1629     static void
1630     help(void)
1631     {
1632     option_item *op;
1633    
1634 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1635 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1636 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1637 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1638    
1639     #ifdef SUPPORT_LIBZ
1640     printf("Files whose names end in .gz are read using zlib.\n");
1641     #endif
1642    
1643     #ifdef SUPPORT_LIBBZ2
1644     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1645     #endif
1646    
1647     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1648     printf("Other files and the standard input are read as plain files.\n\n");
1649     #else
1650     printf("All files are read as plain files, without any interpretation.\n\n");
1651     #endif
1652    
1653 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1654     printf("Options:\n");
1655    
1656     for (op = optionlist; op->one_char != 0; op++)
1657     {
1658     int n;
1659     char s[4];
1660     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1661 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1662 nigel 53 if (n < 1) n = 1;
1663     printf("%.*s%s\n", n, " ", op->help_text);
1664     }
1665    
1666 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1667     printf("trailing white space is removed and blank lines are ignored.\n");
1668     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1669 nigel 53
1670 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1671 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1672     }
1673    
1674    
1675    
1676    
1677     /*************************************************
1678 nigel 77 * Handle a single-letter, no data option *
1679 nigel 53 *************************************************/
1680    
1681     static int
1682     handle_option(int letter, int options)
1683     {
1684     switch(letter)
1685     {
1686 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1687 nigel 87 case N_HELP: help(); exit(0);
1688 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1689 nigel 53 case 'c': count_only = TRUE; break;
1690 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1691     case 'H': filenames = FN_FORCE; break;
1692     case 'h': filenames = FN_NONE; break;
1693 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1694 ph10 420 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1695 nigel 87 case 'L': filenames = FN_NOMATCH_ONLY; break;
1696 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1697 nigel 53 case 'n': number = TRUE; break;
1698 nigel 87 case 'o': only_matching = TRUE; break;
1699 nigel 77 case 'q': quiet = TRUE; break;
1700 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1701 nigel 53 case 's': silent = TRUE; break;
1702 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1703 nigel 53 case 'v': invert = TRUE; break;
1704 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1705     case 'x': process_options |= PO_LINE_MATCH; break;
1706 nigel 53
1707     case 'V':
1708 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1709 nigel 53 exit(0);
1710     break;
1711    
1712     default:
1713     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1714     exit(usage(2));
1715     }
1716    
1717     return options;
1718     }
1719    
1720    
1721    
1722    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12 or 13 take "th" ("11th", "112th"), not the suffix that their final
digit alone would suggest.

Argument:  n    the number
Returns:   pointer to a static buffer holding the text; it is overwritten by
           the next call, so the result must be used or copied before then
*/

static char *
ordin(int n)
{
/* Room for the longest decimal int (sign and ten digits when int is 32 bits),
the two-letter suffix and the terminating zero. */

static char buffer[16];
char *p = buffer;
int lasttwo = n % 100;

p += sprintf(p, "%d", n);

if (lasttwo >= 11 && lasttwo <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1745    
1746    
1747    
1748     /*************************************************
1749     * Compile a single pattern *
1750     *************************************************/
1751    
1752     /* When the -F option has been used, this is called for each substring.
1753     Otherwise it's called for each supplied pattern.
1754    
1755     Arguments:
1756     pattern the pattern string
1757     options the PCRE options
1758     filename the file name, or NULL for a command-line pattern
1759     count 0 if this is the only command line pattern, or
1760     number of the command line pattern, or
1761     linenumber for a pattern from a file
1762    
1763     Returns: TRUE on success, FALSE after an error
1764     */
1765    
1766     static BOOL
1767     compile_single_pattern(char *pattern, int options, char *filename, int count)
1768     {
1769     char buffer[MBUFTHIRD + 16];
1770     const char *error;
1771     int errptr;
1772    
1773     if (pattern_count >= MAX_PATTERN_COUNT)
1774     {
1775     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1776     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1777     return FALSE;
1778     }
1779    
1780     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1781     suffix[process_options]);
1782     pattern_list[pattern_count] =
1783     pcre_compile(buffer, options, &error, &errptr, pcretables);
1784 ph10 142 if (pattern_list[pattern_count] != NULL)
1785 ph10 141 {
1786 ph10 142 pattern_count++;
1787 ph10 141 return TRUE;
1788 ph10 142 }
1789 nigel 87
1790     /* Handle compile errors */
1791    
1792     errptr -= (int)strlen(prefix[process_options]);
1793     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1794    
1795     if (filename == NULL)
1796     {
1797     if (count == 0)
1798     fprintf(stderr, "pcregrep: Error in command-line regex "
1799     "at offset %d: %s\n", errptr, error);
1800     else
1801     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1802     "at offset %d: %s\n", ordin(count), errptr, error);
1803     }
1804     else
1805     {
1806     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1807     "at offset %d: %s\n", count, filename, errptr, error);
1808     }
1809    
1810     return FALSE;
1811     }
1812    
1813    
1814    
1815     /*************************************************
1816     * Compile one supplied pattern *
1817     *************************************************/
1818    
1819     /* When the -F option has been used, each string may be a list of strings,
1820 nigel 91 separated by line breaks. They will be matched literally.
1821 nigel 87
1822     Arguments:
1823     pattern the pattern string
1824     options the PCRE options
1825     filename the file name, or NULL for a command-line pattern
1826     count 0 if this is the only command line pattern, or
1827     number of the command line pattern, or
1828     linenumber for a pattern from a file
1829    
1830     Returns: TRUE on success, FALSE after an error
1831     */
1832    
1833     static BOOL
1834     compile_pattern(char *pattern, int options, char *filename, int count)
1835     {
1836     if ((process_options & PO_FIXED_STRINGS) != 0)
1837     {
1838 nigel 93 char *eop = pattern + strlen(pattern);
1839 nigel 87 char buffer[MBUFTHIRD];
1840     for(;;)
1841     {
1842 nigel 93 int ellength;
1843     char *p = end_of_line(pattern, eop, &ellength);
1844     if (ellength == 0)
1845 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1846 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1847 nigel 93 pattern = p;
1848 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1849     return FALSE;
1850     }
1851     }
1852     else return compile_single_pattern(pattern, options, filename, count);
1853     }
1854    
1855    
1856    
1857     /*************************************************
1858 nigel 49 * Main program *
1859     *************************************************/
1860    
1861 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1862    
1863 nigel 49 int
1864     main(int argc, char **argv)
1865     {
1866 nigel 53 int i, j;
1867 nigel 49 int rc = 1;
1868 nigel 87 int pcre_options = 0;
1869     int cmd_pattern_count = 0;
1870 ph10 141 int hint_count = 0;
1871 nigel 49 int errptr;
1872 nigel 87 BOOL only_one_at_top;
1873     char *patterns[MAX_PATTERN_COUNT];
1874     const char *locale_from = "--locale";
1875 nigel 49 const char *error;
1876    
1877 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1878     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1879 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
1880 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
1881 ph10 391 rather than escapes such as as '\r'. */
1882 nigel 91
1883     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1884     switch(i)
1885     {
1886 ph10 391 default: newline = (char *)"lf"; break;
1887     case 13: newline = (char *)"cr"; break;
1888     case (13 << 8) | 10: newline = (char *)"crlf"; break;
1889     case -1: newline = (char *)"any"; break;
1890     case -2: newline = (char *)"anycrlf"; break;
1891 nigel 91 }
1892    
1893 nigel 49 /* Process the options */
1894    
1895     for (i = 1; i < argc; i++)
1896     {
1897 nigel 77 option_item *op = NULL;
1898     char *option_data = (char *)""; /* default to keep compiler happy */
1899     BOOL longop;
1900     BOOL longopwasequals = FALSE;
1901    
1902 nigel 49 if (argv[i][0] != '-') break;
1903 nigel 53
1904 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1905 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1906 nigel 63
1907 nigel 77 if (argv[i][1] == 0)
1908     {
1909 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1910 nigel 77 else exit(usage(2));
1911     }
1912 nigel 63
1913 nigel 77 /* Handle a long name option, or -- to terminate the options */
1914 nigel 53
1915     if (argv[i][1] == '-')
1916 nigel 49 {
1917 nigel 77 char *arg = argv[i] + 2;
1918     char *argequals = strchr(arg, '=');
1919 nigel 53
1920 nigel 77 if (*arg == 0) /* -- terminates options */
1921 nigel 49 {
1922 nigel 77 i++;
1923     break; /* out of the options-handling loop */
1924 nigel 53 }
1925 nigel 49
1926 nigel 77 longop = TRUE;
1927    
1928     /* Some long options have data that follows after =, for example file=name.
1929     Some options have variations in the long name spelling: specifically, we
1930     allow "regexp" because GNU grep allows it, though I personally go along
1931 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1932     These options are entered in the table as "regex(p)". No option is in both
1933     these categories, fortunately. */
1934 nigel 77
1935 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1936     {
1937 nigel 77 char *opbra = strchr(op->long_name, '(');
1938     char *equals = strchr(op->long_name, '=');
1939     if (opbra == NULL) /* Not a (p) case */
1940 nigel 53 {
1941 nigel 77 if (equals == NULL) /* Not thing=data case */
1942     {
1943     if (strcmp(arg, op->long_name) == 0) break;
1944     }
1945     else /* Special case xxx=data */
1946     {
1947     int oplen = equals - op->long_name;
1948 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1949 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1950     {
1951     option_data = arg + arglen;
1952     if (*option_data == '=')
1953     {
1954     option_data++;
1955     longopwasequals = TRUE;
1956     }
1957     break;
1958     }
1959     }
1960 nigel 53 }
1961 nigel 77 else /* Special case xxxx(p) */
1962     {
1963     char buff1[24];
1964     char buff2[24];
1965     int baselen = opbra - op->long_name;
1966     sprintf(buff1, "%.*s", baselen, op->long_name);
1967 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1968 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1969 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1970     break;
1971     }
1972 nigel 53 }
1973 nigel 77
1974 nigel 53 if (op->one_char == 0)
1975     {
1976     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1977     exit(usage(2));
1978     }
1979     }
1980 nigel 49
1981 nigel 89
1982     /* Jeffrey Friedl's debugging harness uses these additional options which
1983     are not in the right form for putting in the option table because they use
1984     only one hyphen, yet are more than one character long. By putting them
1985     separately here, they will not get displayed as part of the help() output,
1986     but I don't think Jeffrey will care about that. */
1987    
1988     #ifdef JFRIEDL_DEBUG
1989     else if (strcmp(argv[i], "-pre") == 0) {
1990     jfriedl_prefix = argv[++i];
1991     continue;
1992     } else if (strcmp(argv[i], "-post") == 0) {
1993     jfriedl_postfix = argv[++i];
1994     continue;
1995     } else if (strcmp(argv[i], "-XT") == 0) {
1996     sscanf(argv[++i], "%d", &jfriedl_XT);
1997     continue;
1998     } else if (strcmp(argv[i], "-XR") == 0) {
1999     sscanf(argv[++i], "%d", &jfriedl_XR);
2000     continue;
2001     }
2002     #endif
2003    
2004    
2005 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2006     continue till we hit the last one or one that needs data. */
2007 nigel 53
2008     else
2009     {
2010     char *s = argv[i] + 1;
2011 nigel 77 longop = FALSE;
2012 nigel 53 while (*s != 0)
2013     {
2014 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2015     { if (*s == op->one_char) break; }
2016     if (op->one_char == 0)
2017 nigel 53 {
2018 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2019     *s, argv[i]);
2020     exit(usage(2));
2021     }
2022     if (op->type != OP_NODATA || s[1] == 0)
2023     {
2024     option_data = s+1;
2025 nigel 53 break;
2026     }
2027 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2028 nigel 49 }
2029     }
2030 nigel 77
2031 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2032     is NO_DATA, it means that there is no data, and the option might set
2033     something in the PCRE options. */
2034 nigel 77
2035     if (op->type == OP_NODATA)
2036     {
2037 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2038     continue;
2039     }
2040    
2041     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2042     either has a value or defaults to something. It cannot have data in a
2043     separate item. At the moment, the only such options are "colo(u)r" and
2044 nigel 89 Jeffrey Friedl's special -S debugging option. */
2045 nigel 87
2046     if (*option_data == 0 &&
2047     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2048     {
2049     switch (op->one_char)
2050 nigel 77 {
2051 nigel 87 case N_COLOUR:
2052     colour_option = (char *)"auto";
2053     break;
2054     #ifdef JFRIEDL_DEBUG
2055     case 'S':
2056     S_arg = 0;
2057     break;
2058     #endif
2059 nigel 77 }
2060 nigel 87 continue;
2061     }
2062 nigel 77
2063 nigel 87 /* Otherwise, find the data string for the option. */
2064    
2065     if (*option_data == 0)
2066     {
2067     if (i >= argc - 1 || longopwasequals)
2068 nigel 77 {
2069 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2070     exit(usage(2));
2071     }
2072     option_data = argv[++i];
2073     }
2074    
2075     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2076     multiple times to create a list of patterns. */
2077    
2078     if (op->type == OP_PATLIST)
2079     {
2080     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2081     {
2082     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2083     MAX_PATTERN_COUNT);
2084     return 2;
2085     }
2086     patterns[cmd_pattern_count++] = option_data;
2087     }
2088    
2089     /* Otherwise, deal with single string or numeric data values. */
2090    
2091     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2092     {
2093     *((char **)op->dataptr) = option_data;
2094     }
2095     else
2096     {
2097     char *endptr;
2098     int n = strtoul(option_data, &endptr, 10);
2099     if (*endptr != 0)
2100     {
2101     if (longop)
2102 nigel 77 {
2103 nigel 87 char *equals = strchr(op->long_name, '=');
2104     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2105     equals - op->long_name;
2106     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2107     option_data, nlen, op->long_name);
2108 nigel 77 }
2109 nigel 87 else
2110     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2111     option_data, op->one_char);
2112     exit(usage(2));
2113 nigel 77 }
2114 nigel 87 *((int *)op->dataptr) = n;
2115 nigel 77 }
2116 nigel 49 }
2117    
2118 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2119     for -A and -B. */
2120    
2121     if (both_context > 0)
2122     {
2123     if (after_context == 0) after_context = both_context;
2124     if (before_context == 0) before_context = both_context;
2125     }
2126 ph10 286
2127     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2128 ph10 280 However, the latter two set the only_matching flag. */
2129 nigel 77
2130 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2131 ph10 286 (file_offsets && line_offsets))
2132 ph10 280 {
2133     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2134     "and/or --line-offsets\n");
2135     exit(usage(2));
2136     }
2137    
2138 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2139    
2140 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2141     LC_ALL environment variable is set, and if so, use it. */
2142 nigel 49
2143 nigel 87 if (locale == NULL)
2144 nigel 53 {
2145 nigel 87 locale = getenv("LC_ALL");
2146     locale_from = "LCC_ALL";
2147 nigel 53 }
2148 nigel 49
2149 nigel 87 if (locale == NULL)
2150     {
2151     locale = getenv("LC_CTYPE");
2152     locale_from = "LC_CTYPE";
2153     }
2154 nigel 49
2155 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2156     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2157    
2158     if (locale != NULL)
2159 nigel 49 {
2160 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2161 nigel 53 {
2162 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2163     locale, locale_from);
2164 nigel 53 return 2;
2165     }
2166 nigel 87 pcretables = pcre_maketables();
2167     }
2168 nigel 77
2169 nigel 87 /* Sort out colouring */
2170    
2171     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2172     {
2173     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2174     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2175     else
2176 nigel 53 {
2177 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2178     colour_option);
2179     return 2;
2180 nigel 77 }
2181 nigel 87 if (do_colour)
2182 nigel 77 {
2183 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2184     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2185     if (cs != NULL) colour_string = cs;
2186 nigel 77 }
2187 nigel 87 }
2188 nigel 77
2189 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2190    
2191     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2192     {
2193     pcre_options |= PCRE_NEWLINE_CR;
2194 nigel 93 endlinetype = EL_CR;
2195 nigel 91 }
2196     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2197     {
2198     pcre_options |= PCRE_NEWLINE_LF;
2199 nigel 93 endlinetype = EL_LF;
2200 nigel 91 }
2201     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2202     {
2203     pcre_options |= PCRE_NEWLINE_CRLF;
2204 nigel 93 endlinetype = EL_CRLF;
2205 nigel 91 }
2206 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2207     {
2208     pcre_options |= PCRE_NEWLINE_ANY;
2209     endlinetype = EL_ANY;
2210     }
2211 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2212     {
2213     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2214     endlinetype = EL_ANYCRLF;
2215     }
2216 nigel 91 else
2217     {
2218     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2219     return 2;
2220     }
2221    
2222 nigel 87 /* Interpret the text values for -d and -D */
2223    
2224     if (dee_option != NULL)
2225     {
2226     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2227     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2228     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2229     else
2230 nigel 77 {
2231 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2232     return 2;
2233 nigel 53 }
2234 nigel 49 }
2235    
2236 nigel 87 if (DEE_option != NULL)
2237     {
2238     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2239     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2240     else
2241     {
2242     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2243     return 2;
2244     }
2245     }
2246 nigel 49
2247 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2248 nigel 87
2249     #ifdef JFRIEDL_DEBUG
2250     if (S_arg > 9)
2251 nigel 49 {
2252 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2253     return 2;
2254     }
2255 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2256     {
2257     if (jfriedl_XT == 0) jfriedl_XT = 1;
2258     if (jfriedl_XR == 0) jfriedl_XR = 1;
2259     }
2260 nigel 87 #endif
2261 nigel 77
2262 nigel 87 /* Get memory to store the pattern and hints lists. */
2263    
2264     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2265     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2266    
2267     if (pattern_list == NULL || hints_list == NULL)
2268     {
2269     fprintf(stderr, "pcregrep: malloc failed\n");
2270 ph10 123 goto EXIT2;
2271 nigel 87 }
2272    
2273     /* If no patterns were provided by -e, and there is no file provided by -f,
2274     the first argument is the one and only pattern, and it must exist. */
2275    
2276     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2277     {
2278 nigel 63 if (i >= argc) return usage(2);
2279 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2280     }
2281 nigel 77
2282 nigel 87 /* Compile the patterns that were provided on the command line, either by
2283     multiple uses of -e or as a single unkeyed pattern. */
2284    
2285     for (j = 0; j < cmd_pattern_count; j++)
2286     {
2287     if (!compile_pattern(patterns[j], pcre_options, NULL,
2288     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2289 ph10 123 goto EXIT2;
2290 nigel 87 }
2291    
2292     /* Compile the regular expressions that are provided in a file. */
2293    
2294     if (pattern_filename != NULL)
2295     {
2296     int linenumber = 0;
2297     FILE *f;
2298     char *filename;
2299     char buffer[MBUFTHIRD];
2300    
2301     if (strcmp(pattern_filename, "-") == 0)
2302 nigel 77 {
2303 nigel 87 f = stdin;
2304     filename = stdin_name;
2305 nigel 77 }
2306 nigel 87 else
2307 nigel 77 {
2308 nigel 87 f = fopen(pattern_filename, "r");
2309     if (f == NULL)
2310     {
2311     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2312     strerror(errno));
2313 ph10 123 goto EXIT2;
2314 nigel 87 }
2315     filename = pattern_filename;
2316 nigel 77 }
2317    
2318 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2319 nigel 53 {
2320 nigel 87 char *s = buffer + (int)strlen(buffer);
2321     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2322     *s = 0;
2323     linenumber++;
2324     if (buffer[0] == 0) continue; /* Skip blank lines */
2325     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2326 ph10 121 goto EXIT2;
2327 nigel 53 }
2328 nigel 87
2329     if (f != stdin) fclose(f);
2330 nigel 49 }
2331    
2332 nigel 77 /* Study the regular expressions, as we will be running them many times */
2333 nigel 53
2334     for (j = 0; j < pattern_count; j++)
2335     {
2336     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2337     if (error != NULL)
2338     {
2339     char s[16];
2340     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2341     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2342 ph10 121 goto EXIT2;
2343 nigel 53 }
2344 ph10 142 hint_count++;
2345 nigel 53 }
2346    
2347 nigel 77 /* If there are include or exclude patterns, compile them. */
2348    
2349     if (exclude_pattern != NULL)
2350     {
2351 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2352     pcretables);
2353 nigel 77 if (exclude_compiled == NULL)
2354     {
2355     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2356     errptr, error);
2357 ph10 121 goto EXIT2;
2358 nigel 77 }
2359     }
2360    
2361     if (include_pattern != NULL)
2362     {
2363 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2364     pcretables);
2365 nigel 77 if (include_compiled == NULL)
2366     {
2367     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2368     errptr, error);
2369 ph10 121 goto EXIT2;
2370 nigel 77 }
2371     }
2372    
2373 ph10 325 if (exclude_dir_pattern != NULL)
2374     {
2375     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2376     pcretables);
2377     if (exclude_dir_compiled == NULL)
2378     {
2379     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2380     errptr, error);
2381     goto EXIT2;
2382     }
2383     }
2384    
2385     if (include_dir_pattern != NULL)
2386     {
2387     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2388     pcretables);
2389     if (include_dir_compiled == NULL)
2390     {
2391     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2392     errptr, error);
2393     goto EXIT2;
2394     }
2395     }
2396    
2397 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2398 nigel 49
2399 nigel 87 if (i >= argc)
2400 ph10 121 {
2401 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2402 ph10 121 goto EXIT;
2403 ph10 123 }
2404 nigel 49
2405 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2406     Pass in the fact that there is only one argument at top level - this suppresses
2407 nigel 87 the file name if the argument is not a directory and filenames are not
2408     otherwise forced. */
2409 nigel 49
2410 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2411 nigel 49
2412     for (; i < argc; i++)
2413     {
2414 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2415     only_one_at_top);
2416 nigel 77 if (frc > 1) rc = frc;
2417     else if (frc == 0 && rc == 1) rc = 0;
2418 nigel 49 }
2419    
2420 ph10 121 EXIT:
2421     if (pattern_list != NULL)
2422     {
2423 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2424 ph10 121 free(pattern_list);
2425 ph10 123 }
2426 ph10 121 if (hints_list != NULL)
2427     {
2428 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2429 ph10 121 free(hints_list);
2430 ph10 123 }
2431 nigel 49 return rc;
2432 ph10 121
2433     EXIT2:
2434     rc = 2;
2435     goto EXIT;
2436 nigel 49 }
2437    
2438 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12