/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 419 - (hide annotations) (download)
Wed Aug 12 10:45:33 2009 UTC (5 years ago) by ph10
File MIME type: text/plain
File size: 69073 byte(s)
Open with "rb" in pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 377 Copyright (c) 1997-2009 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* BOOL is a plain int; FALSE and TRUE are the two values stored in it. */

typedef int BOOL;

#define FALSE 0
#define TRUE 1

/* Fixed limits. OFFSET_SIZE is both the size of the offsets vector declared
in pcregrep() and the vector size that is passed to pcre_exec(). */

#define MAX_PATTERN_COUNT 100
#define OFFSET_SIZE 99

/* One third of the size of the main file buffer: the larger of BUFSIZ and
8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
/* Values for the "filenames" variable. The numerical order matters: every
value greater than FN_DEFAULT is taken to mean that a file name is wanted. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles: a plain stdio stream, libz, or libbz2. */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d option (dee_*) and for the -D option (DEE_*). */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for special processing options; they are combined in the
process_options variable and used to index the prefix[] and suffix[] tables. */

#define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004

/* Line ending types; the current one is held in "endlinetype". */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
106 nigel 87
107 nigel 93
108    
109 nigel 49 /*************************************************
110     * Global variables *
111     *************************************************/
112    
113 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
114     regular code. */
115    
116     #ifdef JFRIEDL_DEBUG
117     static int S_arg = -1;
118 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120     static const char *jfriedl_prefix = "";
121     static const char *jfriedl_postfix = "";
122 nigel 87 #endif
123    
124 nigel 93 static int endlinetype;
125 nigel 91
126 nigel 87 static char *colour_string = (char *)"1;31";
127     static char *colour_option = NULL;
128     static char *dee_option = NULL;
129     static char *DEE_option = NULL;
130 nigel 91 static char *newline = NULL;
131 nigel 53 static char *pattern_filename = NULL;
132 nigel 77 static char *stdin_name = (char *)"(standard input)";
133 nigel 87 static char *locale = NULL;
134    
135     static const unsigned char *pcretables = NULL;
136    
137 nigel 53 static int pattern_count = 0;
138 ph10 121 static pcre **pattern_list = NULL;
139     static pcre_extra **hints_list = NULL;
140 nigel 49
141 nigel 77 static char *include_pattern = NULL;
142     static char *exclude_pattern = NULL;
143 ph10 325 static char *include_dir_pattern = NULL;
144     static char *exclude_dir_pattern = NULL;
145 nigel 77
146     static pcre *include_compiled = NULL;
147     static pcre *exclude_compiled = NULL;
148 ph10 325 static pcre *include_dir_compiled = NULL;
149     static pcre *exclude_dir_compiled = NULL;
150 nigel 77
151     static int after_context = 0;
152     static int before_context = 0;
153     static int both_context = 0;
154 nigel 87 static int dee_action = dee_READ;
155     static int DEE_action = DEE_READ;
156     static int error_count = 0;
157     static int filenames = FN_DEFAULT;
158     static int process_options = 0;
159 nigel 77
160 nigel 49 static BOOL count_only = FALSE;
161 nigel 87 static BOOL do_colour = FALSE;
162 ph10 280 static BOOL file_offsets = FALSE;
163 nigel 77 static BOOL hyphenpending = FALSE;
164 nigel 49 static BOOL invert = FALSE;
165 ph10 280 static BOOL line_offsets = FALSE;
166 nigel 77 static BOOL multiline = FALSE;
167 nigel 49 static BOOL number = FALSE;
168 nigel 87 static BOOL only_matching = FALSE;
169 nigel 77 static BOOL quiet = FALSE;
170 nigel 49 static BOOL silent = FALSE;
171 nigel 93 static BOOL utf8 = FALSE;
172 nigel 49
173 nigel 53 /* Structure for options and list of them */
174 nigel 49
/* The kinds of option that can appear in the option table; the value is kept
in the "type" field of an option_item. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the option table. */

typedef struct option_item {
  int type;                /* One of the OP_xxx values */
  int one_char;            /* Option letter, or a negative N_xxx code */
  void *dataptr;           /* Variable associated with the option, or NULL */
  const char *long_name;   /* Long option name, possibly with "=value" text */
  const char *help_text;   /* Description of the option */
} option_item;
185 nigel 49
186 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
187     used to identify them. */
188    
/* Codes used in the one_char field for options that have only a long name.
They are all negative so that they cannot be confused with a real option
letter. */

#define N_COLOUR (-1)
#define N_EXCLUDE (-2)
#define N_EXCLUDE_DIR (-3)
#define N_HELP (-4)
#define N_INCLUDE (-5)
#define N_INCLUDE_DIR (-6)
#define N_LABEL (-7)
#define N_LOCALE (-8)
#define N_NULL (-9)
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)
200 nigel 87
201 nigel 53 static option_item optionlist[] = {
202 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
203     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
204     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
205     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
206     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
207     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
208     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
209     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
210     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
211     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
212     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
213     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
214     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
215 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
216 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
217     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
218     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
219     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
220     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
221     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
222 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
223 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
224     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
225 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
227     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
228     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
229     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
230     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
231     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
232 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234 nigel 87 #ifdef JFRIEDL_DEBUG
235     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
236     #endif
237     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
238     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
239     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
240     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
241     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
242     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
243     { OP_NODATA, 0, NULL, NULL, NULL }
244 nigel 53 };
245    
246 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248     that the combination of -w and -x has the same effect as -x on its own, so we
249     can treat them as the same. */
250 nigel 53
/* Text to put before and after a pattern. Both tables are indexed by the
value of process_options, that is, by the sum of 1 for -w, 2 for -x and 4 for
-F. Entries 2 and 3 are the same, as are entries 6 and 7, because -x combined
with -w behaves like -x alone. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
256    
257 ph10 149 /* UTF-8 tables - used only when the newline setting is "any". */
258 nigel 87
/* Masks for the data bits in the first byte of a UTF-8 character, indexed by
the number of additional bytes that follow it. */

const int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
};

/* The number of additional bytes that follow a UTF-8 first byte, indexed by
the bottom six bits of that byte (it is consulted only for bytes that are
0xc0 or greater). */

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3, 4,4,4,4,5,5,5,5
};
266    
267    
268    
269 nigel 53 /*************************************************
270 nigel 87 * OS-specific functions *
271 nigel 53 *************************************************/
272    
273     /* These functions are defined so that they can be made system specific,
274 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
275 nigel 53
276    
277     /************* Directory scanning in Unix ***********/
278    
279 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280 nigel 53 #include <sys/types.h>
281     #include <sys/stat.h>
282     #include <dirent.h>
283    
/* In the Unix version a directory handle is simply a DIR from <dirent.h>. */

typedef DIR directory_type;
285    
/* Test whether a path names a directory (Unix version).

Argument:
  filename    the path to test

Returns:      '/' if stat() succeeds and reports a directory
              0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
  struct stat statbuf;

  /* A path that cannot be examined is reported as not being a directory, in
  the expectation that opening it as a file will then fail. */

  if (stat(filename, &statbuf) < 0)
    return 0;

  /* S_ISDIR() is the base POSIX way of testing the file type. The S_IFMT and
  S_IFDIR masks are XSI extensions that <sys/stat.h> need not make visible
  when compiling in a strict standards mode. */

  return S_ISDIR(statbuf.st_mode) ? '/' : 0;
}
294    
295 nigel 67 static directory_type *
296 nigel 53 opendirectory(char *filename)
297     {
298     return opendir(filename);
299     }
300    
301 nigel 67 static char *
302 nigel 53 readdirectory(directory_type *dir)
303     {
304     for (;;)
305     {
306     struct dirent *dent = readdir(dir);
307     if (dent == NULL) return NULL;
308     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309     return dent->d_name;
310     }
311 ph10 151 /* Control never reaches here */
312 nigel 53 }
313    
314 nigel 67 static void
315 nigel 53 closedirectory(directory_type *dir)
316     {
317     closedir(dir);
318     }
319    
320    
321 nigel 87 /************* Test for regular file in Unix **********/
322    
/* Test whether a path names a regular file (Unix version).

Argument:
  filename    the path to test

Returns:      1 if stat() succeeds and reports a regular file
              1 if stat() fails (see below)
              0 otherwise
*/

static int
isregfile(char *filename)
{
  struct stat statbuf;

  /* A path that cannot be examined is reported as regular, in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &statbuf) < 0)
    return 1;

  /* S_ISREG() is the base POSIX way of testing the file type. The S_IFMT and
  S_IFREG masks are XSI extensions that <sys/stat.h> need not make visible
  when compiling in a strict standards mode. */

  return S_ISREG(statbuf.st_mode) ? 1 : 0;
}
331    
332    
333     /************* Test stdout for being a terminal in Unix **********/
334    
335     static BOOL
336     is_stdout_tty(void)
337     {
338     return isatty(fileno(stdout));
339     }
340    
341    
342 nigel 63 /************* Directory scanning in Win32 ***********/
343 nigel 53
344 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
345 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
347     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348 ph10 283 */
349 nigel 53
350 ph10 97 #elif HAVE_WINDOWS_H
351 nigel 63
352     #ifndef STRICT
353     # define STRICT
354     #endif
355     #ifndef WIN32_LEAN_AND_MEAN
356     # define WIN32_LEAN_AND_MEAN
357     #endif
358 ph10 283
359     #include <windows.h>
360    
361 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
362     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363     #endif
364    
365 nigel 63 typedef struct directory_type
366     {
367     HANDLE handle;
368     BOOL first;
369     WIN32_FIND_DATA data;
370     } directory_type;
371    
372     int
373     isdirectory(char *filename)
374     {
375     DWORD attr = GetFileAttributes(filename);
376     if (attr == INVALID_FILE_ATTRIBUTES)
377     return 0;
378     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379     }
380    
381     directory_type *
382     opendirectory(char *filename)
383     {
384     size_t len;
385     char *pattern;
386     directory_type *dir;
387     DWORD err;
388     len = strlen(filename);
389     pattern = (char *) malloc(len + 3);
390     dir = (directory_type *) malloc(sizeof(*dir));
391     if ((pattern == NULL) || (dir == NULL))
392     {
393     fprintf(stderr, "pcregrep: malloc failed\n");
394     exit(2);
395     }
396     memcpy(pattern, filename, len);
397     memcpy(&(pattern[len]), "\\*", 3);
398     dir->handle = FindFirstFile(pattern, &(dir->data));
399     if (dir->handle != INVALID_HANDLE_VALUE)
400     {
401     free(pattern);
402     dir->first = TRUE;
403     return dir;
404     }
405     err = GetLastError();
406     free(pattern);
407     free(dir);
408     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409     return NULL;
410     }
411    
412     char *
413     readdirectory(directory_type *dir)
414     {
415     for (;;)
416     {
417     if (!dir->first)
418     {
419     if (!FindNextFile(dir->handle, &(dir->data)))
420     return NULL;
421     }
422     else
423     {
424     dir->first = FALSE;
425     }
426     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427     return dir->data.cFileName;
428     }
429     #ifndef _MSC_VER
430     return NULL; /* Keep compiler happy; never executed */
431     #endif
432     }
433    
434     void
435     closedirectory(directory_type *dir)
436     {
437     FindClose(dir->handle);
438     free(dir);
439     }
440    
441    
442 nigel 87 /************* Test for regular file in Win32 **********/
443    
444     /* I don't know how to do this, or if it can be done; assume all paths are
445     regular if they are not directories. */
446    
/* Test whether a path names a regular file (Win32 version). Anything that
isdirectory() does not report as a directory is taken to be regular.

Argument:
  filename    the path to test

Returns:      1 if isdirectory() returns zero for the path
              0 otherwise
*/

int
isregfile(char *filename)
{
  if (isdirectory(filename) != 0) return 0;
  return 1;
}
451    
452    
453     /************* Test stdout for being a terminal in Win32 **********/
454    
455     /* I don't know how to do this; assume never */
456    
457     static BOOL
458     is_stdout_tty(void)
459     {
460 ph10 283 return FALSE;
461 nigel 87 }
462    
463    
464 nigel 53 /************* Directory scanning when we can't do it ***********/
465    
466     /* The type is void, and apart from isdirectory(), the functions do nothing. */
467    
468 nigel 63 #else
469    
/* Versions for systems on which directories cannot be scanned. There is
nothing behind a directory handle, so the type is void. */

typedef void directory_type;

/* No path is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* No directory can be opened; the result is always a null pointer. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries; the result is always a null pointer. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
476    
477 nigel 87
478     /************* Test for regular when we can't do it **********/
479    
480     /* Assume all files are regular. */
481    
/* Version for systems on which the file type cannot be tested: every path is
reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
483    
484    
485     /************* Test stdout for being a terminal when we can't do it **********/
486    
487     static BOOL
488     is_stdout_tty(void)
489     {
490     return FALSE;
491     }
492    
493    
494 nigel 53 #endif
495    
496    
497    
498 ph10 137 #ifndef HAVE_STRERROR
499 nigel 49 /*************************************************
500     * Provide strerror() for non-ANSI libraries *
501     *************************************************/
502    
503     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
504     in their libraries, but can provide the same facility by this simple
505     alternative function. */
506    
/* The table of system error messages and the number of entries in it, both
supplied by the system library. */

extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement for strerror().

Argument:
  n           an error number

Returns:      sys_errlist[n] if n is at least zero and less than sys_nerr
              the text "unknown error number" for any other value
*/

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];

  return "unknown error number";
}
516     #endif /* HAVE_STRERROR */
517    
518    
519    
520     /*************************************************
521 nigel 93 * Find end of line *
522     *************************************************/
523    
524     /* The length of the endline sequence that is found is set via lenptr. This may
525     be zero at the very end of the file if there is no line-ending sequence there.
526    
527     Arguments:
528     p current position in line
529     endptr end of available data
530     lenptr where to put the length of the eol sequence
531    
532     Returns: pointer to the byte that follows the line, including its line-ending sequence; this is endptr if no line ending is found
533     */
534    
535     static char *
536     end_of_line(char *p, char *endptr, int *lenptr)
537     {
538     switch(endlinetype)
539     {
540     default: /* Just in case */
541     case EL_LF:
542     while (p < endptr && *p != '\n') p++;
543     if (p < endptr)
544     {
545     *lenptr = 1;
546     return p + 1;
547     }
548     *lenptr = 0;
549     return endptr;
550    
551     case EL_CR:
552     while (p < endptr && *p != '\r') p++;
553     if (p < endptr)
554     {
555     *lenptr = 1;
556     return p + 1;
557     }
558     *lenptr = 0;
559     return endptr;
560    
561     case EL_CRLF:
562     for (;;)
563     {
564     while (p < endptr && *p != '\r') p++;
565     if (++p >= endptr)
566     {
567     *lenptr = 0;
568     return endptr;
569     }
570     if (*p == '\n')
571     {
572     *lenptr = 2;
573     return p + 1;
574     }
575     }
576     break;
577    
578 ph10 149 case EL_ANYCRLF:
579     while (p < endptr)
580     {
581     int extra = 0;
582     register int c = *((unsigned char *)p);
583    
584     if (utf8 && c >= 0xc0)
585     {
586     int gcii, gcss;
587     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
588     gcss = 6*extra;
589     c = (c & utf8_table3[extra]) << gcss;
590     for (gcii = 1; gcii <= extra; gcii++)
591     {
592     gcss -= 6;
593     c |= (p[gcii] & 0x3f) << gcss;
594     }
595     }
596    
597     p += 1 + extra;
598    
599     switch (c)
600     {
601     case 0x0a: /* LF */
602     *lenptr = 1;
603     return p;
604    
605     case 0x0d: /* CR */
606     if (p < endptr && *p == 0x0a)
607     {
608     *lenptr = 2;
609     p++;
610     }
611     else *lenptr = 1;
612     return p;
613 ph10 150
614 ph10 149 default:
615     break;
616     }
617     } /* End of loop for ANYCRLF case */
618 ph10 150
619 ph10 149 *lenptr = 0; /* Must have hit the end */
620     return endptr;
621    
622 nigel 93 case EL_ANY:
623     while (p < endptr)
624     {
625     int extra = 0;
626     register int c = *((unsigned char *)p);
627    
628     if (utf8 && c >= 0xc0)
629     {
630     int gcii, gcss;
631     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
632     gcss = 6*extra;
633     c = (c & utf8_table3[extra]) << gcss;
634     for (gcii = 1; gcii <= extra; gcii++)
635     {
636     gcss -= 6;
637     c |= (p[gcii] & 0x3f) << gcss;
638     }
639     }
640    
641     p += 1 + extra;
642    
643     switch (c)
644     {
645     case 0x0a: /* LF */
646     case 0x0b: /* VT */
647     case 0x0c: /* FF */
648     *lenptr = 1;
649     return p;
650    
651     case 0x0d: /* CR */
652     if (p < endptr && *p == 0x0a)
653     {
654     *lenptr = 2;
655     p++;
656     }
657     else *lenptr = 1;
658     return p;
659    
660     case 0x85: /* NEL */
661     *lenptr = utf8? 2 : 1;
662     return p;
663    
664     case 0x2028: /* LS */
665     case 0x2029: /* PS */
666     *lenptr = 3;
667     return p;
668    
669     default:
670     break;
671     }
672     } /* End of loop for ANY case */
673    
674     *lenptr = 0; /* Must have hit the end */
675     return endptr;
676     } /* End of overall switch */
677     }
678    
679    
680    
681     /*************************************************
682     * Find start of previous line *
683     *************************************************/
684    
685     /* This is called when looking back for before lines to print.
686    
687     Arguments:
688     p start of the subsequent line
689     startptr start of available data
690    
691     Returns: pointer to the start of the previous line
692     */
693    
694     static char *
695     previous_line(char *p, char *startptr)
696     {
697     switch(endlinetype)
698     {
699     default: /* Just in case */
700     case EL_LF:
701     p--;
702     while (p > startptr && p[-1] != '\n') p--;
703     return p;
704    
705     case EL_CR:
706     p--;
707     while (p > startptr && p[-1] != '\n') p--;
708     return p;
709    
710     case EL_CRLF:
711     for (;;)
712     {
713     p -= 2;
714     while (p > startptr && p[-1] != '\n') p--;
715     if (p <= startptr + 1 || p[-2] == '\r') return p;
716     }
717     return p; /* But control should never get here */
718    
719     case EL_ANY:
720 ph10 150 case EL_ANYCRLF:
721 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722     if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724     while (p > startptr)
725     {
726     register int c;
727     char *pp = p - 1;
728    
729     if (utf8)
730     {
731     int extra = 0;
732     while ((*pp & 0xc0) == 0x80) pp--;
733     c = *((unsigned char *)pp);
734     if (c >= 0xc0)
735     {
736     int gcii, gcss;
737     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738     gcss = 6*extra;
739     c = (c & utf8_table3[extra]) << gcss;
740     for (gcii = 1; gcii <= extra; gcii++)
741     {
742     gcss -= 6;
743     c |= (pp[gcii] & 0x3f) << gcss;
744     }
745     }
746     }
747     else c = *((unsigned char *)pp);
748    
749 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
750 nigel 93 {
751     case 0x0a: /* LF */
752 ph10 149 case 0x0d: /* CR */
753     return p;
754 ph10 150
755 ph10 149 default:
756     break;
757 ph10 150 }
758 ph10 149
759     else switch (c)
760     {
761     case 0x0a: /* LF */
762 nigel 93 case 0x0b: /* VT */
763     case 0x0c: /* FF */
764     case 0x0d: /* CR */
765     case 0x85: /* NEL */
766     case 0x2028: /* LS */
767     case 0x2029: /* PS */
768     return p;
769    
770     default:
771     break;
772     }
773    
774     p = pp; /* Back one character */
775     } /* End of loop for ANY case */
776    
777     return startptr; /* Hit start of data */
778     } /* End of overall switch */
779     }
780    
781    
782    
783    
784    
785     /*************************************************
786 nigel 77 * Print the previous "after" lines *
787 nigel 49 *************************************************/
788    
789 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
790 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
791     that a binary zero does not terminate it.
792 nigel 77
793     Arguments:
794     lastmatchnumber the number of the last matching line, plus one
795     lastmatchrestart where we restarted after the last match
796     endptr end of available data
797     printname filename for printing
798    
799     Returns: nothing
800     */
801    
802     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803     char *endptr, char *printname)
804     {
805     if (after_context > 0 && lastmatchnumber > 0)
806     {
807     int count = 0;
808     while (lastmatchrestart < endptr && count++ < after_context)
809     {
810 nigel 93 int ellength;
811 nigel 77 char *pp = lastmatchrestart;
812     if (printname != NULL) fprintf(stdout, "%s-", printname);
813     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814 nigel 93 pp = end_of_line(pp, endptr, &ellength);
815     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816     lastmatchrestart = pp;
817 nigel 77 }
818     hyphenpending = TRUE;
819     }
820     }
821    
822    
823    
824     /*************************************************
825 ph10 378 * Apply patterns to subject till one matches *
826     *************************************************/
827    
828 ph10 392 /* This function is called to run through all patterns, looking for a match. It
829     is used multiple times for the same subject when colouring is enabled, in order
830 ph10 378 to find all possible matches.
831    
832     Arguments:
833     matchptr the start of the subject
834     length the length of the subject to match
835     offsets the offsets vector to fill in
836     mrc address of where to put the result of pcre_exec()
837 ph10 392
838     Returns: TRUE if there was a match
839 ph10 378 FALSE if there was no match
840     invert if there was a non-fatal error
841 ph10 392 */
842 ph10 378
843     static BOOL
844     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845     {
846     int i;
847     for (i = 0; i < pattern_count; i++)
848     {
849 ph10 379 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850     PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851 ph10 378 if (*mrc >= 0) return TRUE;
852     if (*mrc == PCRE_ERROR_NOMATCH) continue;
853     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855     fprintf(stderr, "this text:\n");
856     fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
857     fprintf(stderr, "\n");
858     if (error_count == 0 &&
859     (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860     {
861     fprintf(stderr, "pcregrep: error %d means that a resource limit "
862     "was exceeded\n", *mrc);
863     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864     }
865     if (error_count++ > 20)
866     {
867     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868     exit(2);
869     }
870     return invert; /* No more matching; don't show the line again */
871     }
872    
873     return FALSE; /* No match, no errors */
874     }
875    
876    
877    
878     /*************************************************
879 nigel 77 * Grep an individual file *
880     *************************************************/
881    
882     /* This is called from grep_or_recurse() below. It uses a buffer that is three
883     times the value of MBUFTHIRD. The matching point is never allowed to stray into
884     the top third of the buffer, thus keeping more of the file available for
885     context printing or for multiline scanning. For large files, the pointer will
886     be in the middle third most of the time, so the bottom third is available for
887     "before" context printing.
888    
889     Arguments:
890 ph10 286 handle the fopened FILE stream for a normal file
891     the gzFile pointer when reading is via libz
892     the BZFILE pointer when reading is via libbz2
893     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894 nigel 77 printname the file name if it is to be printed for each match
895     or NULL if the file name is not to be printed
896     it cannot be NULL if filenames[_nomatch]_only is set
897    
898     Returns: 0 if there was at least one match
899     1 otherwise (no matches)
900 ph10 286 2 if there is a read error on a .bz2 file
901 nigel 77 */
902    
903 nigel 49 static int
904 ph10 286 pcregrep(void *handle, int frtype, char *printname)
905 nigel 49 {
906     int rc = 1;
907 nigel 77 int linenumber = 1;
908     int lastmatchnumber = 0;
909 nigel 49 int count = 0;
910 ph10 280 int filepos = 0;
911 ph10 378 int offsets[OFFSET_SIZE];
912 nigel 77 char *lastmatchrestart = NULL;
913     char buffer[3*MBUFTHIRD];
914     char *ptr = buffer;
915     char *endptr;
916     size_t bufflength;
917     BOOL endhyphenpending = FALSE;
918 ph10 286 FILE *in = NULL; /* Ensure initialized */
919 nigel 49
920 ph10 286 #ifdef SUPPORT_LIBZ
921     gzFile ingz = NULL;
922     #endif
923 nigel 77
924 ph10 286 #ifdef SUPPORT_LIBBZ2
925     BZFILE *inbz2 = NULL;
926     #endif
927    
928    
929     /* Do the first read into the start of the buffer and set up the pointer to end
930     of what we have. In the case of libz, a non-zipped .gz file will be read as a
931     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932     fail. */
933    
934     #ifdef SUPPORT_LIBZ
935     if (frtype == FR_LIBZ)
936     {
937     ingz = (gzFile)handle;
938     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939     }
940     else
941     #endif
942    
943     #ifdef SUPPORT_LIBBZ2
944     if (frtype == FR_LIBBZ2)
945     {
946     inbz2 = (BZFILE *)handle;
947     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
949     } /* without the cast it is unsigned. */
950     else
951     #endif
952    
953     {
954     in = (FILE *)handle;
955     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956     }
957    
958 nigel 77 endptr = buffer + bufflength;
959    
960     /* Loop while the current pointer is not at the end of the file. For large
961     files, endptr will be at the end of the buffer when we are in the middle of the
962     file, but ptr will never get there, because as soon as it gets over 2/3 of the
963     way, the buffer is shifted left and re-filled. */
964    
965     while (ptr < endptr)
966 nigel 49 {
967 ph10 378 int endlinelength;
968 nigel 87 int mrc = 0;
969 ph10 378 BOOL match;
970 ph10 286 char *matchptr = ptr;
971 nigel 77 char *t = ptr;
972     size_t length, linelength;
973 nigel 49
974 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
975     of the subject string to pass to pcre_exec(). In multiline mode, it is the
976     length remainder of the data in the buffer. Otherwise, it is the length of
977 ph10 378 the next line, excluding the terminating newline. After matching, we always
978     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979     option is used for compiling, so that any match is constrained to be in the
980     first line. */
981 nigel 77
982 nigel 93 t = end_of_line(t, endptr, &endlinelength);
983     linelength = t - ptr - endlinelength;
984 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
985 nigel 77
986 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
987    
988     #ifdef JFRIEDL_DEBUG
989     if (jfriedl_XT || jfriedl_XR)
990     {
991     #include <sys/time.h>
992     #include <time.h>
993     struct timeval start_time, end_time;
994     struct timezone dummy;
995 ph10 392 int i;
996 nigel 89
997     if (jfriedl_XT)
998     {
999     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000     const char *orig = ptr;
1001     ptr = malloc(newlen + 1);
1002     if (!ptr) {
1003     printf("out of memory");
1004     exit(2);
1005     }
1006     endptr = ptr;
1007     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008     for (i = 0; i < jfriedl_XT; i++) {
1009     strncpy(endptr, orig, length);
1010     endptr += length;
1011     }
1012     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013     length = newlen;
1014     }
1015    
1016     if (gettimeofday(&start_time, &dummy) != 0)
1017     perror("bad gettimeofday");
1018    
1019    
1020     for (i = 0; i < jfriedl_XR; i++)
1021 ph10 392 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022 ph10 379 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023 nigel 89
1024     if (gettimeofday(&end_time, &dummy) != 0)
1025     perror("bad gettimeofday");
1026    
1027     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1028     -
1029     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1030    
1031     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1032     return 0;
1033     }
1034     #endif
1035    
1036 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1037 ph10 279 in order to find any further matches in the same line. */
1038 nigel 89
1039 ph10 286 ONLY_MATCHING_RESTART:
1040    
1041 ph10 392 /* Run through all the patterns until one matches or there is an error other
1042 ph10 378 than NOMATCH. This code is in a subroutine so that it can be re-used for
1043     finding subsequent matches when colouring matched lines. */
1044 ph10 392
1045 ph10 378 match = match_patterns(matchptr, length, offsets, &mrc);
1046 nigel 77
1047 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1048 nigel 77
1049 nigel 49 if (match != invert)
1050     {
1051 nigel 77 BOOL hyphenprinted = FALSE;
1052    
1053 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1054 nigel 77
1055 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1056    
1057     /* Just count if just counting is wanted. */
1058    
1059 nigel 49 if (count_only) count++;
1060    
1061 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1062     in the file. */
1063    
1064     else if (filenames == FN_ONLY)
1065 nigel 49 {
1066 nigel 77 fprintf(stdout, "%s\n", printname);
1067 nigel 49 return 0;
1068     }
1069    
1070 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1071    
1072 nigel 77 else if (quiet) return 0;
1073 nigel 49
1074 nigel 87 /* The --only-matching option prints just the substring that matched, and
1075 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1076 ph10 280 matching substring (they both force --only-matching). None of these options
1077     prints any context. Afterwards, adjust the start and length, and then jump
1078     back to look for further matches in the same line. If we are in invert
1079     mode, however, nothing is printed - this could be still useful because the
1080     return code is set. */
1081 nigel 87
1082     else if (only_matching)
1083     {
1084 ph10 279 if (!invert)
1085 ph10 286 {
1086 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1087     if (number) fprintf(stdout, "%d:", linenumber);
1088 ph10 280 if (line_offsets)
1089 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090 ph10 286 offsets[1] - offsets[0]);
1091 ph10 280 else if (file_offsets)
1092 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093 ph10 286 offsets[1] - offsets[0]);
1094     else
1095 ph10 377 {
1096     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099 ph10 392 }
1100 ph10 279 fprintf(stdout, "\n");
1101     matchptr += offsets[1];
1102     length -= offsets[1];
1103 ph10 286 match = FALSE;
1104     goto ONLY_MATCHING_RESTART;
1105     }
1106 nigel 87 }
1107    
1108     /* This is the default case when none of the above options is set. We print
1109     the matching lines(s), possibly preceded and/or followed by other lines of
1110     context. */
1111    
1112 nigel 49 else
1113     {
1114 nigel 77 /* See if there is a requirement to print some "after" lines from a
1115     previous match. We never print any overlaps. */
1116    
1117     if (after_context > 0 && lastmatchnumber > 0)
1118     {
1119 nigel 93 int ellength;
1120 nigel 77 int linecount = 0;
1121     char *p = lastmatchrestart;
1122    
1123     while (p < ptr && linecount < after_context)
1124     {
1125 nigel 93 p = end_of_line(p, ptr, &ellength);
1126 nigel 77 linecount++;
1127     }
1128    
1129     /* It is important to advance lastmatchrestart during this printing so
1130 nigel 87 that it interacts correctly with any "before" printing below. Print
1131     each line's data using fwrite() in case there are binary zeroes. */
1132 nigel 77
1133     while (lastmatchrestart < p)
1134     {
1135     char *pp = lastmatchrestart;
1136     if (printname != NULL) fprintf(stdout, "%s-", printname);
1137     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1138 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1139     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1140     lastmatchrestart = pp;
1141 nigel 77 }
1142     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1143     }
1144    
1145     /* If there were non-contiguous lines printed above, insert hyphens. */
1146    
1147     if (hyphenpending)
1148     {
1149     fprintf(stdout, "--\n");
1150     hyphenpending = FALSE;
1151     hyphenprinted = TRUE;
1152     }
1153    
1154     /* See if there is a requirement to print some "before" lines for this
1155     match. Again, don't print overlaps. */
1156    
1157     if (before_context > 0)
1158     {
1159     int linecount = 0;
1160     char *p = ptr;
1161    
1162     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1163 nigel 87 linecount < before_context)
1164 nigel 77 {
1165 nigel 87 linecount++;
1166 nigel 93 p = previous_line(p, buffer);
1167 nigel 77 }
1168    
1169     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1170     fprintf(stdout, "--\n");
1171    
1172     while (p < ptr)
1173     {
1174 nigel 93 int ellength;
1175 nigel 77 char *pp = p;
1176     if (printname != NULL) fprintf(stdout, "%s-", printname);
1177     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1178 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1179     fwrite(p, 1, pp - p, stdout);
1180     p = pp;
1181 nigel 77 }
1182     }
1183    
1184     /* Now print the matching line(s); ensure we set hyphenpending at the end
1185 nigel 85 of the file if any context lines are being output. */
1186 nigel 77
1187 nigel 85 if (after_context > 0 || before_context > 0)
1188     endhyphenpending = TRUE;
1189    
1190 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1191 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1192 nigel 77
1193     /* In multiline mode, we want to print to the end of the line in which
1194     the end of the matched string is found, so we adjust linelength and the
1195 ph10 222 line number appropriately, but only when there actually was a match
1196     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1197     the match will always be before the first newline sequence. */
1198 nigel 77
1199     if (multiline)
1200     {
1201 nigel 93 int ellength;
1202 ph10 222 char *endmatch = ptr;
1203     if (!invert)
1204 nigel 93 {
1205 ph10 222 endmatch += offsets[1];
1206     t = ptr;
1207     while (t < endmatch)
1208     {
1209     t = end_of_line(t, endptr, &ellength);
1210     if (t <= endmatch) linenumber++; else break;
1211     }
1212 nigel 93 }
1213     endmatch = end_of_line(endmatch, endptr, &ellength);
1214     linelength = endmatch - ptr - ellength;
1215 nigel 77 }
1216    
1217 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1218     zeroes are treated as just another data character. */
1219    
1220     /* This extra option, for Jeffrey Friedl's debugging requirements,
1221     replaces the matched string, or a specific captured string if it exists,
1222     with X. When this happens, colouring is ignored. */
1223    
1224     #ifdef JFRIEDL_DEBUG
1225     if (S_arg >= 0 && S_arg < mrc)
1226     {
1227     int first = S_arg * 2;
1228     int last = first + 1;
1229     fwrite(ptr, 1, offsets[first], stdout);
1230     fprintf(stdout, "X");
1231     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1232     }
1233     else
1234     #endif
1235    
1236 ph10 392 /* We have to split the line(s) up if colouring, and search for further
1237 ph10 378 matches. */
1238 nigel 87
1239     if (do_colour)
1240     {
1241 ph10 392 int last_offset = 0;
1242 nigel 87 fwrite(ptr, 1, offsets[0], stdout);
1243     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245     fprintf(stdout, "%c[00m", 0x1b);
1246 ph10 378 for (;;)
1247     {
1248 ph10 392 last_offset += offsets[1];
1249 ph10 378 matchptr += offsets[1];
1250     length -= offsets[1];
1251     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252     fwrite(matchptr, 1, offsets[0], stdout);
1253     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255     fprintf(stdout, "%c[00m", 0x1b);
1256     }
1257     fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258 ph10 239 stdout);
1259 nigel 87 }
1260 ph10 392
1261 ph10 378 /* Not colouring; no need to search for further matches */
1262 ph10 392
1263 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264 nigel 49 }
1265    
1266 nigel 87 /* End of doing what has to be done for a match */
1267    
1268 nigel 77 rc = 0; /* Had some success */
1269    
1270     /* Remember where the last match happened for after_context. We remember
1271     where we are about to restart, and that line's number. */
1272    
1273 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1274 nigel 77 lastmatchnumber = linenumber + 1;
1275 nigel 49 }
1276 nigel 77
1277 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1278     anything to be printed), we have to move on to the end of the match before
1279     proceeding. */
1280    
1281     if (multiline && invert && match)
1282     {
1283     int ellength;
1284     char *endmatch = ptr + offsets[1];
1285     t = ptr;
1286     while (t < endmatch)
1287     {
1288     t = end_of_line(t, endptr, &ellength);
1289     if (t <= endmatch) linenumber++; else break;
1290     }
1291     endmatch = end_of_line(endmatch, endptr, &ellength);
1292     linelength = endmatch - ptr - ellength;
1293     }
1294    
1295 ph10 286 /* Advance to after the newline and increment the line number. The file
1296 ph10 280 offset to the current line is maintained in filepos. */
1297 nigel 77
1298 nigel 93 ptr += linelength + endlinelength;
1299 ph10 280 filepos += linelength + endlinelength;
1300 nigel 77 linenumber++;
1301    
1302     /* If we haven't yet reached the end of the file (the buffer is full), and
1303     the current point is in the top 1/3 of the buffer, slide the buffer down by
1304     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1305     about to be lost, print them. */
1306    
1307     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1308     {
1309     if (after_context > 0 &&
1310     lastmatchnumber > 0 &&
1311     lastmatchrestart < buffer + MBUFTHIRD)
1312     {
1313     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1314     lastmatchnumber = 0;
1315     }
1316    
1317     /* Now do the shuffle */
1318    
1319     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1320     ptr -= MBUFTHIRD;
1321 ph10 286
1322     #ifdef SUPPORT_LIBZ
1323     if (frtype == FR_LIBZ)
1324     bufflength = 2*MBUFTHIRD +
1325     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1326     else
1327     #endif
1328    
1329     #ifdef SUPPORT_LIBBZ2
1330     if (frtype == FR_LIBBZ2)
1331     bufflength = 2*MBUFTHIRD +
1332     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1333     else
1334     #endif
1335    
1336 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1337 ph10 286
1338 nigel 77 endptr = buffer + bufflength;
1339    
1340     /* Adjust any last match point */
1341    
1342     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1343     }
1344     } /* Loop through the whole file */
1345    
1346     /* End of file; print final "after" lines if wanted; do_after_lines sets
1347     hyphenpending if it prints something. */
1348    
1349 nigel 87 if (!only_matching && !count_only)
1350     {
1351     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1352     hyphenpending |= endhyphenpending;
1353     }
1354 nigel 77
1355     /* Print the file name if we are looking for those without matches and there
1356     were none. If we found a match, we won't have got this far. */
1357    
1358 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1359 nigel 77 {
1360     fprintf(stdout, "%s\n", printname);
1361     return 0;
1362 nigel 49 }
1363    
1364 nigel 77 /* Print the match count if wanted */
1365    
1366 nigel 49 if (count_only)
1367     {
1368 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1369 nigel 49 fprintf(stdout, "%d\n", count);
1370     }
1371    
1372     return rc;
1373     }
1374    
1375    
1376    
1377     /*************************************************
1378 nigel 53 * Grep a file or recurse into a directory *
1379     *************************************************/
1380    
1381 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1382     recursing; if it's a file, grep it.
1383    
1384     Arguments:
1385     pathname the path to investigate
1386 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1387 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1388    
1389     Returns: 0 if there was at least one match
1390     1 if there were no matches
1391     2 there was some kind of error
1392    
1393     However, file opening failures are suppressed if "silent" is set.
1394     */
1395    
1396 nigel 53 static int
1397 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1398 nigel 53 {
1399     int rc = 1;
1400     int sep;
1401 ph10 286 int frtype;
1402     int pathlen;
1403     void *handle;
1404     FILE *in = NULL; /* Ensure initialized */
1405 nigel 53
1406 ph10 286 #ifdef SUPPORT_LIBZ
1407     gzFile ingz = NULL;
1408     #endif
1409    
1410     #ifdef SUPPORT_LIBBZ2
1411     BZFILE *inbz2 = NULL;
1412     #endif
1413    
1414 nigel 77 /* If the file name is "-" we scan stdin */
1415 nigel 53
1416 nigel 77 if (strcmp(pathname, "-") == 0)
1417 nigel 53 {
1418 ph10 286 return pcregrep(stdin, FR_PLAIN,
1419 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1420 nigel 77 stdin_name : NULL);
1421     }
1422    
1423 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1424 ph10 325 each file and directory within it, subject to any include or exclude patterns
1425     that were set. The scanning code is localized so it can be made
1426     system-specific. */
1427 nigel 87
1428     if ((sep = isdirectory(pathname)) != 0)
1429 nigel 77 {
1430 nigel 87 if (dee_action == dee_SKIP) return 1;
1431     if (dee_action == dee_RECURSE)
1432 nigel 53 {
1433 nigel 87 char buffer[1024];
1434     char *nextfile;
1435     directory_type *dir = opendirectory(pathname);
1436 nigel 53
1437 nigel 87 if (dir == NULL)
1438     {
1439     if (!silent)
1440     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1441     strerror(errno));
1442     return 2;
1443     }
1444 nigel 77
1445 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1446     {
1447 ph10 324 int frc, nflen;
1448 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1449 ph10 324 nflen = strlen(nextfile);
1450 ph10 345
1451 ph10 325 if (isdirectory(buffer))
1452     {
1453     if (exclude_dir_compiled != NULL &&
1454     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1455     continue;
1456 ph10 345
1457 ph10 325 if (include_dir_compiled != NULL &&
1458     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1459     continue;
1460     }
1461 ph10 345 else
1462     {
1463 ph10 324 if (exclude_compiled != NULL &&
1464     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1465     continue;
1466 ph10 345
1467 ph10 324 if (include_compiled != NULL &&
1468     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1469     continue;
1470 ph10 345 }
1471 nigel 77
1472 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1473     if (frc > 1) rc = frc;
1474     else if (frc == 0 && rc == 1) rc = 0;
1475     }
1476    
1477     closedirectory(dir);
1478     return rc;
1479 nigel 53 }
1480     }
1481    
1482 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1483     been requested. */
1484 nigel 53
1485 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1486    
1487     /* Control reaches here if we have a regular file, or if we have a directory
1488     and recursion or skipping was not requested, or if we have anything else and
1489     skipping was not requested. The scan proceeds. If this is the first and only
1490     argument at top level, we don't show the file name, unless we are only showing
1491     the file name, or the filename was forced (-H). */
1492    
1493 ph10 286 pathlen = strlen(pathname);
1494    
1495     /* Open using zlib if it is supported and the file name ends with .gz. */
1496    
1497     #ifdef SUPPORT_LIBZ
1498     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1499 nigel 53 {
1500 ph10 286 ingz = gzopen(pathname, "rb");
1501     if (ingz == NULL)
1502     {
1503     if (!silent)
1504     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1505     strerror(errno));
1506     return 2;
1507     }
1508     handle = (void *)ingz;
1509     frtype = FR_LIBZ;
1510     }
1511     else
1512     #endif
1513    
1514     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1515    
1516     #ifdef SUPPORT_LIBBZ2
1517     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1518     {
1519     inbz2 = BZ2_bzopen(pathname, "rb");
1520     handle = (void *)inbz2;
1521     frtype = FR_LIBBZ2;
1522     }
1523     else
1524     #endif
1525    
1526     /* Otherwise use plain fopen(). The label is so that we can come back here if
1527     an attempt to read a .bz2 file indicates that it really is a plain file. */
1528    
1529     #ifdef SUPPORT_LIBBZ2
1530     PLAIN_FILE:
1531     #endif
1532     {
1533 ph10 419 in = fopen(pathname, "rb");
1534 ph10 286 handle = (void *)in;
1535     frtype = FR_PLAIN;
1536     }
1537    
1538     /* All the opening methods return errno when they fail. */
1539    
1540     if (handle == NULL)
1541     {
1542 nigel 77 if (!silent)
1543     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1544     strerror(errno));
1545 nigel 53 return 2;
1546     }
1547    
1548 ph10 286 /* Now grep the file */
1549    
1550     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1551 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1552 nigel 77
1553 ph10 286 /* Close in an appropriate manner. */
1554    
1555     #ifdef SUPPORT_LIBZ
1556     if (frtype == FR_LIBZ)
1557     gzclose(ingz);
1558     else
1559     #endif
1560    
1561     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1562     read failed. If the error indicates that the file isn't in fact bzipped, try
1563     again as a normal file. */
1564    
1565     #ifdef SUPPORT_LIBBZ2
1566     if (frtype == FR_LIBBZ2)
1567     {
1568     if (rc == 2)
1569     {
1570     int errnum;
1571     const char *err = BZ2_bzerror(inbz2, &errnum);
1572     if (errnum == BZ_DATA_ERROR_MAGIC)
1573     {
1574     BZ2_bzclose(inbz2);
1575     goto PLAIN_FILE;
1576     }
1577     else if (!silent)
1578     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1579     pathname, err);
1580     }
1581     BZ2_bzclose(inbz2);
1582     }
1583     else
1584     #endif
1585    
1586     /* Normal file close */
1587    
1588 nigel 53 fclose(in);
1589 ph10 286
1590     /* Pass back the yield from pcregrep(). */
1591    
1592 nigel 53 return rc;
1593     }
1594    
1595    
1596    
1597    
1598     /*************************************************
1599 nigel 49 * Usage function *
1600     *************************************************/
1601    
1602     static int
1603     usage(int rc)
1604     {
1605 nigel 87 option_item *op;
1606     fprintf(stderr, "Usage: pcregrep [-");
1607     for (op = optionlist; op->one_char != 0; op++)
1608     {
1609     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1610     }
1611     fprintf(stderr, "] [long options] [pattern] [files]\n");
1612 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1613     "options.\n");
1614 nigel 49 return rc;
1615     }
1616    
1617    
1618    
1619    
1620     /*************************************************
1621 nigel 53 * Help function *
1622     *************************************************/
1623    
1624     static void
1625     help(void)
1626     {
1627     option_item *op;
1628    
1629 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1630 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1631 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1632 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1633    
1634     #ifdef SUPPORT_LIBZ
1635     printf("Files whose names end in .gz are read using zlib.\n");
1636     #endif
1637    
1638     #ifdef SUPPORT_LIBBZ2
1639     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1640     #endif
1641    
1642     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1643     printf("Other files and the standard input are read as plain files.\n\n");
1644     #else
1645     printf("All files are read as plain files, without any interpretation.\n\n");
1646     #endif
1647    
1648 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1649     printf("Options:\n");
1650    
1651     for (op = optionlist; op->one_char != 0; op++)
1652     {
1653     int n;
1654     char s[4];
1655     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1656 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1657 nigel 53 if (n < 1) n = 1;
1658     printf("%.*s%s\n", n, " ", op->help_text);
1659     }
1660    
1661 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1662     printf("trailing white space is removed and blank lines are ignored.\n");
1663     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1664 nigel 53
1665 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1666 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1667     }
1668    
1669    
1670    
1671    
1672     /*************************************************
1673 nigel 77 * Handle a single-letter, no data option *
1674 nigel 53 *************************************************/
1675    
1676     static int
1677     handle_option(int letter, int options)
1678     {
1679     switch(letter)
1680     {
1681 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1682 nigel 87 case N_HELP: help(); exit(0);
1683 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1684 nigel 53 case 'c': count_only = TRUE; break;
1685 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1686     case 'H': filenames = FN_FORCE; break;
1687     case 'h': filenames = FN_NONE; break;
1688 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1689 nigel 87 case 'l': filenames = FN_ONLY; break;
1690     case 'L': filenames = FN_NOMATCH_ONLY; break;
1691 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1692 nigel 53 case 'n': number = TRUE; break;
1693 nigel 87 case 'o': only_matching = TRUE; break;
1694 nigel 77 case 'q': quiet = TRUE; break;
1695 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1696 nigel 53 case 's': silent = TRUE; break;
1697 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1698 nigel 53 case 'v': invert = TRUE; break;
1699 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1700     case 'x': process_options |= PO_LINE_MATCH; break;
1701 nigel 53
1702     case 'V':
1703 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1704 nigel 53 exit(0);
1705     break;
1706    
1707     default:
1708     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1709     exit(usage(2));
1710     }
1711    
1712     return options;
1713     }
1714    
1715    
1716    
1717    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), unlike other numbers that end in
1, 2, or 3.

Argument:  n    the number
Returns:        pointer to a static buffer holding the text; the contents are
                overwritten by the next call
*/

static char *
ordin(int n)
{
/* The longest possible text is a sign, ten digits, two letters, and the
terminating zero, which is 14 bytes. */

static char buffer[16];
char *p = buffer;
int lasttwo = n%100;

p += sprintf(p, "%d", n);

if (lasttwo >= 11 && lasttwo <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1:  strcpy(p, "st"); break;
  case 2:  strcpy(p, "nd"); break;
  case 3:  strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }

return buffer;
}
1740    
1741    
1742    
1743     /*************************************************
1744     * Compile a single pattern *
1745     *************************************************/
1746    
1747     /* When the -F option has been used, this is called for each substring.
1748     Otherwise it's called for each supplied pattern.
1749    
1750     Arguments:
1751     pattern the pattern string
1752     options the PCRE options
1753     filename the file name, or NULL for a command-line pattern
1754     count 0 if this is the only command line pattern, or
1755     number of the command line pattern, or
1756     linenumber for a pattern from a file
1757    
1758     Returns: TRUE on success, FALSE after an error
1759     */
1760    
1761     static BOOL
1762     compile_single_pattern(char *pattern, int options, char *filename, int count)
1763     {
1764     char buffer[MBUFTHIRD + 16];
1765     const char *error;
1766     int errptr;
1767    
1768     if (pattern_count >= MAX_PATTERN_COUNT)
1769     {
1770     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1771     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1772     return FALSE;
1773     }
1774    
1775     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1776     suffix[process_options]);
1777     pattern_list[pattern_count] =
1778     pcre_compile(buffer, options, &error, &errptr, pcretables);
1779 ph10 142 if (pattern_list[pattern_count] != NULL)
1780 ph10 141 {
1781 ph10 142 pattern_count++;
1782 ph10 141 return TRUE;
1783 ph10 142 }
1784 nigel 87
1785     /* Handle compile errors */
1786    
1787     errptr -= (int)strlen(prefix[process_options]);
1788     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1789    
1790     if (filename == NULL)
1791     {
1792     if (count == 0)
1793     fprintf(stderr, "pcregrep: Error in command-line regex "
1794     "at offset %d: %s\n", errptr, error);
1795     else
1796     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1797     "at offset %d: %s\n", ordin(count), errptr, error);
1798     }
1799     else
1800     {
1801     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1802     "at offset %d: %s\n", count, filename, errptr, error);
1803     }
1804    
1805     return FALSE;
1806     }
1807    
1808    
1809    
1810     /*************************************************
1811     * Compile one supplied pattern *
1812     *************************************************/
1813    
1814     /* When the -F option has been used, each string may be a list of strings,
1815 nigel 91 separated by line breaks. They will be matched literally.
1816 nigel 87
1817     Arguments:
1818     pattern the pattern string
1819     options the PCRE options
1820     filename the file name, or NULL for a command-line pattern
1821     count 0 if this is the only command line pattern, or
1822     number of the command line pattern, or
1823     linenumber for a pattern from a file
1824    
1825     Returns: TRUE on success, FALSE after an error
1826     */
1827    
1828     static BOOL
1829     compile_pattern(char *pattern, int options, char *filename, int count)
1830     {
1831     if ((process_options & PO_FIXED_STRINGS) != 0)
1832     {
1833 nigel 93 char *eop = pattern + strlen(pattern);
1834 nigel 87 char buffer[MBUFTHIRD];
1835     for(;;)
1836     {
1837 nigel 93 int ellength;
1838     char *p = end_of_line(pattern, eop, &ellength);
1839     if (ellength == 0)
1840 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1841 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1842 nigel 93 pattern = p;
1843 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1844     return FALSE;
1845     }
1846     }
1847     else return compile_single_pattern(pattern, options, filename, count);
1848     }
1849    
1850    
1851    
1852     /*************************************************
1853 nigel 49 * Main program *
1854     *************************************************/
1855    
1856 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1857    
1858 nigel 49 int
1859     main(int argc, char **argv)
1860     {
1861 nigel 53 int i, j;
1862 nigel 49 int rc = 1;
1863 nigel 87 int pcre_options = 0;
1864     int cmd_pattern_count = 0;
1865 ph10 141 int hint_count = 0;
1866 nigel 49 int errptr;
1867 nigel 87 BOOL only_one_at_top;
1868     char *patterns[MAX_PATTERN_COUNT];
1869     const char *locale_from = "--locale";
1870 nigel 49 const char *error;
1871    
1872 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1873     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874 ph10 391 Note that the return values from pcre_config(), though derived from the ASCII
1875 ph10 392 codes, are the same in EBCDIC environments, so we must use the actual values
1876 ph10 391 rather than escapes such as as '\r'. */
1877 nigel 91
1878     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1879     switch(i)
1880     {
1881 ph10 391 default: newline = (char *)"lf"; break;
1882     case 13: newline = (char *)"cr"; break;
1883     case (13 << 8) | 10: newline = (char *)"crlf"; break;
1884     case -1: newline = (char *)"any"; break;
1885     case -2: newline = (char *)"anycrlf"; break;
1886 nigel 91 }
1887    
1888 nigel 49 /* Process the options */
1889    
1890     for (i = 1; i < argc; i++)
1891     {
1892 nigel 77 option_item *op = NULL;
1893     char *option_data = (char *)""; /* default to keep compiler happy */
1894     BOOL longop;
1895     BOOL longopwasequals = FALSE;
1896    
1897 nigel 49 if (argv[i][0] != '-') break;
1898 nigel 53
1899 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1900 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1901 nigel 63
1902 nigel 77 if (argv[i][1] == 0)
1903     {
1904 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1905 nigel 77 else exit(usage(2));
1906     }
1907 nigel 63
1908 nigel 77 /* Handle a long name option, or -- to terminate the options */
1909 nigel 53
1910     if (argv[i][1] == '-')
1911 nigel 49 {
1912 nigel 77 char *arg = argv[i] + 2;
1913     char *argequals = strchr(arg, '=');
1914 nigel 53
1915 nigel 77 if (*arg == 0) /* -- terminates options */
1916 nigel 49 {
1917 nigel 77 i++;
1918     break; /* out of the options-handling loop */
1919 nigel 53 }
1920 nigel 49
1921 nigel 77 longop = TRUE;
1922    
1923     /* Some long options have data that follows after =, for example file=name.
1924     Some options have variations in the long name spelling: specifically, we
1925     allow "regexp" because GNU grep allows it, though I personally go along
1926 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1927     These options are entered in the table as "regex(p)". No option is in both
1928     these categories, fortunately. */
1929 nigel 77
1930 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1931     {
1932 nigel 77 char *opbra = strchr(op->long_name, '(');
1933     char *equals = strchr(op->long_name, '=');
1934     if (opbra == NULL) /* Not a (p) case */
1935 nigel 53 {
1936 nigel 77 if (equals == NULL) /* Not thing=data case */
1937     {
1938     if (strcmp(arg, op->long_name) == 0) break;
1939     }
1940     else /* Special case xxx=data */
1941     {
1942     int oplen = equals - op->long_name;
1943 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1944 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1945     {
1946     option_data = arg + arglen;
1947     if (*option_data == '=')
1948     {
1949     option_data++;
1950     longopwasequals = TRUE;
1951     }
1952     break;
1953     }
1954     }
1955 nigel 53 }
1956 nigel 77 else /* Special case xxxx(p) */
1957     {
1958     char buff1[24];
1959     char buff2[24];
1960     int baselen = opbra - op->long_name;
1961     sprintf(buff1, "%.*s", baselen, op->long_name);
1962 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1963 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1964 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1965     break;
1966     }
1967 nigel 53 }
1968 nigel 77
1969 nigel 53 if (op->one_char == 0)
1970     {
1971     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1972     exit(usage(2));
1973     }
1974     }
1975 nigel 49
1976 nigel 89
1977     /* Jeffrey Friedl's debugging harness uses these additional options which
1978     are not in the right form for putting in the option table because they use
1979     only one hyphen, yet are more than one character long. By putting them
1980     separately here, they will not get displayed as part of the help() output,
1981     but I don't think Jeffrey will care about that. */
1982    
1983     #ifdef JFRIEDL_DEBUG
1984     else if (strcmp(argv[i], "-pre") == 0) {
1985     jfriedl_prefix = argv[++i];
1986     continue;
1987     } else if (strcmp(argv[i], "-post") == 0) {
1988     jfriedl_postfix = argv[++i];
1989     continue;
1990     } else if (strcmp(argv[i], "-XT") == 0) {
1991     sscanf(argv[++i], "%d", &jfriedl_XT);
1992     continue;
1993     } else if (strcmp(argv[i], "-XR") == 0) {
1994     sscanf(argv[++i], "%d", &jfriedl_XR);
1995     continue;
1996     }
1997     #endif
1998    
1999    
2000 nigel 77 /* One-char options; many that have no data may be in a single argument; we
2001     continue till we hit the last one or one that needs data. */
2002 nigel 53
2003     else
2004     {
2005     char *s = argv[i] + 1;
2006 nigel 77 longop = FALSE;
2007 nigel 53 while (*s != 0)
2008     {
2009 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2010     { if (*s == op->one_char) break; }
2011     if (op->one_char == 0)
2012 nigel 53 {
2013 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2014     *s, argv[i]);
2015     exit(usage(2));
2016     }
2017     if (op->type != OP_NODATA || s[1] == 0)
2018     {
2019     option_data = s+1;
2020 nigel 53 break;
2021     }
2022 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2023 nigel 49 }
2024     }
2025 nigel 77
2026 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2027     is NO_DATA, it means that there is no data, and the option might set
2028     something in the PCRE options. */
2029 nigel 77
2030     if (op->type == OP_NODATA)
2031     {
2032 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2033     continue;
2034     }
2035    
2036     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2037     either has a value or defaults to something. It cannot have data in a
2038     separate item. At the moment, the only such options are "colo(u)r" and
2039 nigel 89 Jeffrey Friedl's special -S debugging option. */
2040 nigel 87
2041     if (*option_data == 0 &&
2042     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2043     {
2044     switch (op->one_char)
2045 nigel 77 {
2046 nigel 87 case N_COLOUR:
2047     colour_option = (char *)"auto";
2048     break;
2049     #ifdef JFRIEDL_DEBUG
2050     case 'S':
2051     S_arg = 0;
2052     break;
2053     #endif
2054 nigel 77 }
2055 nigel 87 continue;
2056     }
2057 nigel 77
2058 nigel 87 /* Otherwise, find the data string for the option. */
2059    
2060     if (*option_data == 0)
2061     {
2062     if (i >= argc - 1 || longopwasequals)
2063 nigel 77 {
2064 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2065     exit(usage(2));
2066     }
2067     option_data = argv[++i];
2068     }
2069    
2070     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2071     multiple times to create a list of patterns. */
2072    
2073     if (op->type == OP_PATLIST)
2074     {
2075     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2076     {
2077     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2078     MAX_PATTERN_COUNT);
2079     return 2;
2080     }
2081     patterns[cmd_pattern_count++] = option_data;
2082     }
2083    
2084     /* Otherwise, deal with single string or numeric data values. */
2085    
2086     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2087     {
2088     *((char **)op->dataptr) = option_data;
2089     }
2090     else
2091     {
2092     char *endptr;
2093     int n = strtoul(option_data, &endptr, 10);
2094     if (*endptr != 0)
2095     {
2096     if (longop)
2097 nigel 77 {
2098 nigel 87 char *equals = strchr(op->long_name, '=');
2099     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2100     equals - op->long_name;
2101     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2102     option_data, nlen, op->long_name);
2103 nigel 77 }
2104 nigel 87 else
2105     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2106     option_data, op->one_char);
2107     exit(usage(2));
2108 nigel 77 }
2109 nigel 87 *((int *)op->dataptr) = n;
2110 nigel 77 }
2111 nigel 49 }
2112    
2113 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2114     for -A and -B. */
2115    
2116     if (both_context > 0)
2117     {
2118     if (after_context == 0) after_context = both_context;
2119     if (before_context == 0) before_context = both_context;
2120     }
2121 ph10 286
2122     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2123 ph10 280 However, the latter two set the only_matching flag. */
2124 nigel 77
2125 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2126 ph10 286 (file_offsets && line_offsets))
2127 ph10 280 {
2128     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2129     "and/or --line-offsets\n");
2130     exit(usage(2));
2131     }
2132    
2133 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2134    
2135 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2136     LC_ALL environment variable is set, and if so, use it. */
2137 nigel 49
2138 nigel 87 if (locale == NULL)
2139 nigel 53 {
2140 nigel 87 locale = getenv("LC_ALL");
2141     locale_from = "LCC_ALL";
2142 nigel 53 }
2143 nigel 49
2144 nigel 87 if (locale == NULL)
2145     {
2146     locale = getenv("LC_CTYPE");
2147     locale_from = "LC_CTYPE";
2148     }
2149 nigel 49
2150 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2151     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2152    
2153     if (locale != NULL)
2154 nigel 49 {
2155 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2156 nigel 53 {
2157 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2158     locale, locale_from);
2159 nigel 53 return 2;
2160     }
2161 nigel 87 pcretables = pcre_maketables();
2162     }
2163 nigel 77
2164 nigel 87 /* Sort out colouring */
2165    
2166     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2167     {
2168     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2169     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2170     else
2171 nigel 53 {
2172 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2173     colour_option);
2174     return 2;
2175 nigel 77 }
2176 nigel 87 if (do_colour)
2177 nigel 77 {
2178 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2179     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2180     if (cs != NULL) colour_string = cs;
2181 nigel 77 }
2182 nigel 87 }
2183 nigel 77
2184 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2185    
2186     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2187     {
2188     pcre_options |= PCRE_NEWLINE_CR;
2189 nigel 93 endlinetype = EL_CR;
2190 nigel 91 }
2191     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2192     {
2193     pcre_options |= PCRE_NEWLINE_LF;
2194 nigel 93 endlinetype = EL_LF;
2195 nigel 91 }
2196     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2197     {
2198     pcre_options |= PCRE_NEWLINE_CRLF;
2199 nigel 93 endlinetype = EL_CRLF;
2200 nigel 91 }
2201 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2202     {
2203     pcre_options |= PCRE_NEWLINE_ANY;
2204     endlinetype = EL_ANY;
2205     }
2206 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2207     {
2208     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2209     endlinetype = EL_ANYCRLF;
2210     }
2211 nigel 91 else
2212     {
2213     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2214     return 2;
2215     }
2216    
2217 nigel 87 /* Interpret the text values for -d and -D */
2218    
2219     if (dee_option != NULL)
2220     {
2221     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2222     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2223     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2224     else
2225 nigel 77 {
2226 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2227     return 2;
2228 nigel 53 }
2229 nigel 49 }
2230    
2231 nigel 87 if (DEE_option != NULL)
2232     {
2233     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2234     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2235     else
2236     {
2237     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2238     return 2;
2239     }
2240     }
2241 nigel 49
2242 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2243 nigel 87
2244     #ifdef JFRIEDL_DEBUG
2245     if (S_arg > 9)
2246 nigel 49 {
2247 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2248     return 2;
2249     }
2250 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2251     {
2252     if (jfriedl_XT == 0) jfriedl_XT = 1;
2253     if (jfriedl_XR == 0) jfriedl_XR = 1;
2254     }
2255 nigel 87 #endif
2256 nigel 77
2257 nigel 87 /* Get memory to store the pattern and hints lists. */
2258    
2259     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2260     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2261    
2262     if (pattern_list == NULL || hints_list == NULL)
2263     {
2264     fprintf(stderr, "pcregrep: malloc failed\n");
2265 ph10 123 goto EXIT2;
2266 nigel 87 }
2267    
2268     /* If no patterns were provided by -e, and there is no file provided by -f,
2269     the first argument is the one and only pattern, and it must exist. */
2270    
2271     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2272     {
2273 nigel 63 if (i >= argc) return usage(2);
2274 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2275     }
2276 nigel 77
2277 nigel 87 /* Compile the patterns that were provided on the command line, either by
2278     multiple uses of -e or as a single unkeyed pattern. */
2279    
2280     for (j = 0; j < cmd_pattern_count; j++)
2281     {
2282     if (!compile_pattern(patterns[j], pcre_options, NULL,
2283     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2284 ph10 123 goto EXIT2;
2285 nigel 87 }
2286    
2287     /* Compile the regular expressions that are provided in a file. */
2288    
2289     if (pattern_filename != NULL)
2290     {
2291     int linenumber = 0;
2292     FILE *f;
2293     char *filename;
2294     char buffer[MBUFTHIRD];
2295    
2296     if (strcmp(pattern_filename, "-") == 0)
2297 nigel 77 {
2298 nigel 87 f = stdin;
2299     filename = stdin_name;
2300 nigel 77 }
2301 nigel 87 else
2302 nigel 77 {
2303 nigel 87 f = fopen(pattern_filename, "r");
2304     if (f == NULL)
2305     {
2306     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2307     strerror(errno));
2308 ph10 123 goto EXIT2;
2309 nigel 87 }
2310     filename = pattern_filename;
2311 nigel 77 }
2312    
2313 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2314 nigel 53 {
2315 nigel 87 char *s = buffer + (int)strlen(buffer);
2316     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2317     *s = 0;
2318     linenumber++;
2319     if (buffer[0] == 0) continue; /* Skip blank lines */
2320     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2321 ph10 121 goto EXIT2;
2322 nigel 53 }
2323 nigel 87
2324     if (f != stdin) fclose(f);
2325 nigel 49 }
2326    
2327 nigel 77 /* Study the regular expressions, as we will be running them many times */
2328 nigel 53
2329     for (j = 0; j < pattern_count; j++)
2330     {
2331     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2332     if (error != NULL)
2333     {
2334     char s[16];
2335     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2336     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2337 ph10 121 goto EXIT2;
2338 nigel 53 }
2339 ph10 142 hint_count++;
2340 nigel 53 }
2341    
2342 nigel 77 /* If there are include or exclude patterns, compile them. */
2343    
2344     if (exclude_pattern != NULL)
2345     {
2346 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2347     pcretables);
2348 nigel 77 if (exclude_compiled == NULL)
2349     {
2350     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2351     errptr, error);
2352 ph10 121 goto EXIT2;
2353 nigel 77 }
2354     }
2355    
2356     if (include_pattern != NULL)
2357     {
2358 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2359     pcretables);
2360 nigel 77 if (include_compiled == NULL)
2361     {
2362     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2363     errptr, error);
2364 ph10 121 goto EXIT2;
2365 nigel 77 }
2366     }
2367    
2368 ph10 325 if (exclude_dir_pattern != NULL)
2369     {
2370     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2371     pcretables);
2372     if (exclude_dir_compiled == NULL)
2373     {
2374     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2375     errptr, error);
2376     goto EXIT2;
2377     }
2378     }
2379    
2380     if (include_dir_pattern != NULL)
2381     {
2382     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2383     pcretables);
2384     if (include_dir_compiled == NULL)
2385     {
2386     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2387     errptr, error);
2388     goto EXIT2;
2389     }
2390     }
2391    
2392 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2393 nigel 49
2394 nigel 87 if (i >= argc)
2395 ph10 121 {
2396 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2397 ph10 121 goto EXIT;
2398 ph10 123 }
2399 nigel 49
2400 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2401     Pass in the fact that there is only one argument at top level - this suppresses
2402 nigel 87 the file name if the argument is not a directory and filenames are not
2403     otherwise forced. */
2404 nigel 49
2405 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2406 nigel 49
2407     for (; i < argc; i++)
2408     {
2409 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2410     only_one_at_top);
2411 nigel 77 if (frc > 1) rc = frc;
2412     else if (frc == 0 && rc == 1) rc = 0;
2413 nigel 49 }
2414    
2415 ph10 121 EXIT:
2416     if (pattern_list != NULL)
2417     {
2418 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2419 ph10 121 free(pattern_list);
2420 ph10 123 }
2421 ph10 121 if (hints_list != NULL)
2422     {
2423 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2424 ph10 121 free(hints_list);
2425 ph10 123 }
2426 nigel 49 return rc;
2427 ph10 121
2428     EXIT2:
2429     rc = 2;
2430     goto EXIT;
2431 nigel 49 }
2432    
2433 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12