/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 377 - (hide annotations) (download)
Sun Mar 1 12:07:19 2009 UTC (5 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 67322 byte(s)
Use colour when requested with -o in pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 377 Copyright (c) 1997-2009 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* Boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

#define MAX_PATTERN_COUNT 100

/* MBUFTHIRD is one third of the size of the main data buffer. It is the larger
of BUFSIZ and 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
80 nigel 49
/* Values for the "filenames" variable, which selects how file names are
output. The order matters: a file name is wanted for every value greater than
FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* File reading styles */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* Actions for the -d option (dee_xxx) and the -D option (DEE_xxx) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits for the special processing options */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* Line ending types */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};
105 nigel 87
106 nigel 93
107    
108 nigel 49 /*************************************************
109     * Global variables *
110     *************************************************/
111    
112 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
113     regular code. */
114    
115     #ifdef JFRIEDL_DEBUG
116     static int S_arg = -1;
117 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119     static const char *jfriedl_prefix = "";
120     static const char *jfriedl_postfix = "";
121 nigel 87 #endif
122    
123 nigel 93 static int endlinetype;
124 nigel 91
125 nigel 87 static char *colour_string = (char *)"1;31";
126     static char *colour_option = NULL;
127     static char *dee_option = NULL;
128     static char *DEE_option = NULL;
129 nigel 91 static char *newline = NULL;
130 nigel 53 static char *pattern_filename = NULL;
131 nigel 77 static char *stdin_name = (char *)"(standard input)";
132 nigel 87 static char *locale = NULL;
133    
134     static const unsigned char *pcretables = NULL;
135    
136 nigel 53 static int pattern_count = 0;
137 ph10 121 static pcre **pattern_list = NULL;
138     static pcre_extra **hints_list = NULL;
139 nigel 49
140 nigel 77 static char *include_pattern = NULL;
141     static char *exclude_pattern = NULL;
142 ph10 325 static char *include_dir_pattern = NULL;
143     static char *exclude_dir_pattern = NULL;
144 nigel 77
145     static pcre *include_compiled = NULL;
146     static pcre *exclude_compiled = NULL;
147 ph10 325 static pcre *include_dir_compiled = NULL;
148     static pcre *exclude_dir_compiled = NULL;
149 nigel 77
150     static int after_context = 0;
151     static int before_context = 0;
152     static int both_context = 0;
153 nigel 87 static int dee_action = dee_READ;
154     static int DEE_action = DEE_READ;
155     static int error_count = 0;
156     static int filenames = FN_DEFAULT;
157     static int process_options = 0;
158 nigel 77
159 nigel 49 static BOOL count_only = FALSE;
160 nigel 87 static BOOL do_colour = FALSE;
161 ph10 280 static BOOL file_offsets = FALSE;
162 nigel 77 static BOOL hyphenpending = FALSE;
163 nigel 49 static BOOL invert = FALSE;
164 ph10 280 static BOOL line_offsets = FALSE;
165 nigel 77 static BOOL multiline = FALSE;
166 nigel 49 static BOOL number = FALSE;
167 nigel 87 static BOOL only_matching = FALSE;
168 nigel 77 static BOOL quiet = FALSE;
169 nigel 49 static BOOL silent = FALSE;
170 nigel 93 static BOOL utf8 = FALSE;
171 nigel 49
/* The kinds of option, and the structure that describes a single option. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

typedef struct option_item {
  int type;                /* one of the OP_xxx values */
  int one_char;            /* single-letter form, or a negative N_xxx value */
  void *dataptr;           /* address of the variable for the data, or NULL */
  const char *long_name;   /* long form of the option */
  const char *help_text;   /* description of the option */
} option_item;
184 nigel 49
185 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
186     used to identify them. */
187    
188 ph10 325 #define N_COLOUR (-1)
189     #define N_EXCLUDE (-2)
190     #define N_EXCLUDE_DIR (-3)
191     #define N_HELP (-4)
192     #define N_INCLUDE (-5)
193     #define N_INCLUDE_DIR (-6)
194     #define N_LABEL (-7)
195     #define N_LOCALE (-8)
196     #define N_NULL (-9)
197     #define N_LOFFSETS (-10)
198     #define N_FOFFSETS (-11)
199 nigel 87
200 nigel 53 static option_item optionlist[] = {
201 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
202     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
203     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
204     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
205     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
206     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
207     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
208     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
209     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
210     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
211     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
212     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
213     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
214 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
215 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
216     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
217     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
218     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
219     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
220     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
221 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
222 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
223     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
224 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
226     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
227     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
228     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
229     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
230     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
231 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233 nigel 87 #ifdef JFRIEDL_DEBUG
234     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
235     #endif
236     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
237     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
238     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
239     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
240     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
241     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
242     { OP_NODATA, 0, NULL, NULL, NULL }
243 nigel 53 };
244    
/* Strings that are put before and after each pattern, indexed by the value of
process_options. The -w, -x, and -F options set the 1, 2, and 4 bits
respectively. Giving both -w and -x has the same effect as giving -x alone, so
the entries for those two cases are identical. */

static const char *prefix[] = {
  "",
  "\\b",
  "^(?:",
  "^(?:",
  "\\Q",
  "\\b\\Q",
  "^(?:\\Q",
  "^(?:\\Q"
};

static const char *suffix[] = {
  "",
  "\\b",
  ")$",
  ")$",
  "\\E",
  "\\E\\b",
  "\\E)$",
  "\\E)$"
};

/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf".
utf8_table3 is indexed by the number of additional bytes and gives the mask for
the data bits in the first byte. utf8_table4 is indexed by the low-order six
bits of a first byte that is 0xc0 or greater, and gives the number of
additional bytes. */

const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };

const char utf8_table4[] = {
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,
  4,4,4,4,5,5,5,5
};
265    
266    
267    
268 nigel 53 /*************************************************
269 nigel 87 * OS-specific functions *
270 nigel 53 *************************************************/
271    
272     /* These functions are defined so that they can be made system specific,
273 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
274 nigel 53
275    
276     /************* Directory scanning in Unix ***********/
277    
278 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279 nigel 53 #include <sys/types.h>
280     #include <sys/stat.h>
281     #include <dirent.h>
282    
typedef DIR directory_type;

/* Test whether a path names a directory. The yield is '/' for a directory and
zero for anything else. Zero is also given when stat() fails, in the
expectation that a subsequent attempt to open the path as a file will fail. */

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
293    
294 nigel 67 static directory_type *
295 nigel 53 opendirectory(char *filename)
296     {
297     return opendir(filename);
298     }
299    
300 nigel 67 static char *
301 nigel 53 readdirectory(directory_type *dir)
302     {
303     for (;;)
304     {
305     struct dirent *dent = readdir(dir);
306     if (dent == NULL) return NULL;
307     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308     return dent->d_name;
309     }
310 ph10 151 /* Control never reaches here */
311 nigel 53 }
312    
313 nigel 67 static void
314 nigel 53 closedirectory(directory_type *dir)
315     {
316     closedir(dir);
317     }
318    
319    
320 nigel 87 /************* Test for regular file in Unix **********/
321    
/* The yield is 1 for a regular file and 0 for anything else. If stat() fails,
1 is returned, in the expectation that a subsequent attempt to open the path
as a file will fail. */

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
330    
331    
332     /************* Test stdout for being a terminal in Unix **********/
333    
334     static BOOL
335     is_stdout_tty(void)
336     {
337     return isatty(fileno(stdout));
338     }
339    
340    
341 nigel 63 /************* Directory scanning in Win32 ***********/
342 nigel 53
343 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
344 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
346     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347 ph10 283 */
348 nigel 53
349 ph10 97 #elif HAVE_WINDOWS_H
350 nigel 63
351     #ifndef STRICT
352     # define STRICT
353     #endif
354     #ifndef WIN32_LEAN_AND_MEAN
355     # define WIN32_LEAN_AND_MEAN
356     #endif
357 ph10 283
358     #include <windows.h>
359    
360 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
361     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362     #endif
363    
364 nigel 63 typedef struct directory_type
365     {
366     HANDLE handle;
367     BOOL first;
368     WIN32_FIND_DATA data;
369     } directory_type;
370    
371     int
372     isdirectory(char *filename)
373     {
374     DWORD attr = GetFileAttributes(filename);
375     if (attr == INVALID_FILE_ATTRIBUTES)
376     return 0;
377     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
378     }
379    
380     directory_type *
381     opendirectory(char *filename)
382     {
383     size_t len;
384     char *pattern;
385     directory_type *dir;
386     DWORD err;
387     len = strlen(filename);
388     pattern = (char *) malloc(len + 3);
389     dir = (directory_type *) malloc(sizeof(*dir));
390     if ((pattern == NULL) || (dir == NULL))
391     {
392     fprintf(stderr, "pcregrep: malloc failed\n");
393     exit(2);
394     }
395     memcpy(pattern, filename, len);
396     memcpy(&(pattern[len]), "\\*", 3);
397     dir->handle = FindFirstFile(pattern, &(dir->data));
398     if (dir->handle != INVALID_HANDLE_VALUE)
399     {
400     free(pattern);
401     dir->first = TRUE;
402     return dir;
403     }
404     err = GetLastError();
405     free(pattern);
406     free(dir);
407     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
408     return NULL;
409     }
410    
411     char *
412     readdirectory(directory_type *dir)
413     {
414     for (;;)
415     {
416     if (!dir->first)
417     {
418     if (!FindNextFile(dir->handle, &(dir->data)))
419     return NULL;
420     }
421     else
422     {
423     dir->first = FALSE;
424     }
425     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
426     return dir->data.cFileName;
427     }
428     #ifndef _MSC_VER
429     return NULL; /* Keep compiler happy; never executed */
430     #endif
431     }
432    
433     void
434     closedirectory(directory_type *dir)
435     {
436     FindClose(dir->handle);
437     free(dir);
438     }
439    
440    
441 nigel 87 /************* Test for regular file in Win32 **********/
442    
443     /* I don't know how to do this, or if it can be done; assume all paths are
444     regular if they are not directories. */
445    
/* Anything that isdirectory() does not recognize as a directory is taken to
be a regular file. */

int
isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
450    
451    
452     /************* Test stdout for being a terminal in Win32 **********/
453    
454     /* I don't know how to do this; assume never */
455    
456     static BOOL
457     is_stdout_tty(void)
458     {
459 ph10 283 return FALSE;
460 nigel 87 }
461    
462    
463 nigel 53 /************* Directory scanning when we can't do it ***********/
464    
465     /* The type is void, and apart from isdirectory(), the functions do nothing. */
466    
467 nigel 63 #else
468    
typedef void directory_type;

/* Nothing is ever recognized as a directory. */

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

/* No directory can be opened. */

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type*)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char*)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
(void)dir;
}
475    
476 nigel 87
477     /************* Test for regular when we can't do it **********/
478    
479     /* Assume all files are regular. */
480    
int
isregfile(char *filename)
{
(void)filename;
return 1;
}
482    
483    
484     /************* Test stdout for being a terminal when we can't do it **********/
485    
486     static BOOL
487     is_stdout_tty(void)
488     {
489     return FALSE;
490     }
491    
492    
493 nigel 53 #endif
494    
495    
496    
497 ph10 137 #ifndef HAVE_STRERROR
498 nigel 49 /*************************************************
499     * Provide strerror() for non-ANSI libraries *
500     *************************************************/
501    
502     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
503     in their libraries, but can provide the same facility by this simple
504     alternative function. */
505    
extern int sys_nerr;
extern char *sys_errlist[];

/* The message for error n is taken from sys_errlist. A fixed string is given
for a number that is negative or not less than sys_nerr. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
515     #endif /* HAVE_STRERROR */
516    
517    
518    
519     /*************************************************
520 nigel 93 * Find end of line *
521     *************************************************/
522    
523     /* The length of the endline sequence that is found is set via lenptr. This may
524     be zero at the very end of the file if there is no line-ending sequence there.
525    
526     Arguments:
527     p current position in line
528     endptr end of available data
529     lenptr where to put the length of the eol sequence
530    
531     Returns: pointer to the last byte of the line
532     */
533    
534     static char *
535     end_of_line(char *p, char *endptr, int *lenptr)
536     {
537     switch(endlinetype)
538     {
539     default: /* Just in case */
540     case EL_LF:
541     while (p < endptr && *p != '\n') p++;
542     if (p < endptr)
543     {
544     *lenptr = 1;
545     return p + 1;
546     }
547     *lenptr = 0;
548     return endptr;
549    
550     case EL_CR:
551     while (p < endptr && *p != '\r') p++;
552     if (p < endptr)
553     {
554     *lenptr = 1;
555     return p + 1;
556     }
557     *lenptr = 0;
558     return endptr;
559    
560     case EL_CRLF:
561     for (;;)
562     {
563     while (p < endptr && *p != '\r') p++;
564     if (++p >= endptr)
565     {
566     *lenptr = 0;
567     return endptr;
568     }
569     if (*p == '\n')
570     {
571     *lenptr = 2;
572     return p + 1;
573     }
574     }
575     break;
576    
577 ph10 149 case EL_ANYCRLF:
578     while (p < endptr)
579     {
580     int extra = 0;
581     register int c = *((unsigned char *)p);
582    
583     if (utf8 && c >= 0xc0)
584     {
585     int gcii, gcss;
586     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
587     gcss = 6*extra;
588     c = (c & utf8_table3[extra]) << gcss;
589     for (gcii = 1; gcii <= extra; gcii++)
590     {
591     gcss -= 6;
592     c |= (p[gcii] & 0x3f) << gcss;
593     }
594     }
595    
596     p += 1 + extra;
597    
598     switch (c)
599     {
600     case 0x0a: /* LF */
601     *lenptr = 1;
602     return p;
603    
604     case 0x0d: /* CR */
605     if (p < endptr && *p == 0x0a)
606     {
607     *lenptr = 2;
608     p++;
609     }
610     else *lenptr = 1;
611     return p;
612 ph10 150
613 ph10 149 default:
614     break;
615     }
616     } /* End of loop for ANYCRLF case */
617 ph10 150
618 ph10 149 *lenptr = 0; /* Must have hit the end */
619     return endptr;
620    
621 nigel 93 case EL_ANY:
622     while (p < endptr)
623     {
624     int extra = 0;
625     register int c = *((unsigned char *)p);
626    
627     if (utf8 && c >= 0xc0)
628     {
629     int gcii, gcss;
630     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
631     gcss = 6*extra;
632     c = (c & utf8_table3[extra]) << gcss;
633     for (gcii = 1; gcii <= extra; gcii++)
634     {
635     gcss -= 6;
636     c |= (p[gcii] & 0x3f) << gcss;
637     }
638     }
639    
640     p += 1 + extra;
641    
642     switch (c)
643     {
644     case 0x0a: /* LF */
645     case 0x0b: /* VT */
646     case 0x0c: /* FF */
647     *lenptr = 1;
648     return p;
649    
650     case 0x0d: /* CR */
651     if (p < endptr && *p == 0x0a)
652     {
653     *lenptr = 2;
654     p++;
655     }
656     else *lenptr = 1;
657     return p;
658    
659     case 0x85: /* NEL */
660     *lenptr = utf8? 2 : 1;
661     return p;
662    
663     case 0x2028: /* LS */
664     case 0x2029: /* PS */
665     *lenptr = 3;
666     return p;
667    
668     default:
669     break;
670     }
671     } /* End of loop for ANY case */
672    
673     *lenptr = 0; /* Must have hit the end */
674     return endptr;
675     } /* End of overall switch */
676     }
677    
678    
679    
680     /*************************************************
681     * Find start of previous line *
682     *************************************************/
683    
684     /* This is called when looking back for before lines to print.
685    
686     Arguments:
687     p start of the subsequent line
688     startptr start of available data
689    
690     Returns: pointer to the start of the previous line
691     */
692    
693     static char *
694     previous_line(char *p, char *startptr)
695     {
696     switch(endlinetype)
697     {
698     default: /* Just in case */
699     case EL_LF:
700     p--;
701     while (p > startptr && p[-1] != '\n') p--;
702     return p;
703    
704     case EL_CR:
705     p--;
706     while (p > startptr && p[-1] != '\n') p--;
707     return p;
708    
709     case EL_CRLF:
710     for (;;)
711     {
712     p -= 2;
713     while (p > startptr && p[-1] != '\n') p--;
714     if (p <= startptr + 1 || p[-2] == '\r') return p;
715     }
716     return p; /* But control should never get here */
717    
718     case EL_ANY:
719 ph10 150 case EL_ANYCRLF:
720 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721     if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
723     while (p > startptr)
724     {
725     register int c;
726     char *pp = p - 1;
727    
728     if (utf8)
729     {
730     int extra = 0;
731     while ((*pp & 0xc0) == 0x80) pp--;
732     c = *((unsigned char *)pp);
733     if (c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (pp[gcii] & 0x3f) << gcss;
743     }
744     }
745     }
746     else c = *((unsigned char *)pp);
747    
748 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
749 nigel 93 {
750     case 0x0a: /* LF */
751 ph10 149 case 0x0d: /* CR */
752     return p;
753 ph10 150
754 ph10 149 default:
755     break;
756 ph10 150 }
757 ph10 149
758     else switch (c)
759     {
760     case 0x0a: /* LF */
761 nigel 93 case 0x0b: /* VT */
762     case 0x0c: /* FF */
763     case 0x0d: /* CR */
764     case 0x85: /* NEL */
765     case 0x2028: /* LS */
766     case 0x2029: /* PS */
767     return p;
768    
769     default:
770     break;
771     }
772    
773     p = pp; /* Back one character */
774     } /* End of loop for ANY case */
775    
776     return startptr; /* Hit start of data */
777     } /* End of overall switch */
778     }
779    
780    
781    
782    
783    
784     /*************************************************
785 nigel 77 * Print the previous "after" lines *
786 nigel 49 *************************************************/
787    
788 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
789 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
790     that a binary zero does not terminate it.
791 nigel 77
792     Arguments:
793     lastmatchnumber the number of the last matching line, plus one
794     lastmatchrestart where we restarted after the last match
795     endptr end of available data
796     printname filename for printing
797    
798     Returns: nothing
799     */
800    
801     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802     char *endptr, char *printname)
803     {
804     if (after_context > 0 && lastmatchnumber > 0)
805     {
806     int count = 0;
807     while (lastmatchrestart < endptr && count++ < after_context)
808     {
809 nigel 93 int ellength;
810 nigel 77 char *pp = lastmatchrestart;
811     if (printname != NULL) fprintf(stdout, "%s-", printname);
812     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813 nigel 93 pp = end_of_line(pp, endptr, &ellength);
814     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815     lastmatchrestart = pp;
816 nigel 77 }
817     hyphenpending = TRUE;
818     }
819     }
820    
821    
822    
823     /*************************************************
824     * Grep an individual file *
825     *************************************************/
826    
827     /* This is called from grep_or_recurse() below. It uses a buffer that is three
828     times the value of MBUFTHIRD. The matching point is never allowed to stray into
829     the top third of the buffer, thus keeping more of the file available for
830     context printing or for multiline scanning. For large files, the pointer will
831     be in the middle third most of the time, so the bottom third is available for
832     "before" context printing.
833    
834     Arguments:
835 ph10 286 handle the fopened FILE stream for a normal file
836     the gzFile pointer when reading is via libz
837     the BZFILE pointer when reading is via libbz2
838     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839 nigel 77 printname the file name if it is to be printed for each match
840     or NULL if the file name is not to be printed
841     it cannot be NULL if filenames[_nomatch]_only is set
842    
843     Returns: 0 if there was at least one match
844     1 otherwise (no matches)
845 ph10 286 2 if there is a read error on a .bz2 file
846 nigel 77 */
847    
848 nigel 49 static int
849 ph10 286 pcregrep(void *handle, int frtype, char *printname)
850 nigel 49 {
851     int rc = 1;
852 nigel 77 int linenumber = 1;
853     int lastmatchnumber = 0;
854 nigel 49 int count = 0;
855 ph10 280 int filepos = 0;
856 nigel 49 int offsets[99];
857 nigel 77 char *lastmatchrestart = NULL;
858     char buffer[3*MBUFTHIRD];
859     char *ptr = buffer;
860     char *endptr;
861     size_t bufflength;
862     BOOL endhyphenpending = FALSE;
863 ph10 286 FILE *in = NULL; /* Ensure initialized */
864 nigel 49
865 ph10 286 #ifdef SUPPORT_LIBZ
866     gzFile ingz = NULL;
867     #endif
868 nigel 77
869 ph10 286 #ifdef SUPPORT_LIBBZ2
870     BZFILE *inbz2 = NULL;
871     #endif
872    
873    
874     /* Do the first read into the start of the buffer and set up the pointer to end
875     of what we have. In the case of libz, a non-zipped .gz file will be read as a
876     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877     fail. */
878    
879     #ifdef SUPPORT_LIBZ
880     if (frtype == FR_LIBZ)
881     {
882     ingz = (gzFile)handle;
883     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884     }
885     else
886     #endif
887    
888     #ifdef SUPPORT_LIBBZ2
889     if (frtype == FR_LIBBZ2)
890     {
891     inbz2 = (BZFILE *)handle;
892     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
894     } /* without the cast it is unsigned. */
895     else
896     #endif
897    
898     {
899     in = (FILE *)handle;
900     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901     }
902    
903 nigel 77 endptr = buffer + bufflength;
904    
905     /* Loop while the current pointer is not at the end of the file. For large
906     files, endptr will be at the end of the buffer when we are in the middle of the
907     file, but ptr will never get there, because as soon as it gets over 2/3 of the
908     way, the buffer is shifted left and re-filled. */
909    
910     while (ptr < endptr)
911 nigel 49 {
912 nigel 93 int i, endlinelength;
913 nigel 87 int mrc = 0;
914 nigel 53 BOOL match = FALSE;
915 ph10 286 char *matchptr = ptr;
916 nigel 77 char *t = ptr;
917     size_t length, linelength;
918 nigel 49
919 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
920     of the subject string to pass to pcre_exec(). In multiline mode, it is the
921     length remainder of the data in the buffer. Otherwise, it is the length of
922     the next line. After matching, we always advance by the length of the next
923     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924     that any match is constrained to be in the first line. */
925    
926 nigel 93 t = end_of_line(t, endptr, &endlinelength);
927     linelength = t - ptr - endlinelength;
928 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
929 nigel 77
930 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
931    
932     #ifdef JFRIEDL_DEBUG
933     if (jfriedl_XT || jfriedl_XR)
934     {
935     #include <sys/time.h>
936     #include <time.h>
937     struct timeval start_time, end_time;
938     struct timezone dummy;
939    
940     if (jfriedl_XT)
941     {
942     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943     const char *orig = ptr;
944     ptr = malloc(newlen + 1);
945     if (!ptr) {
946     printf("out of memory");
947     exit(2);
948     }
949     endptr = ptr;
950     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951     for (i = 0; i < jfriedl_XT; i++) {
952     strncpy(endptr, orig, length);
953     endptr += length;
954     }
955     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956     length = newlen;
957     }
958    
959     if (gettimeofday(&start_time, &dummy) != 0)
960     perror("bad gettimeofday");
961    
962    
963     for (i = 0; i < jfriedl_XR; i++)
964     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965    
966     if (gettimeofday(&end_time, &dummy) != 0)
967     perror("bad gettimeofday");
968    
969     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970     -
971     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972    
973     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974     return 0;
975     }
976     #endif
977    
978 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
979 ph10 279 in order to find any further matches in the same line. */
980 nigel 89
981 ph10 286 ONLY_MATCHING_RESTART:
982    
983 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
984     the final newline in the subject string. */
985    
986 nigel 87 for (i = 0; i < pattern_count; i++)
987 nigel 53 {
988 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989 nigel 87 offsets, 99);
990     if (mrc >= 0) { match = TRUE; break; }
991     if (mrc != PCRE_ERROR_NOMATCH)
992     {
993     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995     fprintf(stderr, "this line:\n");
996 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
997 nigel 87 fprintf(stderr, "\n");
998     if (error_count == 0 &&
999     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000     {
1001     fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002     "was exceeded\n", mrc);
1003     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004     }
1005     if (error_count++ > 20)
1006     {
1007     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008     exit(2);
1009     }
1010     match = invert; /* No more matching; don't show the line again */
1011     break;
1012     }
1013 nigel 53 }
1014 nigel 49
1015 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1016 nigel 77
1017 nigel 49 if (match != invert)
1018     {
1019 nigel 77 BOOL hyphenprinted = FALSE;
1020    
1021 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1022 nigel 77
1023 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1024    
1025     /* Just count if just counting is wanted. */
1026    
1027 nigel 49 if (count_only) count++;
1028    
1029 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1030     in the file. */
1031    
1032     else if (filenames == FN_ONLY)
1033 nigel 49 {
1034 nigel 77 fprintf(stdout, "%s\n", printname);
1035 nigel 49 return 0;
1036     }
1037    
1038 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1039    
1040 nigel 77 else if (quiet) return 0;
1041 nigel 49
1042 nigel 87 /* The --only-matching option prints just the substring that matched, and
1043 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1044 ph10 280 matching substring (they both force --only-matching). None of these options
1045     prints any context. Afterwards, adjust the start and length, and then jump
1046     back to look for further matches in the same line. If we are in invert
1047     mode, however, nothing is printed - this could be still useful because the
1048     return code is set. */
1049 nigel 87
1050     else if (only_matching)
1051     {
1052 ph10 279 if (!invert)
1053 ph10 286 {
1054 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1055     if (number) fprintf(stdout, "%d:", linenumber);
1056 ph10 280 if (line_offsets)
1057 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058 ph10 286 offsets[1] - offsets[0]);
1059 ph10 280 else if (file_offsets)
1060 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061 ph10 286 offsets[1] - offsets[0]);
1062     else
1063 ph10 377 {
1064     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1065 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1066 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1067     }
1068 ph10 279 fprintf(stdout, "\n");
1069     matchptr += offsets[1];
1070     length -= offsets[1];
1071 ph10 286 match = FALSE;
1072     goto ONLY_MATCHING_RESTART;
1073     }
1074 nigel 87 }
1075    
1076     /* This is the default case when none of the above options is set. We print
1077     the matching lines(s), possibly preceded and/or followed by other lines of
1078     context. */
1079    
1080 nigel 49 else
1081     {
1082 nigel 77 /* See if there is a requirement to print some "after" lines from a
1083     previous match. We never print any overlaps. */
1084    
1085     if (after_context > 0 && lastmatchnumber > 0)
1086     {
1087 nigel 93 int ellength;
1088 nigel 77 int linecount = 0;
1089     char *p = lastmatchrestart;
1090    
1091     while (p < ptr && linecount < after_context)
1092     {
1093 nigel 93 p = end_of_line(p, ptr, &ellength);
1094 nigel 77 linecount++;
1095     }
1096    
1097     /* It is important to advance lastmatchrestart during this printing so
1098 nigel 87 that it interacts correctly with any "before" printing below. Print
1099     each line's data using fwrite() in case there are binary zeroes. */
1100 nigel 77
1101     while (lastmatchrestart < p)
1102     {
1103     char *pp = lastmatchrestart;
1104     if (printname != NULL) fprintf(stdout, "%s-", printname);
1105     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1106 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1107     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1108     lastmatchrestart = pp;
1109 nigel 77 }
1110     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1111     }
1112    
1113     /* If there were non-contiguous lines printed above, insert hyphens. */
1114    
1115     if (hyphenpending)
1116     {
1117     fprintf(stdout, "--\n");
1118     hyphenpending = FALSE;
1119     hyphenprinted = TRUE;
1120     }
1121    
1122     /* See if there is a requirement to print some "before" lines for this
1123     match. Again, don't print overlaps. */
1124    
1125     if (before_context > 0)
1126     {
1127     int linecount = 0;
1128     char *p = ptr;
1129    
1130     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1131 nigel 87 linecount < before_context)
1132 nigel 77 {
1133 nigel 87 linecount++;
1134 nigel 93 p = previous_line(p, buffer);
1135 nigel 77 }
1136    
1137     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1138     fprintf(stdout, "--\n");
1139    
1140     while (p < ptr)
1141     {
1142 nigel 93 int ellength;
1143 nigel 77 char *pp = p;
1144     if (printname != NULL) fprintf(stdout, "%s-", printname);
1145     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1146 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1147     fwrite(p, 1, pp - p, stdout);
1148     p = pp;
1149 nigel 77 }
1150     }
1151    
1152     /* Now print the matching line(s); ensure we set hyphenpending at the end
1153 nigel 85 of the file if any context lines are being output. */
1154 nigel 77
1155 nigel 85 if (after_context > 0 || before_context > 0)
1156     endhyphenpending = TRUE;
1157    
1158 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1159 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1160 nigel 77
1161     /* In multiline mode, we want to print to the end of the line in which
1162     the end of the matched string is found, so we adjust linelength and the
1163 ph10 222 line number appropriately, but only when there actually was a match
1164     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1165     the match will always be before the first newline sequence. */
1166 nigel 77
1167     if (multiline)
1168     {
1169 nigel 93 int ellength;
1170 ph10 222 char *endmatch = ptr;
1171     if (!invert)
1172 nigel 93 {
1173 ph10 222 endmatch += offsets[1];
1174     t = ptr;
1175     while (t < endmatch)
1176     {
1177     t = end_of_line(t, endptr, &ellength);
1178     if (t <= endmatch) linenumber++; else break;
1179     }
1180 nigel 93 }
1181     endmatch = end_of_line(endmatch, endptr, &ellength);
1182     linelength = endmatch - ptr - ellength;
1183 nigel 77 }
1184    
1185 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1186     zeroes are treated as just another data character. */
1187    
1188     /* This extra option, for Jeffrey Friedl's debugging requirements,
1189     replaces the matched string, or a specific captured string if it exists,
1190     with X. When this happens, colouring is ignored. */
1191    
1192     #ifdef JFRIEDL_DEBUG
1193     if (S_arg >= 0 && S_arg < mrc)
1194     {
1195     int first = S_arg * 2;
1196     int last = first + 1;
1197     fwrite(ptr, 1, offsets[first], stdout);
1198     fprintf(stdout, "X");
1199     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1200     }
1201     else
1202     #endif
1203    
1204     /* We have to split the line(s) up if colouring. */
1205    
1206     if (do_colour)
1207     {
1208     fwrite(ptr, 1, offsets[0], stdout);
1209     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1210     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1211     fprintf(stdout, "%c[00m", 0x1b);
1212 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1213 ph10 239 stdout);
1214 nigel 87 }
1215 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1216 nigel 49 }
1217    
1218 nigel 87 /* End of doing what has to be done for a match */
1219    
1220 nigel 77 rc = 0; /* Had some success */
1221    
1222     /* Remember where the last match happened for after_context. We remember
1223     where we are about to restart, and that line's number. */
1224    
1225 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1226 nigel 77 lastmatchnumber = linenumber + 1;
1227 nigel 49 }
1228 nigel 77
1229 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1230     anything to be printed), we have to move on to the end of the match before
1231     proceeding. */
1232    
1233     if (multiline && invert && match)
1234     {
1235     int ellength;
1236     char *endmatch = ptr + offsets[1];
1237     t = ptr;
1238     while (t < endmatch)
1239     {
1240     t = end_of_line(t, endptr, &ellength);
1241     if (t <= endmatch) linenumber++; else break;
1242     }
1243     endmatch = end_of_line(endmatch, endptr, &ellength);
1244     linelength = endmatch - ptr - ellength;
1245     }
1246    
1247 ph10 286 /* Advance to after the newline and increment the line number. The file
1248 ph10 280 offset to the current line is maintained in filepos. */
1249 nigel 77
1250 nigel 93 ptr += linelength + endlinelength;
1251 ph10 280 filepos += linelength + endlinelength;
1252 nigel 77 linenumber++;
1253    
1254     /* If we haven't yet reached the end of the file (the buffer is full), and
1255     the current point is in the top 1/3 of the buffer, slide the buffer down by
1256     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1257     about to be lost, print them. */
1258    
1259     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1260     {
1261     if (after_context > 0 &&
1262     lastmatchnumber > 0 &&
1263     lastmatchrestart < buffer + MBUFTHIRD)
1264     {
1265     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1266     lastmatchnumber = 0;
1267     }
1268    
1269     /* Now do the shuffle */
1270    
1271     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1272     ptr -= MBUFTHIRD;
1273 ph10 286
1274     #ifdef SUPPORT_LIBZ
1275     if (frtype == FR_LIBZ)
1276     bufflength = 2*MBUFTHIRD +
1277     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1278     else
1279     #endif
1280    
1281     #ifdef SUPPORT_LIBBZ2
1282     if (frtype == FR_LIBBZ2)
1283     bufflength = 2*MBUFTHIRD +
1284     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1285     else
1286     #endif
1287    
1288 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1289 ph10 286
1290 nigel 77 endptr = buffer + bufflength;
1291    
1292     /* Adjust any last match point */
1293    
1294     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1295     }
1296     } /* Loop through the whole file */
1297    
1298     /* End of file; print final "after" lines if wanted; do_after_lines sets
1299     hyphenpending if it prints something. */
1300    
1301 nigel 87 if (!only_matching && !count_only)
1302     {
1303     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1304     hyphenpending |= endhyphenpending;
1305     }
1306 nigel 77
1307     /* Print the file name if we are looking for those without matches and there
1308     were none. If we found a match, we won't have got this far. */
1309    
1310 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1311 nigel 77 {
1312     fprintf(stdout, "%s\n", printname);
1313     return 0;
1314 nigel 49 }
1315    
1316 nigel 77 /* Print the match count if wanted */
1317    
1318 nigel 49 if (count_only)
1319     {
1320 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1321 nigel 49 fprintf(stdout, "%d\n", count);
1322     }
1323    
1324     return rc;
1325     }
1326    
1327    
1328    
1329     /*************************************************
1330 nigel 53 * Grep a file or recurse into a directory *
1331     *************************************************/
1332    
1333 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1334     recursing; if it's a file, grep it.
1335    
1336     Arguments:
1337     pathname the path to investigate
1338 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1339 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1340    
1341     Returns: 0 if there was at least one match
1342     1 if there were no matches
1343     2 there was some kind of error
1344    
1345     However, file opening failures are suppressed if "silent" is set.
1346     */
1347    
1348 nigel 53 static int
1349 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1350 nigel 53 {
1351     int rc = 1;
1352     int sep;
1353 ph10 286 int frtype;
1354     int pathlen;
1355     void *handle;
1356     FILE *in = NULL; /* Ensure initialized */
1357 nigel 53
1358 ph10 286 #ifdef SUPPORT_LIBZ
1359     gzFile ingz = NULL;
1360     #endif
1361    
1362     #ifdef SUPPORT_LIBBZ2
1363     BZFILE *inbz2 = NULL;
1364     #endif
1365    
1366 nigel 77 /* If the file name is "-" we scan stdin */
1367 nigel 53
1368 nigel 77 if (strcmp(pathname, "-") == 0)
1369 nigel 53 {
1370 ph10 286 return pcregrep(stdin, FR_PLAIN,
1371 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1372 nigel 77 stdin_name : NULL);
1373     }
1374    
1375 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1376 ph10 325 each file and directory within it, subject to any include or exclude patterns
1377     that were set. The scanning code is localized so it can be made
1378     system-specific. */
1379 nigel 87
1380     if ((sep = isdirectory(pathname)) != 0)
1381 nigel 77 {
1382 nigel 87 if (dee_action == dee_SKIP) return 1;
1383     if (dee_action == dee_RECURSE)
1384 nigel 53 {
1385 nigel 87 char buffer[1024];
1386     char *nextfile;
1387     directory_type *dir = opendirectory(pathname);
1388 nigel 53
1389 nigel 87 if (dir == NULL)
1390     {
1391     if (!silent)
1392     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1393     strerror(errno));
1394     return 2;
1395     }
1396 nigel 77
1397 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1398     {
1399 ph10 324 int frc, nflen;
1400 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1401 ph10 324 nflen = strlen(nextfile);
1402 ph10 345
1403 ph10 325 if (isdirectory(buffer))
1404     {
1405     if (exclude_dir_compiled != NULL &&
1406     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1407     continue;
1408 ph10 345
1409 ph10 325 if (include_dir_compiled != NULL &&
1410     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1411     continue;
1412     }
1413 ph10 345 else
1414     {
1415 ph10 324 if (exclude_compiled != NULL &&
1416     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1417     continue;
1418 ph10 345
1419 ph10 324 if (include_compiled != NULL &&
1420     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1421     continue;
1422 ph10 345 }
1423 nigel 77
1424 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1425     if (frc > 1) rc = frc;
1426     else if (frc == 0 && rc == 1) rc = 0;
1427     }
1428    
1429     closedirectory(dir);
1430     return rc;
1431 nigel 53 }
1432     }
1433    
1434 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1435     been requested. */
1436 nigel 53
1437 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1438    
1439     /* Control reaches here if we have a regular file, or if we have a directory
1440     and recursion or skipping was not requested, or if we have anything else and
1441     skipping was not requested. The scan proceeds. If this is the first and only
1442     argument at top level, we don't show the file name, unless we are only showing
1443     the file name, or the filename was forced (-H). */
1444    
1445 ph10 286 pathlen = strlen(pathname);
1446    
1447     /* Open using zlib if it is supported and the file name ends with .gz. */
1448    
1449     #ifdef SUPPORT_LIBZ
1450     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1451 nigel 53 {
1452 ph10 286 ingz = gzopen(pathname, "rb");
1453     if (ingz == NULL)
1454     {
1455     if (!silent)
1456     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1457     strerror(errno));
1458     return 2;
1459     }
1460     handle = (void *)ingz;
1461     frtype = FR_LIBZ;
1462     }
1463     else
1464     #endif
1465    
1466     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1467    
1468     #ifdef SUPPORT_LIBBZ2
1469     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1470     {
1471     inbz2 = BZ2_bzopen(pathname, "rb");
1472     handle = (void *)inbz2;
1473     frtype = FR_LIBBZ2;
1474     }
1475     else
1476     #endif
1477    
1478     /* Otherwise use plain fopen(). The label is so that we can come back here if
1479     an attempt to read a .bz2 file indicates that it really is a plain file. */
1480    
1481     #ifdef SUPPORT_LIBBZ2
1482     PLAIN_FILE:
1483     #endif
1484     {
1485     in = fopen(pathname, "r");
1486     handle = (void *)in;
1487     frtype = FR_PLAIN;
1488     }
1489    
1490     /* All the opening methods return errno when they fail. */
1491    
1492     if (handle == NULL)
1493     {
1494 nigel 77 if (!silent)
1495     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1496     strerror(errno));
1497 nigel 53 return 2;
1498     }
1499    
1500 ph10 286 /* Now grep the file */
1501    
1502     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1503 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1504 nigel 77
1505 ph10 286 /* Close in an appropriate manner. */
1506    
1507     #ifdef SUPPORT_LIBZ
1508     if (frtype == FR_LIBZ)
1509     gzclose(ingz);
1510     else
1511     #endif
1512    
1513     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1514     read failed. If the error indicates that the file isn't in fact bzipped, try
1515     again as a normal file. */
1516    
1517     #ifdef SUPPORT_LIBBZ2
1518     if (frtype == FR_LIBBZ2)
1519     {
1520     if (rc == 2)
1521     {
1522     int errnum;
1523     const char *err = BZ2_bzerror(inbz2, &errnum);
1524     if (errnum == BZ_DATA_ERROR_MAGIC)
1525     {
1526     BZ2_bzclose(inbz2);
1527     goto PLAIN_FILE;
1528     }
1529     else if (!silent)
1530     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1531     pathname, err);
1532     }
1533     BZ2_bzclose(inbz2);
1534     }
1535     else
1536     #endif
1537    
1538     /* Normal file close */
1539    
1540 nigel 53 fclose(in);
1541 ph10 286
1542     /* Pass back the yield from pcregrep(). */
1543    
1544 nigel 53 return rc;
1545     }
1546    
1547    
1548    
1549    
1550     /*************************************************
1551 nigel 49 * Usage function *
1552     *************************************************/
1553    
1554     static int
1555     usage(int rc)
1556     {
1557 nigel 87 option_item *op;
1558     fprintf(stderr, "Usage: pcregrep [-");
1559     for (op = optionlist; op->one_char != 0; op++)
1560     {
1561     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1562     }
1563     fprintf(stderr, "] [long options] [pattern] [files]\n");
1564 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1565     "options.\n");
1566 nigel 49 return rc;
1567     }
1568    
1569    
1570    
1571    
1572     /*************************************************
1573 nigel 53 * Help function *
1574     *************************************************/
1575    
1576     static void
1577     help(void)
1578     {
1579     option_item *op;
1580    
1581 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1582 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1583 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1584 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1585    
1586     #ifdef SUPPORT_LIBZ
1587     printf("Files whose names end in .gz are read using zlib.\n");
1588     #endif
1589    
1590     #ifdef SUPPORT_LIBBZ2
1591     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1592     #endif
1593    
1594     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1595     printf("Other files and the standard input are read as plain files.\n\n");
1596     #else
1597     printf("All files are read as plain files, without any interpretation.\n\n");
1598     #endif
1599    
1600 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1601     printf("Options:\n");
1602    
1603     for (op = optionlist; op->one_char != 0; op++)
1604     {
1605     int n;
1606     char s[4];
1607     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1608 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1609 nigel 53 if (n < 1) n = 1;
1610     printf("%.*s%s\n", n, " ", op->help_text);
1611     }
1612    
1613 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1614     printf("trailing white space is removed and blank lines are ignored.\n");
1615     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1616 nigel 53
1617 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1618 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1619     }
1620    
1621    
1622    
1623    
1624     /*************************************************
1625 nigel 77 * Handle a single-letter, no data option *
1626 nigel 53 *************************************************/
1627    
1628     static int
1629     handle_option(int letter, int options)
1630     {
1631     switch(letter)
1632     {
1633 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1634 nigel 87 case N_HELP: help(); exit(0);
1635 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1636 nigel 53 case 'c': count_only = TRUE; break;
1637 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1638     case 'H': filenames = FN_FORCE; break;
1639     case 'h': filenames = FN_NONE; break;
1640 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1641 nigel 87 case 'l': filenames = FN_ONLY; break;
1642     case 'L': filenames = FN_NOMATCH_ONLY; break;
1643 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1644 nigel 53 case 'n': number = TRUE; break;
1645 nigel 87 case 'o': only_matching = TRUE; break;
1646 nigel 77 case 'q': quiet = TRUE; break;
1647 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1648 nigel 53 case 's': silent = TRUE; break;
1649 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1650 nigel 53 case 'v': invert = TRUE; break;
1651 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1652     case 'x': process_options |= PO_LINE_MATCH; break;
1653 nigel 53
1654     case 'V':
1655 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1656 nigel 53 exit(0);
1657     break;
1658    
1659     default:
1660     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1661     exit(usage(2));
1662     }
1663    
1664     return options;
1665     }
1666    
1667    
1668    
1669    
1670     /*************************************************
1671 nigel 87 * Construct printed ordinal *
1672     *************************************************/
1673    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), as do negative
numbers.

Argument:   n    the number
Returns:    pointer to a static buffer holding the text; the contents are
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[24];    /* Room for any int, a 2-char suffix, and a zero */
char *p = buffer;
int lasttwo = n % 100;

p += sprintf(p, "%d", n);

/* The "teens" are irregular: they all use "th" whatever the final digit. */

if (lasttwo >= 11 && lasttwo <= 13)
  {
  strcpy(p, "th");
  return buffer;
  }

switch (n%10)
  {
  case 1:  strcpy(p, "st"); break;
  case 2:  strcpy(p, "nd"); break;
  case 3:  strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1692    
1693    
1694    
1695     /*************************************************
1696     * Compile a single pattern *
1697     *************************************************/
1698    
1699     /* When the -F option has been used, this is called for each substring.
1700     Otherwise it's called for each supplied pattern.
1701    
1702     Arguments:
1703     pattern the pattern string
1704     options the PCRE options
1705     filename the file name, or NULL for a command-line pattern
1706     count 0 if this is the only command line pattern, or
1707     number of the command line pattern, or
1708     linenumber for a pattern from a file
1709    
1710     Returns: TRUE on success, FALSE after an error
1711     */
1712    
1713     static BOOL
1714     compile_single_pattern(char *pattern, int options, char *filename, int count)
1715     {
1716     char buffer[MBUFTHIRD + 16];
1717     const char *error;
1718     int errptr;
1719    
1720     if (pattern_count >= MAX_PATTERN_COUNT)
1721     {
1722     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1723     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1724     return FALSE;
1725     }
1726    
1727     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1728     suffix[process_options]);
1729     pattern_list[pattern_count] =
1730     pcre_compile(buffer, options, &error, &errptr, pcretables);
1731 ph10 142 if (pattern_list[pattern_count] != NULL)
1732 ph10 141 {
1733 ph10 142 pattern_count++;
1734 ph10 141 return TRUE;
1735 ph10 142 }
1736 nigel 87
1737     /* Handle compile errors */
1738    
1739     errptr -= (int)strlen(prefix[process_options]);
1740     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1741    
1742     if (filename == NULL)
1743     {
1744     if (count == 0)
1745     fprintf(stderr, "pcregrep: Error in command-line regex "
1746     "at offset %d: %s\n", errptr, error);
1747     else
1748     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1749     "at offset %d: %s\n", ordin(count), errptr, error);
1750     }
1751     else
1752     {
1753     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1754     "at offset %d: %s\n", count, filename, errptr, error);
1755     }
1756    
1757     return FALSE;
1758     }
1759    
1760    
1761    
1762     /*************************************************
1763     * Compile one supplied pattern *
1764     *************************************************/
1765    
1766     /* When the -F option has been used, each string may be a list of strings,
1767 nigel 91 separated by line breaks. They will be matched literally.
1768 nigel 87
1769     Arguments:
1770     pattern the pattern string
1771     options the PCRE options
1772     filename the file name, or NULL for a command-line pattern
1773     count 0 if this is the only command line pattern, or
1774     number of the command line pattern, or
1775     linenumber for a pattern from a file
1776    
1777     Returns: TRUE on success, FALSE after an error
1778     */
1779    
1780     static BOOL
1781     compile_pattern(char *pattern, int options, char *filename, int count)
1782     {
1783     if ((process_options & PO_FIXED_STRINGS) != 0)
1784     {
1785 nigel 93 char *eop = pattern + strlen(pattern);
1786 nigel 87 char buffer[MBUFTHIRD];
1787     for(;;)
1788     {
1789 nigel 93 int ellength;
1790     char *p = end_of_line(pattern, eop, &ellength);
1791     if (ellength == 0)
1792 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1793 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1794 nigel 93 pattern = p;
1795 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1796     return FALSE;
1797     }
1798     }
1799     else return compile_single_pattern(pattern, options, filename, count);
1800     }
1801    
1802    
1803    
1804     /*************************************************
1805 nigel 49 * Main program *
1806     *************************************************/
1807    
1808 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1809    
1810 nigel 49 int
1811     main(int argc, char **argv)
1812     {
1813 nigel 53 int i, j;
1814 nigel 49 int rc = 1;
1815 nigel 87 int pcre_options = 0;
1816     int cmd_pattern_count = 0;
1817 ph10 141 int hint_count = 0;
1818 nigel 49 int errptr;
1819 nigel 87 BOOL only_one_at_top;
1820     char *patterns[MAX_PATTERN_COUNT];
1821     const char *locale_from = "--locale";
1822 nigel 49 const char *error;
1823    
1824 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1825     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1826     */
1827 nigel 91
1828     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1829     switch(i)
1830     {
1831     default: newline = (char *)"lf"; break;
1832     case '\r': newline = (char *)"cr"; break;
1833     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1834 nigel 93 case -1: newline = (char *)"any"; break;
1835 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1836 nigel 91 }
1837    
1838 nigel 49 /* Process the options */
1839    
1840     for (i = 1; i < argc; i++)
1841     {
1842 nigel 77 option_item *op = NULL;
1843     char *option_data = (char *)""; /* default to keep compiler happy */
1844     BOOL longop;
1845     BOOL longopwasequals = FALSE;
1846    
1847 nigel 49 if (argv[i][0] != '-') break;
1848 nigel 53
1849 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1850 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1851 nigel 63
1852 nigel 77 if (argv[i][1] == 0)
1853     {
1854 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1855 nigel 77 else exit(usage(2));
1856     }
1857 nigel 63
1858 nigel 77 /* Handle a long name option, or -- to terminate the options */
1859 nigel 53
1860     if (argv[i][1] == '-')
1861 nigel 49 {
1862 nigel 77 char *arg = argv[i] + 2;
1863     char *argequals = strchr(arg, '=');
1864 nigel 53
1865 nigel 77 if (*arg == 0) /* -- terminates options */
1866 nigel 49 {
1867 nigel 77 i++;
1868     break; /* out of the options-handling loop */
1869 nigel 53 }
1870 nigel 49
1871 nigel 77 longop = TRUE;
1872    
1873     /* Some long options have data that follows after =, for example file=name.
1874     Some options have variations in the long name spelling: specifically, we
1875     allow "regexp" because GNU grep allows it, though I personally go along
1876 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1877     These options are entered in the table as "regex(p)". No option is in both
1878     these categories, fortunately. */
1879 nigel 77
1880 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1881     {
1882 nigel 77 char *opbra = strchr(op->long_name, '(');
1883     char *equals = strchr(op->long_name, '=');
1884     if (opbra == NULL) /* Not a (p) case */
1885 nigel 53 {
1886 nigel 77 if (equals == NULL) /* Not thing=data case */
1887     {
1888     if (strcmp(arg, op->long_name) == 0) break;
1889     }
1890     else /* Special case xxx=data */
1891     {
1892     int oplen = equals - op->long_name;
1893 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1894 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1895     {
1896     option_data = arg + arglen;
1897     if (*option_data == '=')
1898     {
1899     option_data++;
1900     longopwasequals = TRUE;
1901     }
1902     break;
1903     }
1904     }
1905 nigel 53 }
1906 nigel 77 else /* Special case xxxx(p) */
1907     {
1908     char buff1[24];
1909     char buff2[24];
1910     int baselen = opbra - op->long_name;
1911     sprintf(buff1, "%.*s", baselen, op->long_name);
1912 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1913 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1914 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1915     break;
1916     }
1917 nigel 53 }
1918 nigel 77
1919 nigel 53 if (op->one_char == 0)
1920     {
1921     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1922     exit(usage(2));
1923     }
1924     }
1925 nigel 49
1926 nigel 89
1927     /* Jeffrey Friedl's debugging harness uses these additional options which
1928     are not in the right form for putting in the option table because they use
1929     only one hyphen, yet are more than one character long. By putting them
1930     separately here, they will not get displayed as part of the help() output,
1931     but I don't think Jeffrey will care about that. */
1932    
1933     #ifdef JFRIEDL_DEBUG
1934     else if (strcmp(argv[i], "-pre") == 0) {
1935     jfriedl_prefix = argv[++i];
1936     continue;
1937     } else if (strcmp(argv[i], "-post") == 0) {
1938     jfriedl_postfix = argv[++i];
1939     continue;
1940     } else if (strcmp(argv[i], "-XT") == 0) {
1941     sscanf(argv[++i], "%d", &jfriedl_XT);
1942     continue;
1943     } else if (strcmp(argv[i], "-XR") == 0) {
1944     sscanf(argv[++i], "%d", &jfriedl_XR);
1945     continue;
1946     }
1947     #endif
1948    
1949    
1950 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1951     continue till we hit the last one or one that needs data. */
1952 nigel 53
1953     else
1954     {
1955     char *s = argv[i] + 1;
1956 nigel 77 longop = FALSE;
1957 nigel 53 while (*s != 0)
1958     {
1959 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1960     { if (*s == op->one_char) break; }
1961     if (op->one_char == 0)
1962 nigel 53 {
1963 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1964     *s, argv[i]);
1965     exit(usage(2));
1966     }
1967     if (op->type != OP_NODATA || s[1] == 0)
1968     {
1969     option_data = s+1;
1970 nigel 53 break;
1971     }
1972 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1973 nigel 49 }
1974     }
1975 nigel 77
1976 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1977     is NO_DATA, it means that there is no data, and the option might set
1978     something in the PCRE options. */
1979 nigel 77
1980     if (op->type == OP_NODATA)
1981     {
1982 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1983     continue;
1984     }
1985    
1986     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1987     either has a value or defaults to something. It cannot have data in a
1988     separate item. At the moment, the only such options are "colo(u)r" and
1989 nigel 89 Jeffrey Friedl's special -S debugging option. */
1990 nigel 87
1991     if (*option_data == 0 &&
1992     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1993     {
1994     switch (op->one_char)
1995 nigel 77 {
1996 nigel 87 case N_COLOUR:
1997     colour_option = (char *)"auto";
1998     break;
1999     #ifdef JFRIEDL_DEBUG
2000     case 'S':
2001     S_arg = 0;
2002     break;
2003     #endif
2004 nigel 77 }
2005 nigel 87 continue;
2006     }
2007 nigel 77
2008 nigel 87 /* Otherwise, find the data string for the option. */
2009    
2010     if (*option_data == 0)
2011     {
2012     if (i >= argc - 1 || longopwasequals)
2013 nigel 77 {
2014 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2015     exit(usage(2));
2016     }
2017     option_data = argv[++i];
2018     }
2019    
2020     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2021     multiple times to create a list of patterns. */
2022    
2023     if (op->type == OP_PATLIST)
2024     {
2025     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2026     {
2027     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2028     MAX_PATTERN_COUNT);
2029     return 2;
2030     }
2031     patterns[cmd_pattern_count++] = option_data;
2032     }
2033    
2034     /* Otherwise, deal with single string or numeric data values. */
2035    
2036     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2037     {
2038     *((char **)op->dataptr) = option_data;
2039     }
2040     else
2041     {
2042     char *endptr;
2043     int n = strtoul(option_data, &endptr, 10);
2044     if (*endptr != 0)
2045     {
2046     if (longop)
2047 nigel 77 {
2048 nigel 87 char *equals = strchr(op->long_name, '=');
2049     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2050     equals - op->long_name;
2051     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2052     option_data, nlen, op->long_name);
2053 nigel 77 }
2054 nigel 87 else
2055     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2056     option_data, op->one_char);
2057     exit(usage(2));
2058 nigel 77 }
2059 nigel 87 *((int *)op->dataptr) = n;
2060 nigel 77 }
2061 nigel 49 }
2062    
2063 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2064     for -A and -B. */
2065    
2066     if (both_context > 0)
2067     {
2068     if (after_context == 0) after_context = both_context;
2069     if (before_context == 0) before_context = both_context;
2070     }
2071 ph10 286
2072     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2073 ph10 280 However, the latter two set the only_matching flag. */
2074 nigel 77
2075 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2076 ph10 286 (file_offsets && line_offsets))
2077 ph10 280 {
2078     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2079     "and/or --line-offsets\n");
2080     exit(usage(2));
2081     }
2082    
2083 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2084    
2085 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2086     LC_ALL environment variable is set, and if so, use it. */
2087 nigel 49
2088 nigel 87 if (locale == NULL)
2089 nigel 53 {
2090 nigel 87 locale = getenv("LC_ALL");
2091     locale_from = "LCC_ALL";
2092 nigel 53 }
2093 nigel 49
2094 nigel 87 if (locale == NULL)
2095     {
2096     locale = getenv("LC_CTYPE");
2097     locale_from = "LC_CTYPE";
2098     }
2099 nigel 49
2100 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2101     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2102    
2103     if (locale != NULL)
2104 nigel 49 {
2105 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2106 nigel 53 {
2107 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2108     locale, locale_from);
2109 nigel 53 return 2;
2110     }
2111 nigel 87 pcretables = pcre_maketables();
2112     }
2113 nigel 77
2114 nigel 87 /* Sort out colouring */
2115    
2116     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2117     {
2118     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2119     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2120     else
2121 nigel 53 {
2122 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2123     colour_option);
2124     return 2;
2125 nigel 77 }
2126 nigel 87 if (do_colour)
2127 nigel 77 {
2128 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2129     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2130     if (cs != NULL) colour_string = cs;
2131 nigel 77 }
2132 nigel 87 }
2133 nigel 77
2134 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2135    
2136     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2137     {
2138     pcre_options |= PCRE_NEWLINE_CR;
2139 nigel 93 endlinetype = EL_CR;
2140 nigel 91 }
2141     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2142     {
2143     pcre_options |= PCRE_NEWLINE_LF;
2144 nigel 93 endlinetype = EL_LF;
2145 nigel 91 }
2146     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2147     {
2148     pcre_options |= PCRE_NEWLINE_CRLF;
2149 nigel 93 endlinetype = EL_CRLF;
2150 nigel 91 }
2151 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2152     {
2153     pcre_options |= PCRE_NEWLINE_ANY;
2154     endlinetype = EL_ANY;
2155     }
2156 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2157     {
2158     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2159     endlinetype = EL_ANYCRLF;
2160     }
2161 nigel 91 else
2162     {
2163     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2164     return 2;
2165     }
2166    
2167 nigel 87 /* Interpret the text values for -d and -D */
2168    
2169     if (dee_option != NULL)
2170     {
2171     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2172     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2173     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2174     else
2175 nigel 77 {
2176 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2177     return 2;
2178 nigel 53 }
2179 nigel 49 }
2180    
2181 nigel 87 if (DEE_option != NULL)
2182     {
2183     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2184     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2185     else
2186     {
2187     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2188     return 2;
2189     }
2190     }
2191 nigel 49
2192 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2193 nigel 87
2194     #ifdef JFRIEDL_DEBUG
2195     if (S_arg > 9)
2196 nigel 49 {
2197 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2198     return 2;
2199     }
2200 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2201     {
2202     if (jfriedl_XT == 0) jfriedl_XT = 1;
2203     if (jfriedl_XR == 0) jfriedl_XR = 1;
2204     }
2205 nigel 87 #endif
2206 nigel 77
2207 nigel 87 /* Get memory to store the pattern and hints lists. */
2208    
2209     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2210     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2211    
2212     if (pattern_list == NULL || hints_list == NULL)
2213     {
2214     fprintf(stderr, "pcregrep: malloc failed\n");
2215 ph10 123 goto EXIT2;
2216 nigel 87 }
2217    
2218     /* If no patterns were provided by -e, and there is no file provided by -f,
2219     the first argument is the one and only pattern, and it must exist. */
2220    
2221     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2222     {
2223 nigel 63 if (i >= argc) return usage(2);
2224 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2225     }
2226 nigel 77
2227 nigel 87 /* Compile the patterns that were provided on the command line, either by
2228     multiple uses of -e or as a single unkeyed pattern. */
2229    
2230     for (j = 0; j < cmd_pattern_count; j++)
2231     {
2232     if (!compile_pattern(patterns[j], pcre_options, NULL,
2233     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2234 ph10 123 goto EXIT2;
2235 nigel 87 }
2236    
2237     /* Compile the regular expressions that are provided in a file. */
2238    
2239     if (pattern_filename != NULL)
2240     {
2241     int linenumber = 0;
2242     FILE *f;
2243     char *filename;
2244     char buffer[MBUFTHIRD];
2245    
2246     if (strcmp(pattern_filename, "-") == 0)
2247 nigel 77 {
2248 nigel 87 f = stdin;
2249     filename = stdin_name;
2250 nigel 77 }
2251 nigel 87 else
2252 nigel 77 {
2253 nigel 87 f = fopen(pattern_filename, "r");
2254     if (f == NULL)
2255     {
2256     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2257     strerror(errno));
2258 ph10 123 goto EXIT2;
2259 nigel 87 }
2260     filename = pattern_filename;
2261 nigel 77 }
2262    
2263 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2264 nigel 53 {
2265 nigel 87 char *s = buffer + (int)strlen(buffer);
2266     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2267     *s = 0;
2268     linenumber++;
2269     if (buffer[0] == 0) continue; /* Skip blank lines */
2270     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2271 ph10 121 goto EXIT2;
2272 nigel 53 }
2273 nigel 87
2274     if (f != stdin) fclose(f);
2275 nigel 49 }
2276    
2277 nigel 77 /* Study the regular expressions, as we will be running them many times */
2278 nigel 53
2279     for (j = 0; j < pattern_count; j++)
2280     {
2281     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2282     if (error != NULL)
2283     {
2284     char s[16];
2285     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2286     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2287 ph10 121 goto EXIT2;
2288 nigel 53 }
2289 ph10 142 hint_count++;
2290 nigel 53 }
2291    
2292 nigel 77 /* If there are include or exclude patterns, compile them. */
2293    
2294     if (exclude_pattern != NULL)
2295     {
2296 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2297     pcretables);
2298 nigel 77 if (exclude_compiled == NULL)
2299     {
2300     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2301     errptr, error);
2302 ph10 121 goto EXIT2;
2303 nigel 77 }
2304     }
2305    
2306     if (include_pattern != NULL)
2307     {
2308 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2309     pcretables);
2310 nigel 77 if (include_compiled == NULL)
2311     {
2312     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2313     errptr, error);
2314 ph10 121 goto EXIT2;
2315 nigel 77 }
2316     }
2317    
2318 ph10 325 if (exclude_dir_pattern != NULL)
2319     {
2320     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2321     pcretables);
2322     if (exclude_dir_compiled == NULL)
2323     {
2324     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2325     errptr, error);
2326     goto EXIT2;
2327     }
2328     }
2329    
2330     if (include_dir_pattern != NULL)
2331     {
2332     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2333     pcretables);
2334     if (include_dir_compiled == NULL)
2335     {
2336     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2337     errptr, error);
2338     goto EXIT2;
2339     }
2340     }
2341    
2342 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2343 nigel 49
2344 nigel 87 if (i >= argc)
2345 ph10 121 {
2346 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2347 ph10 121 goto EXIT;
2348 ph10 123 }
2349 nigel 49
2350 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2351     Pass in the fact that there is only one argument at top level - this suppresses
2352 nigel 87 the file name if the argument is not a directory and filenames are not
2353     otherwise forced. */
2354 nigel 49
2355 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2356 nigel 49
2357     for (; i < argc; i++)
2358     {
2359 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2360     only_one_at_top);
2361 nigel 77 if (frc > 1) rc = frc;
2362     else if (frc == 0 && rc == 1) rc = 0;
2363 nigel 49 }
2364    
2365 ph10 121 EXIT:
2366     if (pattern_list != NULL)
2367     {
2368 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2369 ph10 121 free(pattern_list);
2370 ph10 123 }
2371 ph10 121 if (hints_list != NULL)
2372     {
2373 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2374 ph10 121 free(hints_list);
2375 ph10 123 }
2376 nigel 49 return rc;
2377 ph10 121
2378     EXIT2:
2379     rc = 2;
2380     goto EXIT;
2381 nigel 49 }
2382    
2383 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12