/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 357 - (hide annotations) (download)
Tue Jul 8 14:18:28 2008 UTC (6 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 67166 byte(s)
Added two (int) casts to pcregrep.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 305 Copyright (c) 1997-2008 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
68     #define FALSE 0
69     #define TRUE 1
70    
71     typedef int BOOL;
72    
73 nigel 53 #define MAX_PATTERN_COUNT 100
74 nigel 49
75 nigel 77 #if BUFSIZ > 8192
76     #define MBUFTHIRD BUFSIZ
77     #else
78     #define MBUFTHIRD 8192
79     #endif
80 nigel 49
81 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
82     output. The order is important; it is assumed that a file name is wanted for
83     all values greater than FN_DEFAULT. */
84 nigel 77
85 nigel 87 enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87 ph10 286 /* File reading styles */
88    
89     enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91 nigel 87 /* Actions for the -d and -D options */
92    
93     enum { dee_READ, dee_SKIP, dee_RECURSE };
94     enum { DEE_READ, DEE_SKIP };
95    
96     /* Actions for special processing options (flag bits) */
97    
98     #define PO_WORD_MATCH 0x0001
99     #define PO_LINE_MATCH 0x0002
100     #define PO_FIXED_STRINGS 0x0004
101    
102 nigel 93 /* Line ending types */
103 nigel 87
104 ph10 149 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105 nigel 87
106 nigel 93
107    
108 nigel 49 /*************************************************
109     * Global variables *
110     *************************************************/
111    
112 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
113     regular code. */
114    
115     #ifdef JFRIEDL_DEBUG
116     static int S_arg = -1;
117 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
118     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
119     static const char *jfriedl_prefix = "";
120     static const char *jfriedl_postfix = "";
121 nigel 87 #endif
122    
123 nigel 93 static int endlinetype;
124 nigel 91
125 nigel 87 static char *colour_string = (char *)"1;31";
126     static char *colour_option = NULL;
127     static char *dee_option = NULL;
128     static char *DEE_option = NULL;
129 nigel 91 static char *newline = NULL;
130 nigel 53 static char *pattern_filename = NULL;
131 nigel 77 static char *stdin_name = (char *)"(standard input)";
132 nigel 87 static char *locale = NULL;
133    
134     static const unsigned char *pcretables = NULL;
135    
136 nigel 53 static int pattern_count = 0;
137 ph10 121 static pcre **pattern_list = NULL;
138     static pcre_extra **hints_list = NULL;
139 nigel 49
140 nigel 77 static char *include_pattern = NULL;
141     static char *exclude_pattern = NULL;
142 ph10 325 static char *include_dir_pattern = NULL;
143     static char *exclude_dir_pattern = NULL;
144 nigel 77
145     static pcre *include_compiled = NULL;
146     static pcre *exclude_compiled = NULL;
147 ph10 325 static pcre *include_dir_compiled = NULL;
148     static pcre *exclude_dir_compiled = NULL;
149 nigel 77
150     static int after_context = 0;
151     static int before_context = 0;
152     static int both_context = 0;
153 nigel 87 static int dee_action = dee_READ;
154     static int DEE_action = DEE_READ;
155     static int error_count = 0;
156     static int filenames = FN_DEFAULT;
157     static int process_options = 0;
158 nigel 77
159 nigel 49 static BOOL count_only = FALSE;
160 nigel 87 static BOOL do_colour = FALSE;
161 ph10 280 static BOOL file_offsets = FALSE;
162 nigel 77 static BOOL hyphenpending = FALSE;
163 nigel 49 static BOOL invert = FALSE;
164 ph10 280 static BOOL line_offsets = FALSE;
165 nigel 77 static BOOL multiline = FALSE;
166 nigel 49 static BOOL number = FALSE;
167 nigel 87 static BOOL only_matching = FALSE;
168 nigel 77 static BOOL quiet = FALSE;
169 nigel 49 static BOOL silent = FALSE;
170 nigel 93 static BOOL utf8 = FALSE;
171 nigel 49
172 nigel 53 /* Structure for options and list of them */
173 nigel 49
174 nigel 87 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
175     OP_PATLIST };
176 nigel 77
177 nigel 53 typedef struct option_item {
178 nigel 77 int type;
179 nigel 53 int one_char;
180 nigel 77 void *dataptr;
181 nigel 67 const char *long_name;
182     const char *help_text;
183 nigel 53 } option_item;
184 nigel 49
185 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
186     used to identify them. */
187    
188 ph10 325 #define N_COLOUR (-1)
189     #define N_EXCLUDE (-2)
190     #define N_EXCLUDE_DIR (-3)
191     #define N_HELP (-4)
192     #define N_INCLUDE (-5)
193     #define N_INCLUDE_DIR (-6)
194     #define N_LABEL (-7)
195     #define N_LOCALE (-8)
196     #define N_NULL (-9)
197     #define N_LOFFSETS (-10)
198     #define N_FOFFSETS (-11)
199 nigel 87
200 nigel 53 static option_item optionlist[] = {
201 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
202     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
203     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
204     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
205     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
206     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
207     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
208     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
209     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
210     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
211     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
212     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
213     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
214 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
215 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
216     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
217     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
218     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
219     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
220     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
221 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
222 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
223     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
224 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
226     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
227     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
228     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
229     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
230     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
231 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233 nigel 87 #ifdef JFRIEDL_DEBUG
234     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
235     #endif
236     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
237     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
238     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
239     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
240     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
241     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
242     { OP_NODATA, 0, NULL, NULL, NULL }
243 nigel 53 };
244    
245 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
246     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
247     that the combination of -w and -x has the same effect as -x on its own, so we
248     can treat them as the same. */
249 nigel 53
250 nigel 87 static const char *prefix[] = {
251     "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
252    
253     static const char *suffix[] = {
254     "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
255    
256 ph10 149 /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
257 nigel 87
258 nigel 93 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259 nigel 87
260 nigel 93 const char utf8_table4[] = {
261     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
262     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
263     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
264     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
265    
266    
267    
268 nigel 53 /*************************************************
269 nigel 87 * OS-specific functions *
270 nigel 53 *************************************************/
271    
272     /* These functions are defined so that they can be made system specific,
273 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
274 nigel 53
275    
276     /************* Directory scanning in Unix ***********/
277    
278 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
279 nigel 53 #include <sys/types.h>
280     #include <sys/stat.h>
281     #include <dirent.h>
282    
283     typedef DIR directory_type;
284    
/* Test whether a path names a directory (Unix version).

Argument:
  filename    the path to examine

Returns:      '/' if stat() succeeds and the path is a directory;
              0 otherwise, including when stat() fails (in the expectation
              that opening the path as a file will then fail)
*/

static int
isdirectory(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 0;
if (S_ISDIR(info.st_mode)) return '/';
return 0;
}
293    
294 nigel 67 static directory_type *
295 nigel 53 opendirectory(char *filename)
296     {
297     return opendir(filename);
298     }
299    
300 nigel 67 static char *
301 nigel 53 readdirectory(directory_type *dir)
302     {
303     for (;;)
304     {
305     struct dirent *dent = readdir(dir);
306     if (dent == NULL) return NULL;
307     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308     return dent->d_name;
309     }
310 ph10 151 /* Control never reaches here */
311 nigel 53 }
312    
313 nigel 67 static void
314 nigel 53 closedirectory(directory_type *dir)
315     {
316     closedir(dir);
317     }
318    
319    
320 nigel 87 /************* Test for regular file in Unix **********/
321    
/* Test whether a path names a regular file (Unix version).

Argument:
  filename    the path to examine

Returns:      1 if stat() fails (in the expectation that opening the path as
                a file will then fail), or if the path is a regular file;
              0 otherwise
*/

static int
isregfile(char *filename)
{
struct stat info;

if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
330    
331    
332     /************* Test stdout for being a terminal in Unix **********/
333    
334     static BOOL
335     is_stdout_tty(void)
336     {
337     return isatty(fileno(stdout));
338     }
339    
340    
341 nigel 63 /************* Directory scanning in Win32 ***********/
342 nigel 53
343 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
344 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
346     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347 ph10 283 */
348 nigel 53
349 ph10 97 #elif HAVE_WINDOWS_H
350 nigel 63
351     #ifndef STRICT
352     # define STRICT
353     #endif
354     #ifndef WIN32_LEAN_AND_MEAN
355     # define WIN32_LEAN_AND_MEAN
356     #endif
357 ph10 283
358     #include <windows.h>
359    
360 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
361     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362     #endif
363    
364 nigel 63 typedef struct directory_type
365     {
366     HANDLE handle;
367     BOOL first;
368     WIN32_FIND_DATA data;
369     } directory_type;
370    
371     int
372     isdirectory(char *filename)
373     {
374     DWORD attr = GetFileAttributes(filename);
375     if (attr == INVALID_FILE_ATTRIBUTES)
376     return 0;
377     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
378     }
379    
380     directory_type *
381     opendirectory(char *filename)
382     {
383     size_t len;
384     char *pattern;
385     directory_type *dir;
386     DWORD err;
387     len = strlen(filename);
388     pattern = (char *) malloc(len + 3);
389     dir = (directory_type *) malloc(sizeof(*dir));
390     if ((pattern == NULL) || (dir == NULL))
391     {
392     fprintf(stderr, "pcregrep: malloc failed\n");
393     exit(2);
394     }
395     memcpy(pattern, filename, len);
396     memcpy(&(pattern[len]), "\\*", 3);
397     dir->handle = FindFirstFile(pattern, &(dir->data));
398     if (dir->handle != INVALID_HANDLE_VALUE)
399     {
400     free(pattern);
401     dir->first = TRUE;
402     return dir;
403     }
404     err = GetLastError();
405     free(pattern);
406     free(dir);
407     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
408     return NULL;
409     }
410    
411     char *
412     readdirectory(directory_type *dir)
413     {
414     for (;;)
415     {
416     if (!dir->first)
417     {
418     if (!FindNextFile(dir->handle, &(dir->data)))
419     return NULL;
420     }
421     else
422     {
423     dir->first = FALSE;
424     }
425     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
426     return dir->data.cFileName;
427     }
428     #ifndef _MSC_VER
429     return NULL; /* Keep compiler happy; never executed */
430     #endif
431     }
432    
433     void
434     closedirectory(directory_type *dir)
435     {
436     FindClose(dir->handle);
437     free(dir);
438     }
439    
440    
441 nigel 87 /************* Test for regular file in Win32 **********/
442    
443     /* I don't know how to do this, or if it can be done; assume all paths are
444     regular if they are not directories. */
445    
/* Win32 version: every path that isdirectory() does not report as a directory
is taken to be a regular file.

Argument:
  filename    the path to examine

Returns:      1 if isdirectory() returns zero for the path; 0 otherwise
*/

int isregfile(char *filename)
{
return isdirectory(filename) == 0;
}
450    
451    
452     /************* Test stdout for being a terminal in Win32 **********/
453    
454     /* I don't know how to do this; assume never */
455    
456     static BOOL
457     is_stdout_tty(void)
458     {
459 ph10 283 return FALSE;
460 nigel 87 }
461    
462    
463 nigel 53 /************* Directory scanning when we can't do it ***********/
464    
465     /* The type is void, and apart from isdirectory(), the functions do nothing. */
466    
467 nigel 63 #else
468    
/* Stand-ins for systems with no directory scanning support: nothing is ever
reported as a directory, no directory can be opened, and there are never any
entries to read. */

typedef void directory_type;

int
isdirectory(char *filename)
{
(void)filename;
return 0;
}

directory_type *
opendirectory(char *filename)
{
(void)filename;
return (directory_type *)0;
}

char *
readdirectory(directory_type *dir)
{
(void)dir;
return (char *)0;
}

void
closedirectory(directory_type *dir)
{
(void)dir;
}
475    
476 nigel 87
477     /************* Test for regular when we can't do it **********/
478    
479     /* Assume all files are regular. */
480    
/* With no means of testing, every path is taken to be a regular file.

Argument:
  filename    the path (not examined)

Returns:      1 always
*/

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
482    
483    
484     /************* Test stdout for being a terminal when we can't do it **********/
485    
486     static BOOL
487     is_stdout_tty(void)
488     {
489     return FALSE;
490     }
491    
492    
493 nigel 53 #endif
494    
495    
496    
497 ph10 137 #ifndef HAVE_STRERROR
498 nigel 49 /*************************************************
499     * Provide strerror() for non-ANSI libraries *
500     *************************************************/
501    
502     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
503     in their libraries, but can provide the same facility by this simple
504     alternative function. */
505    
/* The system's table of error texts, and the number of entries in it. */

extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror() for libraries that lack one.

Argument:
  n           an error number

Returns:      sys_errlist[n] when n is in the range 0 to sys_nerr - 1;
              otherwise the fixed text "unknown error number"
*/

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
515     #endif /* HAVE_STRERROR */
516    
517    
518    
519     /*************************************************
520 nigel 93 * Find end of line *
521     *************************************************/
522    
523     /* The length of the endline sequence that is found is set via lenptr. This may
524     be zero at the very end of the file if there is no line-ending sequence there.
525    
526     Arguments:
527     p current position in line
528     endptr end of available data
529     lenptr where to put the length of the eol sequence
530    
531     Returns: pointer to the last byte of the line
532     */
533    
534     static char *
535     end_of_line(char *p, char *endptr, int *lenptr)
536     {
537     switch(endlinetype)
538     {
539     default: /* Just in case */
540     case EL_LF:
541     while (p < endptr && *p != '\n') p++;
542     if (p < endptr)
543     {
544     *lenptr = 1;
545     return p + 1;
546     }
547     *lenptr = 0;
548     return endptr;
549    
550     case EL_CR:
551     while (p < endptr && *p != '\r') p++;
552     if (p < endptr)
553     {
554     *lenptr = 1;
555     return p + 1;
556     }
557     *lenptr = 0;
558     return endptr;
559    
560     case EL_CRLF:
561     for (;;)
562     {
563     while (p < endptr && *p != '\r') p++;
564     if (++p >= endptr)
565     {
566     *lenptr = 0;
567     return endptr;
568     }
569     if (*p == '\n')
570     {
571     *lenptr = 2;
572     return p + 1;
573     }
574     }
575     break;
576    
577 ph10 149 case EL_ANYCRLF:
578     while (p < endptr)
579     {
580     int extra = 0;
581     register int c = *((unsigned char *)p);
582    
583     if (utf8 && c >= 0xc0)
584     {
585     int gcii, gcss;
586     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
587     gcss = 6*extra;
588     c = (c & utf8_table3[extra]) << gcss;
589     for (gcii = 1; gcii <= extra; gcii++)
590     {
591     gcss -= 6;
592     c |= (p[gcii] & 0x3f) << gcss;
593     }
594     }
595    
596     p += 1 + extra;
597    
598     switch (c)
599     {
600     case 0x0a: /* LF */
601     *lenptr = 1;
602     return p;
603    
604     case 0x0d: /* CR */
605     if (p < endptr && *p == 0x0a)
606     {
607     *lenptr = 2;
608     p++;
609     }
610     else *lenptr = 1;
611     return p;
612 ph10 150
613 ph10 149 default:
614     break;
615     }
616     } /* End of loop for ANYCRLF case */
617 ph10 150
618 ph10 149 *lenptr = 0; /* Must have hit the end */
619     return endptr;
620    
621 nigel 93 case EL_ANY:
622     while (p < endptr)
623     {
624     int extra = 0;
625     register int c = *((unsigned char *)p);
626    
627     if (utf8 && c >= 0xc0)
628     {
629     int gcii, gcss;
630     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
631     gcss = 6*extra;
632     c = (c & utf8_table3[extra]) << gcss;
633     for (gcii = 1; gcii <= extra; gcii++)
634     {
635     gcss -= 6;
636     c |= (p[gcii] & 0x3f) << gcss;
637     }
638     }
639    
640     p += 1 + extra;
641    
642     switch (c)
643     {
644     case 0x0a: /* LF */
645     case 0x0b: /* VT */
646     case 0x0c: /* FF */
647     *lenptr = 1;
648     return p;
649    
650     case 0x0d: /* CR */
651     if (p < endptr && *p == 0x0a)
652     {
653     *lenptr = 2;
654     p++;
655     }
656     else *lenptr = 1;
657     return p;
658    
659     case 0x85: /* NEL */
660     *lenptr = utf8? 2 : 1;
661     return p;
662    
663     case 0x2028: /* LS */
664     case 0x2029: /* PS */
665     *lenptr = 3;
666     return p;
667    
668     default:
669     break;
670     }
671     } /* End of loop for ANY case */
672    
673     *lenptr = 0; /* Must have hit the end */
674     return endptr;
675     } /* End of overall switch */
676     }
677    
678    
679    
680     /*************************************************
681     * Find start of previous line *
682     *************************************************/
683    
684     /* This is called when looking back for before lines to print.
685    
686     Arguments:
687     p start of the subsequent line
688     startptr start of available data
689    
690     Returns: pointer to the start of the previous line
691     */
692    
693     static char *
694     previous_line(char *p, char *startptr)
695     {
696     switch(endlinetype)
697     {
698     default: /* Just in case */
699     case EL_LF:
700     p--;
701     while (p > startptr && p[-1] != '\n') p--;
702     return p;
703    
704     case EL_CR:
705     p--;
706     while (p > startptr && p[-1] != '\n') p--;
707     return p;
708    
709     case EL_CRLF:
710     for (;;)
711     {
712     p -= 2;
713     while (p > startptr && p[-1] != '\n') p--;
714     if (p <= startptr + 1 || p[-2] == '\r') return p;
715     }
716     return p; /* But control should never get here */
717    
718     case EL_ANY:
719 ph10 150 case EL_ANYCRLF:
720 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721     if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
723     while (p > startptr)
724     {
725     register int c;
726     char *pp = p - 1;
727    
728     if (utf8)
729     {
730     int extra = 0;
731     while ((*pp & 0xc0) == 0x80) pp--;
732     c = *((unsigned char *)pp);
733     if (c >= 0xc0)
734     {
735     int gcii, gcss;
736     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
737     gcss = 6*extra;
738     c = (c & utf8_table3[extra]) << gcss;
739     for (gcii = 1; gcii <= extra; gcii++)
740     {
741     gcss -= 6;
742     c |= (pp[gcii] & 0x3f) << gcss;
743     }
744     }
745     }
746     else c = *((unsigned char *)pp);
747    
748 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
749 nigel 93 {
750     case 0x0a: /* LF */
751 ph10 149 case 0x0d: /* CR */
752     return p;
753 ph10 150
754 ph10 149 default:
755     break;
756 ph10 150 }
757 ph10 149
758     else switch (c)
759     {
760     case 0x0a: /* LF */
761 nigel 93 case 0x0b: /* VT */
762     case 0x0c: /* FF */
763     case 0x0d: /* CR */
764     case 0x85: /* NEL */
765     case 0x2028: /* LS */
766     case 0x2029: /* PS */
767     return p;
768    
769     default:
770     break;
771     }
772    
773     p = pp; /* Back one character */
774     } /* End of loop for ANY case */
775    
776     return startptr; /* Hit start of data */
777     } /* End of overall switch */
778     }
779    
780    
781    
782    
783    
784     /*************************************************
785 nigel 77 * Print the previous "after" lines *
786 nigel 49 *************************************************/
787    
788 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
789 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
790     that a binary zero does not terminate it.
791 nigel 77
792     Arguments:
793     lastmatchnumber the number of the last matching line, plus one
794     lastmatchrestart where we restarted after the last match
795     endptr end of available data
796     printname filename for printing
797    
798     Returns: nothing
799     */
800    
801     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
802     char *endptr, char *printname)
803     {
804     if (after_context > 0 && lastmatchnumber > 0)
805     {
806     int count = 0;
807     while (lastmatchrestart < endptr && count++ < after_context)
808     {
809 nigel 93 int ellength;
810 nigel 77 char *pp = lastmatchrestart;
811     if (printname != NULL) fprintf(stdout, "%s-", printname);
812     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
813 nigel 93 pp = end_of_line(pp, endptr, &ellength);
814     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
815     lastmatchrestart = pp;
816 nigel 77 }
817     hyphenpending = TRUE;
818     }
819     }
820    
821    
822    
823     /*************************************************
824     * Grep an individual file *
825     *************************************************/
826    
827     /* This is called from grep_or_recurse() below. It uses a buffer that is three
828     times the value of MBUFTHIRD. The matching point is never allowed to stray into
829     the top third of the buffer, thus keeping more of the file available for
830     context printing or for multiline scanning. For large files, the pointer will
831     be in the middle third most of the time, so the bottom third is available for
832     "before" context printing.
833    
834     Arguments:
835 ph10 286 handle the fopened FILE stream for a normal file
836     the gzFile pointer when reading is via libz
837     the BZFILE pointer when reading is via libbz2
838     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839 nigel 77 printname the file name if it is to be printed for each match
840     or NULL if the file name is not to be printed
841     it cannot be NULL if filenames[_nomatch]_only is set
842    
843     Returns: 0 if there was at least one match
844     1 otherwise (no matches)
845 ph10 286 2 if there is a read error on a .bz2 file
846 nigel 77 */
847    
848 nigel 49 static int
849 ph10 286 pcregrep(void *handle, int frtype, char *printname)
850 nigel 49 {
851     int rc = 1;
852 nigel 77 int linenumber = 1;
853     int lastmatchnumber = 0;
854 nigel 49 int count = 0;
855 ph10 280 int filepos = 0;
856 nigel 49 int offsets[99];
857 nigel 77 char *lastmatchrestart = NULL;
858     char buffer[3*MBUFTHIRD];
859     char *ptr = buffer;
860     char *endptr;
861     size_t bufflength;
862     BOOL endhyphenpending = FALSE;
863 ph10 286 FILE *in = NULL; /* Ensure initialized */
864 nigel 49
865 ph10 286 #ifdef SUPPORT_LIBZ
866     gzFile ingz = NULL;
867     #endif
868 nigel 77
869 ph10 286 #ifdef SUPPORT_LIBBZ2
870     BZFILE *inbz2 = NULL;
871     #endif
872    
873    
874     /* Do the first read into the start of the buffer and set up the pointer to end
875     of what we have. In the case of libz, a non-zipped .gz file will be read as a
876     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877     fail. */
878    
879     #ifdef SUPPORT_LIBZ
880     if (frtype == FR_LIBZ)
881     {
882     ingz = (gzFile)handle;
883     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884     }
885     else
886     #endif
887    
888     #ifdef SUPPORT_LIBBZ2
889     if (frtype == FR_LIBBZ2)
890     {
891     inbz2 = (BZFILE *)handle;
892     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
894     } /* without the cast it is unsigned. */
895     else
896     #endif
897    
898     {
899     in = (FILE *)handle;
900     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901     }
902    
903 nigel 77 endptr = buffer + bufflength;
904    
905     /* Loop while the current pointer is not at the end of the file. For large
906     files, endptr will be at the end of the buffer when we are in the middle of the
907     file, but ptr will never get there, because as soon as it gets over 2/3 of the
908     way, the buffer is shifted left and re-filled. */
909    
910     while (ptr < endptr)
911 nigel 49 {
912 nigel 93 int i, endlinelength;
913 nigel 87 int mrc = 0;
914 nigel 53 BOOL match = FALSE;
915 ph10 286 char *matchptr = ptr;
916 nigel 77 char *t = ptr;
917     size_t length, linelength;
918 nigel 49
919 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
920     of the subject string to pass to pcre_exec(). In multiline mode, it is the
921     length remainder of the data in the buffer. Otherwise, it is the length of
922     the next line. After matching, we always advance by the length of the next
923     line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
924     that any match is constrained to be in the first line. */
925    
926 nigel 93 t = end_of_line(t, endptr, &endlinelength);
927     linelength = t - ptr - endlinelength;
928 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
929 nigel 77
930 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
931    
932     #ifdef JFRIEDL_DEBUG
933     if (jfriedl_XT || jfriedl_XR)
934     {
935     #include <sys/time.h>
936     #include <time.h>
937     struct timeval start_time, end_time;
938     struct timezone dummy;
939    
940     if (jfriedl_XT)
941     {
942     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
943     const char *orig = ptr;
944     ptr = malloc(newlen + 1);
945     if (!ptr) {
946     printf("out of memory");
947     exit(2);
948     }
949     endptr = ptr;
950     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
951     for (i = 0; i < jfriedl_XT; i++) {
952     strncpy(endptr, orig, length);
953     endptr += length;
954     }
955     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
956     length = newlen;
957     }
958    
959     if (gettimeofday(&start_time, &dummy) != 0)
960     perror("bad gettimeofday");
961    
962    
963     for (i = 0; i < jfriedl_XR; i++)
964     match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
965    
966     if (gettimeofday(&end_time, &dummy) != 0)
967     perror("bad gettimeofday");
968    
969     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
970     -
971     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
972    
973     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
974     return 0;
975     }
976     #endif
977    
978 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
979 ph10 279 in order to find any further matches in the same line. */
980 nigel 89
981 ph10 286 ONLY_MATCHING_RESTART:
982    
983 nigel 77 /* Run through all the patterns until one matches. Note that we don't include
984     the final newline in the subject string. */
985    
986 nigel 87 for (i = 0; i < pattern_count; i++)
987 nigel 53 {
988 ph10 279 mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989 nigel 87 offsets, 99);
990     if (mrc >= 0) { match = TRUE; break; }
991     if (mrc != PCRE_ERROR_NOMATCH)
992     {
993     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995     fprintf(stderr, "this line:\n");
996 ph10 279 fwrite(matchptr, 1, linelength, stderr); /* In case binary zero included */
997 nigel 87 fprintf(stderr, "\n");
998     if (error_count == 0 &&
999     (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
1000     {
1001     fprintf(stderr, "pcregrep: error %d means that a resource limit "
1002     "was exceeded\n", mrc);
1003     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
1004     }
1005     if (error_count++ > 20)
1006     {
1007     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
1008     exit(2);
1009     }
1010     match = invert; /* No more matching; don't show the line again */
1011     break;
1012     }
1013 nigel 53 }
1014 nigel 49
1015 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1016 nigel 77
1017 nigel 49 if (match != invert)
1018     {
1019 nigel 77 BOOL hyphenprinted = FALSE;
1020    
1021 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1022 nigel 77
1023 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1024    
1025     /* Just count if just counting is wanted. */
1026    
1027 nigel 49 if (count_only) count++;
1028    
1029 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1030     in the file. */
1031    
1032     else if (filenames == FN_ONLY)
1033 nigel 49 {
1034 nigel 77 fprintf(stdout, "%s\n", printname);
1035 nigel 49 return 0;
1036     }
1037    
1038 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1039    
1040 nigel 77 else if (quiet) return 0;
1041 nigel 49
1042 nigel 87 /* The --only-matching option prints just the substring that matched, and
1043 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1044 ph10 280 matching substring (they both force --only-matching). None of these options
1045     prints any context. Afterwards, adjust the start and length, and then jump
1046     back to look for further matches in the same line. If we are in invert
1047     mode, however, nothing is printed - this could be still useful because the
1048     return code is set. */
1049 nigel 87
1050     else if (only_matching)
1051     {
1052 ph10 279 if (!invert)
1053 ph10 286 {
1054 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1055     if (number) fprintf(stdout, "%d:", linenumber);
1056 ph10 280 if (line_offsets)
1057 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1058 ph10 286 offsets[1] - offsets[0]);
1059 ph10 280 else if (file_offsets)
1060 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1061 ph10 286 offsets[1] - offsets[0]);
1062     else
1063 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1064 ph10 279 fprintf(stdout, "\n");
1065     matchptr += offsets[1];
1066     length -= offsets[1];
1067 ph10 286 match = FALSE;
1068     goto ONLY_MATCHING_RESTART;
1069     }
1070 nigel 87 }
1071    
1072     /* This is the default case when none of the above options is set. We print
1073     the matching lines(s), possibly preceded and/or followed by other lines of
1074     context. */
1075    
1076 nigel 49 else
1077     {
1078 nigel 77 /* See if there is a requirement to print some "after" lines from a
1079     previous match. We never print any overlaps. */
1080    
1081     if (after_context > 0 && lastmatchnumber > 0)
1082     {
1083 nigel 93 int ellength;
1084 nigel 77 int linecount = 0;
1085     char *p = lastmatchrestart;
1086    
1087     while (p < ptr && linecount < after_context)
1088     {
1089 nigel 93 p = end_of_line(p, ptr, &ellength);
1090 nigel 77 linecount++;
1091     }
1092    
1093     /* It is important to advance lastmatchrestart during this printing so
1094 nigel 87 that it interacts correctly with any "before" printing below. Print
1095     each line's data using fwrite() in case there are binary zeroes. */
1096 nigel 77
1097     while (lastmatchrestart < p)
1098     {
1099     char *pp = lastmatchrestart;
1100     if (printname != NULL) fprintf(stdout, "%s-", printname);
1101     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1102 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1103     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1104     lastmatchrestart = pp;
1105 nigel 77 }
1106     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1107     }
1108    
1109     /* If there were non-contiguous lines printed above, insert hyphens. */
1110    
1111     if (hyphenpending)
1112     {
1113     fprintf(stdout, "--\n");
1114     hyphenpending = FALSE;
1115     hyphenprinted = TRUE;
1116     }
1117    
1118     /* See if there is a requirement to print some "before" lines for this
1119     match. Again, don't print overlaps. */
1120    
1121     if (before_context > 0)
1122     {
1123     int linecount = 0;
1124     char *p = ptr;
1125    
1126     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1127 nigel 87 linecount < before_context)
1128 nigel 77 {
1129 nigel 87 linecount++;
1130 nigel 93 p = previous_line(p, buffer);
1131 nigel 77 }
1132    
1133     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1134     fprintf(stdout, "--\n");
1135    
1136     while (p < ptr)
1137     {
1138 nigel 93 int ellength;
1139 nigel 77 char *pp = p;
1140     if (printname != NULL) fprintf(stdout, "%s-", printname);
1141     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1142 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1143     fwrite(p, 1, pp - p, stdout);
1144     p = pp;
1145 nigel 77 }
1146     }
1147    
1148     /* Now print the matching line(s); ensure we set hyphenpending at the end
1149 nigel 85 of the file if any context lines are being output. */
1150 nigel 77
1151 nigel 85 if (after_context > 0 || before_context > 0)
1152     endhyphenpending = TRUE;
1153    
1154 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1155 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1156 nigel 77
1157     /* In multiline mode, we want to print to the end of the line in which
1158     the end of the matched string is found, so we adjust linelength and the
1159 ph10 222 line number appropriately, but only when there actually was a match
1160     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1161     the match will always be before the first newline sequence. */
1162 nigel 77
1163     if (multiline)
1164     {
1165 nigel 93 int ellength;
1166 ph10 222 char *endmatch = ptr;
1167     if (!invert)
1168 nigel 93 {
1169 ph10 222 endmatch += offsets[1];
1170     t = ptr;
1171     while (t < endmatch)
1172     {
1173     t = end_of_line(t, endptr, &ellength);
1174     if (t <= endmatch) linenumber++; else break;
1175     }
1176 nigel 93 }
1177     endmatch = end_of_line(endmatch, endptr, &ellength);
1178     linelength = endmatch - ptr - ellength;
1179 nigel 77 }
1180    
1181 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1182     zeroes are treated as just another data character. */
1183    
1184     /* This extra option, for Jeffrey Friedl's debugging requirements,
1185     replaces the matched string, or a specific captured string if it exists,
1186     with X. When this happens, colouring is ignored. */
1187    
1188     #ifdef JFRIEDL_DEBUG
1189     if (S_arg >= 0 && S_arg < mrc)
1190     {
1191     int first = S_arg * 2;
1192     int last = first + 1;
1193     fwrite(ptr, 1, offsets[first], stdout);
1194     fprintf(stdout, "X");
1195     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1196     }
1197     else
1198     #endif
1199    
1200     /* We have to split the line(s) up if colouring. */
1201    
1202     if (do_colour)
1203     {
1204     fwrite(ptr, 1, offsets[0], stdout);
1205     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1206     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1207     fprintf(stdout, "%c[00m", 0x1b);
1208 ph10 243 fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1209 ph10 239 stdout);
1210 nigel 87 }
1211 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1212 nigel 49 }
1213    
1214 nigel 87 /* End of doing what has to be done for a match */
1215    
1216 nigel 77 rc = 0; /* Had some success */
1217    
1218     /* Remember where the last match happened for after_context. We remember
1219     where we are about to restart, and that line's number. */
1220    
1221 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1222 nigel 77 lastmatchnumber = linenumber + 1;
1223 nigel 49 }
1224 nigel 77
1225 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1226     anything to be printed), we have to move on to the end of the match before
1227     proceeding. */
1228    
1229     if (multiline && invert && match)
1230     {
1231     int ellength;
1232     char *endmatch = ptr + offsets[1];
1233     t = ptr;
1234     while (t < endmatch)
1235     {
1236     t = end_of_line(t, endptr, &ellength);
1237     if (t <= endmatch) linenumber++; else break;
1238     }
1239     endmatch = end_of_line(endmatch, endptr, &ellength);
1240     linelength = endmatch - ptr - ellength;
1241     }
1242    
1243 ph10 286 /* Advance to after the newline and increment the line number. The file
1244 ph10 280 offset to the current line is maintained in filepos. */
1245 nigel 77
1246 nigel 93 ptr += linelength + endlinelength;
1247 ph10 280 filepos += linelength + endlinelength;
1248 nigel 77 linenumber++;
1249    
1250     /* If we haven't yet reached the end of the file (the buffer is full), and
1251     the current point is in the top 1/3 of the buffer, slide the buffer down by
1252     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1253     about to be lost, print them. */
1254    
1255     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1256     {
1257     if (after_context > 0 &&
1258     lastmatchnumber > 0 &&
1259     lastmatchrestart < buffer + MBUFTHIRD)
1260     {
1261     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1262     lastmatchnumber = 0;
1263     }
1264    
1265     /* Now do the shuffle */
1266    
1267     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1268     ptr -= MBUFTHIRD;
1269 ph10 286
1270     #ifdef SUPPORT_LIBZ
1271     if (frtype == FR_LIBZ)
1272     bufflength = 2*MBUFTHIRD +
1273     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1274     else
1275     #endif
1276    
1277     #ifdef SUPPORT_LIBBZ2
1278     if (frtype == FR_LIBBZ2)
1279     bufflength = 2*MBUFTHIRD +
1280     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1281     else
1282     #endif
1283    
1284 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1285 ph10 286
1286 nigel 77 endptr = buffer + bufflength;
1287    
1288     /* Adjust any last match point */
1289    
1290     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1291     }
1292     } /* Loop through the whole file */
1293    
1294     /* End of file; print final "after" lines if wanted; do_after_lines sets
1295     hyphenpending if it prints something. */
1296    
1297 nigel 87 if (!only_matching && !count_only)
1298     {
1299     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1300     hyphenpending |= endhyphenpending;
1301     }
1302 nigel 77
1303     /* Print the file name if we are looking for those without matches and there
1304     were none. If we found a match, we won't have got this far. */
1305    
1306 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1307 nigel 77 {
1308     fprintf(stdout, "%s\n", printname);
1309     return 0;
1310 nigel 49 }
1311    
1312 nigel 77 /* Print the match count if wanted */
1313    
1314 nigel 49 if (count_only)
1315     {
1316 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1317 nigel 49 fprintf(stdout, "%d\n", count);
1318     }
1319    
1320     return rc;
1321     }
1322    
1323    
1324    
1325     /*************************************************
1326 nigel 53 * Grep a file or recurse into a directory *
1327     *************************************************/
1328    
1329 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1330     recursing; if it's a file, grep it.
1331    
1332     Arguments:
1333     pathname the path to investigate
1334 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1335 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1336    
1337     Returns: 0 if there was at least one match
1338     1 if there were no matches
1339     2 there was some kind of error
1340    
1341     However, file opening failures are suppressed if "silent" is set.
1342     */
1343    
1344 nigel 53 static int
1345 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1346 nigel 53 {
1347     int rc = 1;
1348     int sep;
1349 ph10 286 int frtype;
1350     int pathlen;
1351     void *handle;
1352     FILE *in = NULL; /* Ensure initialized */
1353 nigel 53
1354 ph10 286 #ifdef SUPPORT_LIBZ
1355     gzFile ingz = NULL;
1356     #endif
1357    
1358     #ifdef SUPPORT_LIBBZ2
1359     BZFILE *inbz2 = NULL;
1360     #endif
1361    
1362 nigel 77 /* If the file name is "-" we scan stdin */
1363 nigel 53
1364 nigel 77 if (strcmp(pathname, "-") == 0)
1365 nigel 53 {
1366 ph10 286 return pcregrep(stdin, FR_PLAIN,
1367 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1368 nigel 77 stdin_name : NULL);
1369     }
1370    
1371 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1372 ph10 325 each file and directory within it, subject to any include or exclude patterns
1373     that were set. The scanning code is localized so it can be made
1374     system-specific. */
1375 nigel 87
1376     if ((sep = isdirectory(pathname)) != 0)
1377 nigel 77 {
1378 nigel 87 if (dee_action == dee_SKIP) return 1;
1379     if (dee_action == dee_RECURSE)
1380 nigel 53 {
1381 nigel 87 char buffer[1024];
1382     char *nextfile;
1383     directory_type *dir = opendirectory(pathname);
1384 nigel 53
1385 nigel 87 if (dir == NULL)
1386     {
1387     if (!silent)
1388     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1389     strerror(errno));
1390     return 2;
1391     }
1392 nigel 77
1393 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1394     {
1395 ph10 324 int frc, nflen;
1396 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1397 ph10 324 nflen = strlen(nextfile);
1398 ph10 345
1399 ph10 325 if (isdirectory(buffer))
1400     {
1401     if (exclude_dir_compiled != NULL &&
1402     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1403     continue;
1404 ph10 345
1405 ph10 325 if (include_dir_compiled != NULL &&
1406     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1407     continue;
1408     }
1409 ph10 345 else
1410     {
1411 ph10 324 if (exclude_compiled != NULL &&
1412     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1413     continue;
1414 ph10 345
1415 ph10 324 if (include_compiled != NULL &&
1416     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1417     continue;
1418 ph10 345 }
1419 nigel 77
1420 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1421     if (frc > 1) rc = frc;
1422     else if (frc == 0 && rc == 1) rc = 0;
1423     }
1424    
1425     closedirectory(dir);
1426     return rc;
1427 nigel 53 }
1428     }
1429    
1430 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1431     been requested. */
1432 nigel 53
1433 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1434    
1435     /* Control reaches here if we have a regular file, or if we have a directory
1436     and recursion or skipping was not requested, or if we have anything else and
1437     skipping was not requested. The scan proceeds. If this is the first and only
1438     argument at top level, we don't show the file name, unless we are only showing
1439     the file name, or the filename was forced (-H). */
1440    
1441 ph10 286 pathlen = strlen(pathname);
1442    
1443     /* Open using zlib if it is supported and the file name ends with .gz. */
1444    
1445     #ifdef SUPPORT_LIBZ
1446     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1447 nigel 53 {
1448 ph10 286 ingz = gzopen(pathname, "rb");
1449     if (ingz == NULL)
1450     {
1451     if (!silent)
1452     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1453     strerror(errno));
1454     return 2;
1455     }
1456     handle = (void *)ingz;
1457     frtype = FR_LIBZ;
1458     }
1459     else
1460     #endif
1461    
1462     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1463    
1464     #ifdef SUPPORT_LIBBZ2
1465     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1466     {
1467     inbz2 = BZ2_bzopen(pathname, "rb");
1468     handle = (void *)inbz2;
1469     frtype = FR_LIBBZ2;
1470     }
1471     else
1472     #endif
1473    
1474     /* Otherwise use plain fopen(). The label is so that we can come back here if
1475     an attempt to read a .bz2 file indicates that it really is a plain file. */
1476    
1477     #ifdef SUPPORT_LIBBZ2
1478     PLAIN_FILE:
1479     #endif
1480     {
1481     in = fopen(pathname, "r");
1482     handle = (void *)in;
1483     frtype = FR_PLAIN;
1484     }
1485    
1486     /* All the opening methods return errno when they fail. */
1487    
1488     if (handle == NULL)
1489     {
1490 nigel 77 if (!silent)
1491     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1492     strerror(errno));
1493 nigel 53 return 2;
1494     }
1495    
1496 ph10 286 /* Now grep the file */
1497    
1498     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1499 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1500 nigel 77
1501 ph10 286 /* Close in an appropriate manner. */
1502    
1503     #ifdef SUPPORT_LIBZ
1504     if (frtype == FR_LIBZ)
1505     gzclose(ingz);
1506     else
1507     #endif
1508    
1509     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1510     read failed. If the error indicates that the file isn't in fact bzipped, try
1511     again as a normal file. */
1512    
1513     #ifdef SUPPORT_LIBBZ2
1514     if (frtype == FR_LIBBZ2)
1515     {
1516     if (rc == 2)
1517     {
1518     int errnum;
1519     const char *err = BZ2_bzerror(inbz2, &errnum);
1520     if (errnum == BZ_DATA_ERROR_MAGIC)
1521     {
1522     BZ2_bzclose(inbz2);
1523     goto PLAIN_FILE;
1524     }
1525     else if (!silent)
1526     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1527     pathname, err);
1528     }
1529     BZ2_bzclose(inbz2);
1530     }
1531     else
1532     #endif
1533    
1534     /* Normal file close */
1535    
1536 nigel 53 fclose(in);
1537 ph10 286
1538     /* Pass back the yield from pcregrep(). */
1539    
1540 nigel 53 return rc;
1541     }
1542    
1543    
1544    
1545    
1546     /*************************************************
1547 nigel 49 * Usage function *
1548     *************************************************/
1549    
1550     static int
1551     usage(int rc)
1552     {
1553 nigel 87 option_item *op;
1554     fprintf(stderr, "Usage: pcregrep [-");
1555     for (op = optionlist; op->one_char != 0; op++)
1556     {
1557     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1558     }
1559     fprintf(stderr, "] [long options] [pattern] [files]\n");
1560 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1561     "options.\n");
1562 nigel 49 return rc;
1563     }
1564    
1565    
1566    
1567    
1568     /*************************************************
1569 nigel 53 * Help function *
1570     *************************************************/
1571    
1572     static void
1573     help(void)
1574     {
1575     option_item *op;
1576    
1577 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1578 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1579 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1580 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1581    
1582     #ifdef SUPPORT_LIBZ
1583     printf("Files whose names end in .gz are read using zlib.\n");
1584     #endif
1585    
1586     #ifdef SUPPORT_LIBBZ2
1587     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1588     #endif
1589    
1590     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1591     printf("Other files and the standard input are read as plain files.\n\n");
1592     #else
1593     printf("All files are read as plain files, without any interpretation.\n\n");
1594     #endif
1595    
1596 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1597     printf("Options:\n");
1598    
1599     for (op = optionlist; op->one_char != 0; op++)
1600     {
1601     int n;
1602     char s[4];
1603     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1604 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1605 nigel 53 if (n < 1) n = 1;
1606     printf("%.*s%s\n", n, " ", op->help_text);
1607     }
1608    
1609 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1610     printf("trailing white space is removed and blank lines are ignored.\n");
1611     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1612 nigel 53
1613 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1614 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1615     }
1616    
1617    
1618    
1619    
1620     /*************************************************
1621 nigel 77 * Handle a single-letter, no data option *
1622 nigel 53 *************************************************/
1623    
1624     static int
1625     handle_option(int letter, int options)
1626     {
1627     switch(letter)
1628     {
1629 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1630 nigel 87 case N_HELP: help(); exit(0);
1631 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1632 nigel 53 case 'c': count_only = TRUE; break;
1633 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1634     case 'H': filenames = FN_FORCE; break;
1635     case 'h': filenames = FN_NONE; break;
1636 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1637 nigel 87 case 'l': filenames = FN_ONLY; break;
1638     case 'L': filenames = FN_NOMATCH_ONLY; break;
1639 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1640 nigel 53 case 'n': number = TRUE; break;
1641 nigel 87 case 'o': only_matching = TRUE; break;
1642 nigel 77 case 'q': quiet = TRUE; break;
1643 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1644 nigel 53 case 's': silent = TRUE; break;
1645 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1646 nigel 53 case 'v': invert = TRUE; break;
1647 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1648     case 'x': process_options |= PO_LINE_MATCH; break;
1649 nigel 53
1650     case 'V':
1651 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1652 nigel 53 exit(0);
1653     break;
1654    
1655     default:
1656     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1657     exit(usage(2));
1658     }
1659    
1660     return options;
1661     }
1662    
1663    
1664    
1665    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as do negative numbers.

Argument:   n     the number
Returns:    pointer to a static buffer holding the text; it is overwritten by
            the next call
*/

static char *
ordin(int n)
{
/* Three characters per byte is more than enough for the decimal digits of an
int; the extra four are for a minus sign, the two-letter suffix, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";
int lasttwo = n % 100;
char *p = buffer;

p += sprintf(p, "%d", n);

/* The teens are irregular: without this test 11, 12, and 13 would come out
as "11st", "12nd", and "13rd". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1688    
1689    
1690    
1691     /*************************************************
1692     * Compile a single pattern *
1693     *************************************************/
1694    
1695     /* When the -F option has been used, this is called for each substring.
1696     Otherwise it's called for each supplied pattern.
1697    
1698     Arguments:
1699     pattern the pattern string
1700     options the PCRE options
1701     filename the file name, or NULL for a command-line pattern
1702     count 0 if this is the only command line pattern, or
1703     number of the command line pattern, or
1704     linenumber for a pattern from a file
1705    
1706     Returns: TRUE on success, FALSE after an error
1707     */
1708    
1709     static BOOL
1710     compile_single_pattern(char *pattern, int options, char *filename, int count)
1711     {
1712     char buffer[MBUFTHIRD + 16];
1713     const char *error;
1714     int errptr;
1715    
1716     if (pattern_count >= MAX_PATTERN_COUNT)
1717     {
1718     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1719     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1720     return FALSE;
1721     }
1722    
1723     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1724     suffix[process_options]);
1725     pattern_list[pattern_count] =
1726     pcre_compile(buffer, options, &error, &errptr, pcretables);
1727 ph10 142 if (pattern_list[pattern_count] != NULL)
1728 ph10 141 {
1729 ph10 142 pattern_count++;
1730 ph10 141 return TRUE;
1731 ph10 142 }
1732 nigel 87
1733     /* Handle compile errors */
1734    
1735     errptr -= (int)strlen(prefix[process_options]);
1736     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1737    
1738     if (filename == NULL)
1739     {
1740     if (count == 0)
1741     fprintf(stderr, "pcregrep: Error in command-line regex "
1742     "at offset %d: %s\n", errptr, error);
1743     else
1744     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1745     "at offset %d: %s\n", ordin(count), errptr, error);
1746     }
1747     else
1748     {
1749     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1750     "at offset %d: %s\n", count, filename, errptr, error);
1751     }
1752    
1753     return FALSE;
1754     }
1755    
1756    
1757    
1758     /*************************************************
1759     * Compile one supplied pattern *
1760     *************************************************/
1761    
1762     /* When the -F option has been used, each string may be a list of strings,
1763 nigel 91 separated by line breaks. They will be matched literally.
1764 nigel 87
1765     Arguments:
1766     pattern the pattern string
1767     options the PCRE options
1768     filename the file name, or NULL for a command-line pattern
1769     count 0 if this is the only command line pattern, or
1770     number of the command line pattern, or
1771     linenumber for a pattern from a file
1772    
1773     Returns: TRUE on success, FALSE after an error
1774     */
1775    
1776     static BOOL
1777     compile_pattern(char *pattern, int options, char *filename, int count)
1778     {
1779     if ((process_options & PO_FIXED_STRINGS) != 0)
1780     {
1781 nigel 93 char *eop = pattern + strlen(pattern);
1782 nigel 87 char buffer[MBUFTHIRD];
1783     for(;;)
1784     {
1785 nigel 93 int ellength;
1786     char *p = end_of_line(pattern, eop, &ellength);
1787     if (ellength == 0)
1788 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1789 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1790 nigel 93 pattern = p;
1791 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1792     return FALSE;
1793     }
1794     }
1795     else return compile_single_pattern(pattern, options, filename, count);
1796     }
1797    
1798    
1799    
1800     /*************************************************
1801 nigel 49 * Main program *
1802     *************************************************/
1803    
1804 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1805    
1806 nigel 49 int
1807     main(int argc, char **argv)
1808     {
1809 nigel 53 int i, j;
1810 nigel 49 int rc = 1;
1811 nigel 87 int pcre_options = 0;
1812     int cmd_pattern_count = 0;
1813 ph10 141 int hint_count = 0;
1814 nigel 49 int errptr;
1815 nigel 87 BOOL only_one_at_top;
1816     char *patterns[MAX_PATTERN_COUNT];
1817     const char *locale_from = "--locale";
1818 nigel 49 const char *error;
1819    
1820 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1821     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1822     */
1823 nigel 91
1824     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1825     switch(i)
1826     {
1827     default: newline = (char *)"lf"; break;
1828     case '\r': newline = (char *)"cr"; break;
1829     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1830 nigel 93 case -1: newline = (char *)"any"; break;
1831 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1832 nigel 91 }
1833    
1834 nigel 49 /* Process the options */
1835    
1836     for (i = 1; i < argc; i++)
1837     {
1838 nigel 77 option_item *op = NULL;
1839     char *option_data = (char *)""; /* default to keep compiler happy */
1840     BOOL longop;
1841     BOOL longopwasequals = FALSE;
1842    
1843 nigel 49 if (argv[i][0] != '-') break;
1844 nigel 53
1845 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1846 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1847 nigel 63
1848 nigel 77 if (argv[i][1] == 0)
1849     {
1850 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1851 nigel 77 else exit(usage(2));
1852     }
1853 nigel 63
1854 nigel 77 /* Handle a long name option, or -- to terminate the options */
1855 nigel 53
1856     if (argv[i][1] == '-')
1857 nigel 49 {
1858 nigel 77 char *arg = argv[i] + 2;
1859     char *argequals = strchr(arg, '=');
1860 nigel 53
1861 nigel 77 if (*arg == 0) /* -- terminates options */
1862 nigel 49 {
1863 nigel 77 i++;
1864     break; /* out of the options-handling loop */
1865 nigel 53 }
1866 nigel 49
1867 nigel 77 longop = TRUE;
1868    
1869     /* Some long options have data that follows after =, for example file=name.
1870     Some options have variations in the long name spelling: specifically, we
1871     allow "regexp" because GNU grep allows it, though I personally go along
1872 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1873     These options are entered in the table as "regex(p)". No option is in both
1874     these categories, fortunately. */
1875 nigel 77
1876 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1877     {
1878 nigel 77 char *opbra = strchr(op->long_name, '(');
1879     char *equals = strchr(op->long_name, '=');
1880     if (opbra == NULL) /* Not a (p) case */
1881 nigel 53 {
1882 nigel 77 if (equals == NULL) /* Not thing=data case */
1883     {
1884     if (strcmp(arg, op->long_name) == 0) break;
1885     }
1886     else /* Special case xxx=data */
1887     {
1888     int oplen = equals - op->long_name;
1889 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1890 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1891     {
1892     option_data = arg + arglen;
1893     if (*option_data == '=')
1894     {
1895     option_data++;
1896     longopwasequals = TRUE;
1897     }
1898     break;
1899     }
1900     }
1901 nigel 53 }
1902 nigel 77 else /* Special case xxxx(p) */
1903     {
1904     char buff1[24];
1905     char buff2[24];
1906     int baselen = opbra - op->long_name;
1907     sprintf(buff1, "%.*s", baselen, op->long_name);
1908 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1909 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1910 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1911     break;
1912     }
1913 nigel 53 }
1914 nigel 77
1915 nigel 53 if (op->one_char == 0)
1916     {
1917     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1918     exit(usage(2));
1919     }
1920     }
1921 nigel 49
1922 nigel 89
1923     /* Jeffrey Friedl's debugging harness uses these additional options which
1924     are not in the right form for putting in the option table because they use
1925     only one hyphen, yet are more than one character long. By putting them
1926     separately here, they will not get displayed as part of the help() output,
1927     but I don't think Jeffrey will care about that. */
1928    
1929     #ifdef JFRIEDL_DEBUG
1930     else if (strcmp(argv[i], "-pre") == 0) {
1931     jfriedl_prefix = argv[++i];
1932     continue;
1933     } else if (strcmp(argv[i], "-post") == 0) {
1934     jfriedl_postfix = argv[++i];
1935     continue;
1936     } else if (strcmp(argv[i], "-XT") == 0) {
1937     sscanf(argv[++i], "%d", &jfriedl_XT);
1938     continue;
1939     } else if (strcmp(argv[i], "-XR") == 0) {
1940     sscanf(argv[++i], "%d", &jfriedl_XR);
1941     continue;
1942     }
1943     #endif
1944    
1945    
1946 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1947     continue till we hit the last one or one that needs data. */
1948 nigel 53
1949     else
1950     {
1951     char *s = argv[i] + 1;
1952 nigel 77 longop = FALSE;
1953 nigel 53 while (*s != 0)
1954     {
1955 nigel 77 for (op = optionlist; op->one_char != 0; op++)
1956     { if (*s == op->one_char) break; }
1957     if (op->one_char == 0)
1958 nigel 53 {
1959 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1960     *s, argv[i]);
1961     exit(usage(2));
1962     }
1963     if (op->type != OP_NODATA || s[1] == 0)
1964     {
1965     option_data = s+1;
1966 nigel 53 break;
1967     }
1968 nigel 87 pcre_options = handle_option(*s++, pcre_options);
1969 nigel 49 }
1970     }
1971 nigel 77
1972 nigel 87 /* At this point we should have op pointing to a matched option. If the type
1973     is NO_DATA, it means that there is no data, and the option might set
1974     something in the PCRE options. */
1975 nigel 77
1976     if (op->type == OP_NODATA)
1977     {
1978 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
1979     continue;
1980     }
1981    
1982     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1983     either has a value or defaults to something. It cannot have data in a
1984     separate item. At the moment, the only such options are "colo(u)r" and
1985 nigel 89 Jeffrey Friedl's special -S debugging option. */
1986 nigel 87
1987     if (*option_data == 0 &&
1988     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1989     {
1990     switch (op->one_char)
1991 nigel 77 {
1992 nigel 87 case N_COLOUR:
1993     colour_option = (char *)"auto";
1994     break;
1995     #ifdef JFRIEDL_DEBUG
1996     case 'S':
1997     S_arg = 0;
1998     break;
1999     #endif
2000 nigel 77 }
2001 nigel 87 continue;
2002     }
2003 nigel 77
2004 nigel 87 /* Otherwise, find the data string for the option. */
2005    
2006     if (*option_data == 0)
2007     {
2008     if (i >= argc - 1 || longopwasequals)
2009 nigel 77 {
2010 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2011     exit(usage(2));
2012     }
2013     option_data = argv[++i];
2014     }
2015    
2016     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2017     multiple times to create a list of patterns. */
2018    
2019     if (op->type == OP_PATLIST)
2020     {
2021     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2022     {
2023     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2024     MAX_PATTERN_COUNT);
2025     return 2;
2026     }
2027     patterns[cmd_pattern_count++] = option_data;
2028     }
2029    
2030     /* Otherwise, deal with single string or numeric data values. */
2031    
2032     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2033     {
2034     *((char **)op->dataptr) = option_data;
2035     }
2036     else
2037     {
2038     char *endptr;
2039     int n = strtoul(option_data, &endptr, 10);
2040     if (*endptr != 0)
2041     {
2042     if (longop)
2043 nigel 77 {
2044 nigel 87 char *equals = strchr(op->long_name, '=');
2045     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2046     equals - op->long_name;
2047     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2048     option_data, nlen, op->long_name);
2049 nigel 77 }
2050 nigel 87 else
2051     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2052     option_data, op->one_char);
2053     exit(usage(2));
2054 nigel 77 }
2055 nigel 87 *((int *)op->dataptr) = n;
2056 nigel 77 }
2057 nigel 49 }
2058    
2059 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2060     for -A and -B. */
2061    
2062     if (both_context > 0)
2063     {
2064     if (after_context == 0) after_context = both_context;
2065     if (before_context == 0) before_context = both_context;
2066     }
2067 ph10 286
2068     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2069 ph10 280 However, the latter two set the only_matching flag. */
2070 nigel 77
2071 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2072 ph10 286 (file_offsets && line_offsets))
2073 ph10 280 {
2074     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2075     "and/or --line-offsets\n");
2076     exit(usage(2));
2077     }
2078    
2079 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2080    
2081 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2082     LC_ALL environment variable is set, and if so, use it. */
2083 nigel 49
2084 nigel 87 if (locale == NULL)
2085 nigel 53 {
2086 nigel 87 locale = getenv("LC_ALL");
2087     locale_from = "LCC_ALL";
2088 nigel 53 }
2089 nigel 49
2090 nigel 87 if (locale == NULL)
2091     {
2092     locale = getenv("LC_CTYPE");
2093     locale_from = "LC_CTYPE";
2094     }
2095 nigel 49
2096 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2097     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2098    
2099     if (locale != NULL)
2100 nigel 49 {
2101 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2102 nigel 53 {
2103 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2104     locale, locale_from);
2105 nigel 53 return 2;
2106     }
2107 nigel 87 pcretables = pcre_maketables();
2108     }
2109 nigel 77
2110 nigel 87 /* Sort out colouring */
2111    
2112     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2113     {
2114     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2115     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2116     else
2117 nigel 53 {
2118 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2119     colour_option);
2120     return 2;
2121 nigel 77 }
2122 nigel 87 if (do_colour)
2123 nigel 77 {
2124 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2125     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2126     if (cs != NULL) colour_string = cs;
2127 nigel 77 }
2128 nigel 87 }
2129 nigel 77
2130 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2131    
2132     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2133     {
2134     pcre_options |= PCRE_NEWLINE_CR;
2135 nigel 93 endlinetype = EL_CR;
2136 nigel 91 }
2137     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2138     {
2139     pcre_options |= PCRE_NEWLINE_LF;
2140 nigel 93 endlinetype = EL_LF;
2141 nigel 91 }
2142     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2143     {
2144     pcre_options |= PCRE_NEWLINE_CRLF;
2145 nigel 93 endlinetype = EL_CRLF;
2146 nigel 91 }
2147 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2148     {
2149     pcre_options |= PCRE_NEWLINE_ANY;
2150     endlinetype = EL_ANY;
2151     }
2152 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2153     {
2154     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2155     endlinetype = EL_ANYCRLF;
2156     }
2157 nigel 91 else
2158     {
2159     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2160     return 2;
2161     }
2162    
2163 nigel 87 /* Interpret the text values for -d and -D */
2164    
2165     if (dee_option != NULL)
2166     {
2167     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2168     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2169     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2170     else
2171 nigel 77 {
2172 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2173     return 2;
2174 nigel 53 }
2175 nigel 49 }
2176    
2177 nigel 87 if (DEE_option != NULL)
2178     {
2179     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2180     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2181     else
2182     {
2183     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2184     return 2;
2185     }
2186     }
2187 nigel 49
2188 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2189 nigel 87
2190     #ifdef JFRIEDL_DEBUG
2191     if (S_arg > 9)
2192 nigel 49 {
2193 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2194     return 2;
2195     }
2196 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2197     {
2198     if (jfriedl_XT == 0) jfriedl_XT = 1;
2199     if (jfriedl_XR == 0) jfriedl_XR = 1;
2200     }
2201 nigel 87 #endif
2202 nigel 77
2203 nigel 87 /* Get memory to store the pattern and hints lists. */
2204    
2205     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2206     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2207    
2208     if (pattern_list == NULL || hints_list == NULL)
2209     {
2210     fprintf(stderr, "pcregrep: malloc failed\n");
2211 ph10 123 goto EXIT2;
2212 nigel 87 }
2213    
2214     /* If no patterns were provided by -e, and there is no file provided by -f,
2215     the first argument is the one and only pattern, and it must exist. */
2216    
2217     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2218     {
2219 nigel 63 if (i >= argc) return usage(2);
2220 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2221     }
2222 nigel 77
2223 nigel 87 /* Compile the patterns that were provided on the command line, either by
2224     multiple uses of -e or as a single unkeyed pattern. */
2225    
2226     for (j = 0; j < cmd_pattern_count; j++)
2227     {
2228     if (!compile_pattern(patterns[j], pcre_options, NULL,
2229     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2230 ph10 123 goto EXIT2;
2231 nigel 87 }
2232    
2233     /* Compile the regular expressions that are provided in a file. */
2234    
2235     if (pattern_filename != NULL)
2236     {
2237     int linenumber = 0;
2238     FILE *f;
2239     char *filename;
2240     char buffer[MBUFTHIRD];
2241    
2242     if (strcmp(pattern_filename, "-") == 0)
2243 nigel 77 {
2244 nigel 87 f = stdin;
2245     filename = stdin_name;
2246 nigel 77 }
2247 nigel 87 else
2248 nigel 77 {
2249 nigel 87 f = fopen(pattern_filename, "r");
2250     if (f == NULL)
2251     {
2252     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2253     strerror(errno));
2254 ph10 123 goto EXIT2;
2255 nigel 87 }
2256     filename = pattern_filename;
2257 nigel 77 }
2258    
2259 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2260 nigel 53 {
2261 nigel 87 char *s = buffer + (int)strlen(buffer);
2262     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2263     *s = 0;
2264     linenumber++;
2265     if (buffer[0] == 0) continue; /* Skip blank lines */
2266     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2267 ph10 121 goto EXIT2;
2268 nigel 53 }
2269 nigel 87
2270     if (f != stdin) fclose(f);
2271 nigel 49 }
2272    
2273 nigel 77 /* Study the regular expressions, as we will be running them many times */
2274 nigel 53
2275     for (j = 0; j < pattern_count; j++)
2276     {
2277     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2278     if (error != NULL)
2279     {
2280     char s[16];
2281     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2282     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2283 ph10 121 goto EXIT2;
2284 nigel 53 }
2285 ph10 142 hint_count++;
2286 nigel 53 }
2287    
2288 nigel 77 /* If there are include or exclude patterns, compile them. */
2289    
2290     if (exclude_pattern != NULL)
2291     {
2292 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2293     pcretables);
2294 nigel 77 if (exclude_compiled == NULL)
2295     {
2296     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2297     errptr, error);
2298 ph10 121 goto EXIT2;
2299 nigel 77 }
2300     }
2301    
2302     if (include_pattern != NULL)
2303     {
2304 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2305     pcretables);
2306 nigel 77 if (include_compiled == NULL)
2307     {
2308     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2309     errptr, error);
2310 ph10 121 goto EXIT2;
2311 nigel 77 }
2312     }
2313    
2314 ph10 325 if (exclude_dir_pattern != NULL)
2315     {
2316     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2317     pcretables);
2318     if (exclude_dir_compiled == NULL)
2319     {
2320     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2321     errptr, error);
2322     goto EXIT2;
2323     }
2324     }
2325    
2326     if (include_dir_pattern != NULL)
2327     {
2328     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2329     pcretables);
2330     if (include_dir_compiled == NULL)
2331     {
2332     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2333     errptr, error);
2334     goto EXIT2;
2335     }
2336     }
2337    
2338 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2339 nigel 49
2340 nigel 87 if (i >= argc)
2341 ph10 121 {
2342 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2343 ph10 121 goto EXIT;
2344 ph10 123 }
2345 nigel 49
2346 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2347     Pass in the fact that there is only one argument at top level - this suppresses
2348 nigel 87 the file name if the argument is not a directory and filenames are not
2349     otherwise forced. */
2350 nigel 49
2351 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2352 nigel 49
2353     for (; i < argc; i++)
2354     {
2355 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2356     only_one_at_top);
2357 nigel 77 if (frc > 1) rc = frc;
2358     else if (frc == 0 && rc == 1) rc = 0;
2359 nigel 49 }
2360    
2361 ph10 121 EXIT:
2362     if (pattern_list != NULL)
2363     {
2364 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2365 ph10 121 free(pattern_list);
2366 ph10 123 }
2367 ph10 121 if (hints_list != NULL)
2368     {
2369 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2370 ph10 121 free(hints_list);
2371 ph10 123 }
2372 nigel 49 return rc;
2373 ph10 121
2374     EXIT2:
2375     rc = 2;
2376     goto EXIT;
2377 nigel 49 }
2378    
2379 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12