/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Contents of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 378 - (hide annotations) (download)
Sun Mar 1 14:13:34 2009 UTC (5 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 68901 byte(s)
Make pcregrep with --colour show all matches in a line in colour.

1 nigel 49 /*************************************************
2     * pcregrep program *
3     *************************************************/
4    
5     /* This is a grep program that uses the PCRE regular expression library to do
6 nigel 63 its pattern matching. On a Unix or Win32 system it can recurse into
7 nigel 75 directories.
8 nigel 49
9 ph10 377 Copyright (c) 1997-2009 University of Cambridge
10 nigel 75
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40 ph10 97 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 97 #endif
43    
44 nigel 53 #include <ctype.h>
45 nigel 87 #include <locale.h>
46 nigel 49 #include <stdio.h>
47     #include <string.h>
48     #include <stdlib.h>
49     #include <errno.h>
50 nigel 77
51     #include <sys/types.h>
52     #include <sys/stat.h>
53 ph10 199
54 ph10 137 #ifdef HAVE_UNISTD_H
55 ph10 199 #include <unistd.h>
56 ph10 137 #endif
57 nigel 77
58 ph10 286 #ifdef SUPPORT_LIBZ
59     #include <zlib.h>
60     #endif
61    
62     #ifdef SUPPORT_LIBBZ2
63     #include <bzlib.h>
64     #endif
65    
66 ph10 236 #include "pcre.h"
67 nigel 49
/* Local boolean type and its two values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* MAX_PATTERN_COUNT is presumably the limit on the number of patterns; its use
is outside this section. OFFSET_SIZE is the number of ints in the offsets vector
that is handed to pcre_exec(). */

#define OFFSET_SIZE 99
#define MAX_PATTERN_COUNT 100

/* One third of the size of the file buffer: BUFSIZ, but never less than 8192. */

#if BUFSIZ <= 8192
#define MBUFTHIRD 8192
#else
#define MBUFTHIRD BUFSIZ
#endif
81 nigel 49
82 nigel 87 /* Values for the "filenames" variable, which specifies options for file name
83     output. The order is important; it is assumed that a file name is wanted for
84     all values greater than FN_DEFAULT. */
85 nigel 77
enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_ONLY         = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};

/* File reading styles */

enum {
  FR_PLAIN  = 0,
  FR_LIBZ   = 1,
  FR_LIBBZ2 = 2
};

/* Actions for the -d and -D options */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};

/* Actions for special processing options (flag bits) */

#define PO_WORD_MATCH    (1 << 0)
#define PO_LINE_MATCH    (1 << 1)
#define PO_FIXED_STRINGS (1 << 2)

/* Line ending types */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
106 nigel 87
107 nigel 93
108    
109 nigel 49 /*************************************************
110     * Global variables *
111     *************************************************/
112    
113 nigel 87 /* Jeffrey Friedl has some debugging requirements that are not part of the
114     regular code. */
115    
116     #ifdef JFRIEDL_DEBUG
117     static int S_arg = -1;
118 nigel 89 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
119     static unsigned int jfriedl_XT = 0; /* replicate text this many times */
120     static const char *jfriedl_prefix = "";
121     static const char *jfriedl_postfix = "";
122 nigel 87 #endif
123    
124 nigel 93 static int endlinetype;
125 nigel 91
126 nigel 87 static char *colour_string = (char *)"1;31";
127     static char *colour_option = NULL;
128     static char *dee_option = NULL;
129     static char *DEE_option = NULL;
130 nigel 91 static char *newline = NULL;
131 nigel 53 static char *pattern_filename = NULL;
132 nigel 77 static char *stdin_name = (char *)"(standard input)";
133 nigel 87 static char *locale = NULL;
134    
135     static const unsigned char *pcretables = NULL;
136    
137 nigel 53 static int pattern_count = 0;
138 ph10 121 static pcre **pattern_list = NULL;
139     static pcre_extra **hints_list = NULL;
140 nigel 49
141 nigel 77 static char *include_pattern = NULL;
142     static char *exclude_pattern = NULL;
143 ph10 325 static char *include_dir_pattern = NULL;
144     static char *exclude_dir_pattern = NULL;
145 nigel 77
146     static pcre *include_compiled = NULL;
147     static pcre *exclude_compiled = NULL;
148 ph10 325 static pcre *include_dir_compiled = NULL;
149     static pcre *exclude_dir_compiled = NULL;
150 nigel 77
151     static int after_context = 0;
152     static int before_context = 0;
153     static int both_context = 0;
154 nigel 87 static int dee_action = dee_READ;
155     static int DEE_action = DEE_READ;
156     static int error_count = 0;
157     static int filenames = FN_DEFAULT;
158     static int process_options = 0;
159 nigel 77
160 nigel 49 static BOOL count_only = FALSE;
161 nigel 87 static BOOL do_colour = FALSE;
162 ph10 280 static BOOL file_offsets = FALSE;
163 nigel 77 static BOOL hyphenpending = FALSE;
164 nigel 49 static BOOL invert = FALSE;
165 ph10 280 static BOOL line_offsets = FALSE;
166 nigel 77 static BOOL multiline = FALSE;
167 nigel 49 static BOOL number = FALSE;
168 nigel 87 static BOOL only_matching = FALSE;
169 nigel 77 static BOOL quiet = FALSE;
170 nigel 49 static BOOL silent = FALSE;
171 nigel 93 static BOOL utf8 = FALSE;
172 nigel 49
173 nigel 53 /* Structure for options and list of them */
174 nigel 49
/* The kinds of option. In optionlist, OP_NODATA entries have no data pointer,
the STRING kinds point at a char * variable and the NUMBER kinds at an int.
The OP_OP_ kinds presumably take an optional value - the code that interprets
them is outside this section. */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_OP_NUMBER,
  OP_PATLIST
};

/* One entry in the table of options. */

typedef struct option_item {
  int type;               /* one of the OP_ values */
  int one_char;           /* the option letter, or a negative N_ value */
  void *dataptr;          /* the variable to set, or NULL */
  const char *long_name;  /* long option name, with any "=value" hint */
  const char *help_text;  /* description of the option */
} option_item;
185 nigel 49
186 nigel 87 /* Options without a single-letter equivalent get a negative value. This can be
187     used to identify them. */
188    
enum {
  N_COLOUR      = -1,
  N_EXCLUDE     = -2,
  N_EXCLUDE_DIR = -3,
  N_HELP        = -4,
  N_INCLUDE     = -5,
  N_INCLUDE_DIR = -6,
  N_LABEL       = -7,
  N_LOCALE      = -8,
  N_NULL        = -9,
  N_LOFFSETS    = -10,
  N_FOFFSETS    = -11
};
200 nigel 87
201 nigel 53 static option_item optionlist[] = {
202 nigel 87 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
203     { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
204     { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
205     { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
206     { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
207     { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
208     { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
209     { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
210     { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
211     { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
212     { OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
213     { OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
214     { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
215 ph10 280 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
216 nigel 87 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
217     { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
218     { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
219     { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
220     { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
221     { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
222 ph10 280 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
223 nigel 87 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
224     { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
225 ph10 280 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
226 nigel 87 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
227     { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
228     { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
229     { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
230     { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
231     { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
232 ph10 325 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
233     { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
234 nigel 87 #ifdef JFRIEDL_DEBUG
235     { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
236     #endif
237     { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
238     { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
239     { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
240     { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
241     { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
242     { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
243     { OP_NODATA, 0, NULL, NULL, NULL }
244 nigel 53 };
245    
246 nigel 87 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
247     options. These set the 1, 2, and 4 bits in process_options, respectively. Note
248     that the combination of -w and -x has the same effect as -x on its own, so we
249     can treat them as the same. */
250 nigel 53
/* Text placed before a pattern, indexed by the low three bits of
process_options. */

static const char *prefix[] = {
  "",          /* 0: none */
  "\\b",       /* 1: -w */
  "^(?:",      /* 2: -x */
  "^(?:",      /* 3: -w -x */
  "\\Q",       /* 4: -F */
  "\\b\\Q",    /* 5: -F -w */
  "^(?:\\Q",   /* 6: -F -x */
  "^(?:\\Q"    /* 7: -F -w -x */
};

/* Text placed after a pattern, indexed in the same way. */

static const char *suffix[] = {
  "",          /* 0: none */
  "\\b",       /* 1: -w */
  ")$",        /* 2: -x */
  ")$",        /* 3: -w -x */
  "\\E",       /* 4: -F */
  "\\E\\b",    /* 5: -F -w */
  "\\E)$",     /* 6: -F -x */
  "\\E)$"      /* 7: -F -w -x */
};
256    
/* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
258 nigel 87
/* Masks for the data bits in the first byte of a UTF-8 character, indexed by
the number of additional bytes. */

const int utf8_table3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 */
  0x0f,   /* 2 */
  0x07,   /* 3 */
  0x03,   /* 4 */
  0x01    /* 5 */
};

/* Number of additional bytes in a UTF-8 character, indexed by the bottom six
bits of a first byte that is 0xc0 or greater. */

const char utf8_table4[] = {
  /* 0xc0 - 0xcf */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0 - 0xdf */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0 - 0xef */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0 - 0xff */
  3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
266    
267    
268    
269 nigel 53 /*************************************************
270 nigel 87 * OS-specific functions *
271 nigel 53 *************************************************/
272    
273     /* These functions are defined so that they can be made system specific,
274 nigel 87 although at present the only ones are for Unix, Win32, and for "no support". */
275 nigel 53
276    
277     /************* Directory scanning in Unix ***********/
278    
279 ph10 97 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
280 nigel 53 #include <sys/types.h>
281     #include <sys/stat.h>
282     #include <dirent.h>
283    
284     typedef DIR directory_type;
285    
/* Test whether a path names a directory. Returns '/' if it does, and zero if
it does not or if stat() fails. */

static int
isdirectory(char *filename)
{
  struct stat sb;

  /* A path that cannot be inspected is reported as "not a directory", in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &sb) >= 0 && S_ISDIR(sb.st_mode))
    return '/';
  return 0;
}
294    
295 nigel 67 static directory_type *
296 nigel 53 opendirectory(char *filename)
297     {
298     return opendir(filename);
299     }
300    
301 nigel 67 static char *
302 nigel 53 readdirectory(directory_type *dir)
303     {
304     for (;;)
305     {
306     struct dirent *dent = readdir(dir);
307     if (dent == NULL) return NULL;
308     if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
309     return dent->d_name;
310     }
311 ph10 151 /* Control never reaches here */
312 nigel 53 }
313    
314 nigel 67 static void
315 nigel 53 closedirectory(directory_type *dir)
316     {
317     closedir(dir);
318     }
319    
320    
321 nigel 87 /************* Test for regular file in Unix **********/
322    
/* Test whether a path names a regular file. Returns non-zero if it does, or
if stat() fails, and zero otherwise. */

static int
isregfile(char *filename)
{
  struct stat sb;

  /* A path that cannot be inspected is reported as regular, in the
  expectation that opening it as a file will then fail. */

  if (stat(filename, &sb) < 0)
    return 1;
  return S_ISREG(sb.st_mode) ? 1 : 0;
}
331    
332    
333     /************* Test stdout for being a terminal in Unix **********/
334    
335     static BOOL
336     is_stdout_tty(void)
337     {
338     return isatty(fileno(stdout));
339     }
340    
341    
342 nigel 63 /************* Directory scanning in Win32 ***********/
343 nigel 53
344 nigel 63 /* I (Philip Hazel) have no means of testing this code. It was contributed by
345 nigel 77 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
346 ph10 286 when it did not exist. David Byron added a patch that moved the #include of
347     <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
348 ph10 283 */
349 nigel 53
350 ph10 97 #elif HAVE_WINDOWS_H
351 nigel 63
352     #ifndef STRICT
353     # define STRICT
354     #endif
355     #ifndef WIN32_LEAN_AND_MEAN
356     # define WIN32_LEAN_AND_MEAN
357     #endif
358 ph10 283
359     #include <windows.h>
360    
361 nigel 77 #ifndef INVALID_FILE_ATTRIBUTES
362     #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
363     #endif
364    
365 nigel 63 typedef struct directory_type
366     {
367     HANDLE handle;
368     BOOL first;
369     WIN32_FIND_DATA data;
370     } directory_type;
371    
372     int
373     isdirectory(char *filename)
374     {
375     DWORD attr = GetFileAttributes(filename);
376     if (attr == INVALID_FILE_ATTRIBUTES)
377     return 0;
378     return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
379     }
380    
381     directory_type *
382     opendirectory(char *filename)
383     {
384     size_t len;
385     char *pattern;
386     directory_type *dir;
387     DWORD err;
388     len = strlen(filename);
389     pattern = (char *) malloc(len + 3);
390     dir = (directory_type *) malloc(sizeof(*dir));
391     if ((pattern == NULL) || (dir == NULL))
392     {
393     fprintf(stderr, "pcregrep: malloc failed\n");
394     exit(2);
395     }
396     memcpy(pattern, filename, len);
397     memcpy(&(pattern[len]), "\\*", 3);
398     dir->handle = FindFirstFile(pattern, &(dir->data));
399     if (dir->handle != INVALID_HANDLE_VALUE)
400     {
401     free(pattern);
402     dir->first = TRUE;
403     return dir;
404     }
405     err = GetLastError();
406     free(pattern);
407     free(dir);
408     errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
409     return NULL;
410     }
411    
412     char *
413     readdirectory(directory_type *dir)
414     {
415     for (;;)
416     {
417     if (!dir->first)
418     {
419     if (!FindNextFile(dir->handle, &(dir->data)))
420     return NULL;
421     }
422     else
423     {
424     dir->first = FALSE;
425     }
426     if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
427     return dir->data.cFileName;
428     }
429     #ifndef _MSC_VER
430     return NULL; /* Keep compiler happy; never executed */
431     #endif
432     }
433    
434     void
435     closedirectory(directory_type *dir)
436     {
437     FindClose(dir->handle);
438     free(dir);
439     }
440    
441    
442 nigel 87 /************* Test for regular file in Win32 **********/
443    
444     /* I don't know how to do this, or if it can be done; assume all paths are
445     regular if they are not directories. */
446    
/* Anything that isdirectory() does not report as a directory is taken to be a
regular file. Returns 1 for those, and zero for directories. */

int isregfile(char *filename)
{
  if (isdirectory(filename))
    return 0;
  return 1;
}
451    
452    
453     /************* Test stdout for being a terminal in Win32 **********/
454    
455     /* I don't know how to do this; assume never */
456    
457     static BOOL
458     is_stdout_tty(void)
459     {
460 ph10 283 return FALSE;
461 nigel 87 }
462    
463    
464 nigel 53 /************* Directory scanning when we can't do it ***********/
465    
466     /* The type is void, and apart from isdirectory(), the functions do nothing. */
467    
468 nigel 63 #else
469    
typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* A directory can never be opened. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is nothing to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
476    
477 nigel 87
478     /************* Test for regular when we can't do it **********/
479    
480     /* Assume all files are regular. */
481    
int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
483    
484    
485     /************* Test stdout for being a terminal when we can't do it **********/
486    
487     static BOOL
488     is_stdout_tty(void)
489     {
490     return FALSE;
491     }
492    
493    
494 nigel 53 #endif
495    
496    
497    
498 ph10 137 #ifndef HAVE_STRERROR
499 nigel 49 /*************************************************
500     * Provide strerror() for non-ANSI libraries *
501     *************************************************/
502    
/* A replacement for strerror(), for old-fashioned systems (e.g. SunOS4) whose
libraries lack it but which do provide sys_nerr and sys_errlist. It returns the
sys_errlist entry for n, or a fixed text if n is not a valid index. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  if (n >= 0 && n < sys_nerr)
    return sys_errlist[n];
  return "unknown error number";
}
516     #endif /* HAVE_STRERROR */
517    
518    
519    
520     /*************************************************
521 nigel 93 * Find end of line *
522     *************************************************/
523    
524     /* The length of the endline sequence that is found is set via lenptr. This may
525     be zero at the very end of the file if there is no line-ending sequence there.
526    
527     Arguments:
528     p current position in line
529     endptr end of available data
530     lenptr where to put the length of the eol sequence
531    
532     Returns: pointer to the last byte of the line
533     */
534    
535     static char *
536     end_of_line(char *p, char *endptr, int *lenptr)
537     {
538     switch(endlinetype)
539     {
540     default: /* Just in case */
541     case EL_LF:
542     while (p < endptr && *p != '\n') p++;
543     if (p < endptr)
544     {
545     *lenptr = 1;
546     return p + 1;
547     }
548     *lenptr = 0;
549     return endptr;
550    
551     case EL_CR:
552     while (p < endptr && *p != '\r') p++;
553     if (p < endptr)
554     {
555     *lenptr = 1;
556     return p + 1;
557     }
558     *lenptr = 0;
559     return endptr;
560    
561     case EL_CRLF:
562     for (;;)
563     {
564     while (p < endptr && *p != '\r') p++;
565     if (++p >= endptr)
566     {
567     *lenptr = 0;
568     return endptr;
569     }
570     if (*p == '\n')
571     {
572     *lenptr = 2;
573     return p + 1;
574     }
575     }
576     break;
577    
578 ph10 149 case EL_ANYCRLF:
579     while (p < endptr)
580     {
581     int extra = 0;
582     register int c = *((unsigned char *)p);
583    
584     if (utf8 && c >= 0xc0)
585     {
586     int gcii, gcss;
587     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
588     gcss = 6*extra;
589     c = (c & utf8_table3[extra]) << gcss;
590     for (gcii = 1; gcii <= extra; gcii++)
591     {
592     gcss -= 6;
593     c |= (p[gcii] & 0x3f) << gcss;
594     }
595     }
596    
597     p += 1 + extra;
598    
599     switch (c)
600     {
601     case 0x0a: /* LF */
602     *lenptr = 1;
603     return p;
604    
605     case 0x0d: /* CR */
606     if (p < endptr && *p == 0x0a)
607     {
608     *lenptr = 2;
609     p++;
610     }
611     else *lenptr = 1;
612     return p;
613 ph10 150
614 ph10 149 default:
615     break;
616     }
617     } /* End of loop for ANYCRLF case */
618 ph10 150
619 ph10 149 *lenptr = 0; /* Must have hit the end */
620     return endptr;
621    
622 nigel 93 case EL_ANY:
623     while (p < endptr)
624     {
625     int extra = 0;
626     register int c = *((unsigned char *)p);
627    
628     if (utf8 && c >= 0xc0)
629     {
630     int gcii, gcss;
631     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
632     gcss = 6*extra;
633     c = (c & utf8_table3[extra]) << gcss;
634     for (gcii = 1; gcii <= extra; gcii++)
635     {
636     gcss -= 6;
637     c |= (p[gcii] & 0x3f) << gcss;
638     }
639     }
640    
641     p += 1 + extra;
642    
643     switch (c)
644     {
645     case 0x0a: /* LF */
646     case 0x0b: /* VT */
647     case 0x0c: /* FF */
648     *lenptr = 1;
649     return p;
650    
651     case 0x0d: /* CR */
652     if (p < endptr && *p == 0x0a)
653     {
654     *lenptr = 2;
655     p++;
656     }
657     else *lenptr = 1;
658     return p;
659    
660     case 0x85: /* NEL */
661     *lenptr = utf8? 2 : 1;
662     return p;
663    
664     case 0x2028: /* LS */
665     case 0x2029: /* PS */
666     *lenptr = 3;
667     return p;
668    
669     default:
670     break;
671     }
672     } /* End of loop for ANY case */
673    
674     *lenptr = 0; /* Must have hit the end */
675     return endptr;
676     } /* End of overall switch */
677     }
678    
679    
680    
681     /*************************************************
682     * Find start of previous line *
683     *************************************************/
684    
685     /* This is called when looking back for before lines to print.
686    
687     Arguments:
688     p start of the subsequent line
689     startptr start of available data
690    
691     Returns: pointer to the start of the previous line
692     */
693    
694     static char *
695     previous_line(char *p, char *startptr)
696     {
697     switch(endlinetype)
698     {
699     default: /* Just in case */
700     case EL_LF:
701     p--;
702     while (p > startptr && p[-1] != '\n') p--;
703     return p;
704    
705     case EL_CR:
706     p--;
707     while (p > startptr && p[-1] != '\n') p--;
708     return p;
709    
710     case EL_CRLF:
711     for (;;)
712     {
713     p -= 2;
714     while (p > startptr && p[-1] != '\n') p--;
715     if (p <= startptr + 1 || p[-2] == '\r') return p;
716     }
717     return p; /* But control should never get here */
718    
719     case EL_ANY:
720 ph10 150 case EL_ANYCRLF:
721 nigel 93 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
722     if (utf8) while ((*p & 0xc0) == 0x80) p--;
723    
724     while (p > startptr)
725     {
726     register int c;
727     char *pp = p - 1;
728    
729     if (utf8)
730     {
731     int extra = 0;
732     while ((*pp & 0xc0) == 0x80) pp--;
733     c = *((unsigned char *)pp);
734     if (c >= 0xc0)
735     {
736     int gcii, gcss;
737     extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
738     gcss = 6*extra;
739     c = (c & utf8_table3[extra]) << gcss;
740     for (gcii = 1; gcii <= extra; gcii++)
741     {
742     gcss -= 6;
743     c |= (pp[gcii] & 0x3f) << gcss;
744     }
745     }
746     }
747     else c = *((unsigned char *)pp);
748    
749 ph10 149 if (endlinetype == EL_ANYCRLF) switch (c)
750 nigel 93 {
751     case 0x0a: /* LF */
752 ph10 149 case 0x0d: /* CR */
753     return p;
754 ph10 150
755 ph10 149 default:
756     break;
757 ph10 150 }
758 ph10 149
759     else switch (c)
760     {
761     case 0x0a: /* LF */
762 nigel 93 case 0x0b: /* VT */
763     case 0x0c: /* FF */
764     case 0x0d: /* CR */
765     case 0x85: /* NEL */
766     case 0x2028: /* LS */
767     case 0x2029: /* PS */
768     return p;
769    
770     default:
771     break;
772     }
773    
774     p = pp; /* Back one character */
775     } /* End of loop for ANY case */
776    
777     return startptr; /* Hit start of data */
778     } /* End of overall switch */
779     }
780    
781    
782    
783    
784    
785     /*************************************************
786 nigel 77 * Print the previous "after" lines *
787 nigel 49 *************************************************/
788    
789 nigel 77 /* This is called if we are about to lose said lines because of buffer filling,
790 nigel 87 and at the end of the file. The data in the line is written using fwrite() so
791     that a binary zero does not terminate it.
792 nigel 77
793     Arguments:
794     lastmatchnumber the number of the last matching line, plus one
795     lastmatchrestart where we restarted after the last match
796     endptr end of available data
797     printname filename for printing
798    
799     Returns: nothing
800     */
801    
802     static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
803     char *endptr, char *printname)
804     {
805     if (after_context > 0 && lastmatchnumber > 0)
806     {
807     int count = 0;
808     while (lastmatchrestart < endptr && count++ < after_context)
809     {
810 nigel 93 int ellength;
811 nigel 77 char *pp = lastmatchrestart;
812     if (printname != NULL) fprintf(stdout, "%s-", printname);
813     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
814 nigel 93 pp = end_of_line(pp, endptr, &ellength);
815     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
816     lastmatchrestart = pp;
817 nigel 77 }
818     hyphenpending = TRUE;
819     }
820     }
821    
822    
823    
824     /*************************************************
825 ph10 378 * Apply patterns to subject till one matches *
826     *************************************************/
827    
828     /* This function is called to run through all patterns, looking for a match. It
829     is used multiple times for the same subject when colouring is enabled, in order
830     to find all possible matches.
831    
832     Arguments:
833     matchptr the start of the subject
834     length the length of the subject to match
835     offsets the offets vector to fill in
836     mrc address of where to put the result of pcre_exec()
837    
838     Returns: TRUE if there was a match
839     FALSE if there was no match
840     invert if there was a non-fatal error
841     */
842    
843     static BOOL
844     match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845     {
846     int i;
847     for (i = 0; i < pattern_count; i++)
848     {
849     *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
850     offsets, OFFSET_SIZE);
851     if (*mrc >= 0) return TRUE;
852     if (*mrc == PCRE_ERROR_NOMATCH) continue;
853     fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854     if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855     fprintf(stderr, "this text:\n");
856     fwrite(matchptr, 1, length, stderr); /* In case binary zero included */
857     fprintf(stderr, "\n");
858     if (error_count == 0 &&
859     (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860     {
861     fprintf(stderr, "pcregrep: error %d means that a resource limit "
862     "was exceeded\n", *mrc);
863     fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864     }
865     if (error_count++ > 20)
866     {
867     fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868     exit(2);
869     }
870     return invert; /* No more matching; don't show the line again */
871     }
872    
873     return FALSE; /* No match, no errors */
874     }
875    
876    
877    
878     /*************************************************
879 nigel 77 * Grep an individual file *
880     *************************************************/
881    
882     /* This is called from grep_or_recurse() below. It uses a buffer that is three
883     times the value of MBUFTHIRD. The matching point is never allowed to stray into
884     the top third of the buffer, thus keeping more of the file available for
885     context printing or for multiline scanning. For large files, the pointer will
886     be in the middle third most of the time, so the bottom third is available for
887     "before" context printing.
888    
889     Arguments:
890 ph10 286 handle the fopened FILE stream for a normal file
891     the gzFile pointer when reading is via libz
892     the BZFILE pointer when reading is via libbz2
893     frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
894 nigel 77 printname the file name if it is to be printed for each match
895     or NULL if the file name is not to be printed
896     it cannot be NULL if filenames[_nomatch]_only is set
897    
898     Returns: 0 if there was at least one match
899     1 otherwise (no matches)
900 ph10 286 2 if there is a read error on a .bz2 file
901 nigel 77 */
902    
903 nigel 49 static int
904 ph10 286 pcregrep(void *handle, int frtype, char *printname)
905 nigel 49 {
906     int rc = 1;
907 nigel 77 int linenumber = 1;
908     int lastmatchnumber = 0;
909 nigel 49 int count = 0;
910 ph10 280 int filepos = 0;
911 ph10 378 int offsets[OFFSET_SIZE];
912 nigel 77 char *lastmatchrestart = NULL;
913     char buffer[3*MBUFTHIRD];
914     char *ptr = buffer;
915     char *endptr;
916     size_t bufflength;
917     BOOL endhyphenpending = FALSE;
918 ph10 286 FILE *in = NULL; /* Ensure initialized */
919 nigel 49
920 ph10 286 #ifdef SUPPORT_LIBZ
921     gzFile ingz = NULL;
922     #endif
923 nigel 77
924 ph10 286 #ifdef SUPPORT_LIBBZ2
925     BZFILE *inbz2 = NULL;
926     #endif
927    
928    
929     /* Do the first read into the start of the buffer and set up the pointer to end
930     of what we have. In the case of libz, a non-zipped .gz file will be read as a
931     plain file. However, if a .bz2 file isn't actually bzipped, the first read will
932     fail. */
933    
934     #ifdef SUPPORT_LIBZ
935     if (frtype == FR_LIBZ)
936     {
937     ingz = (gzFile)handle;
938     bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
939     }
940     else
941     #endif
942    
943     #ifdef SUPPORT_LIBBZ2
944     if (frtype == FR_LIBBZ2)
945     {
946     inbz2 = (BZFILE *)handle;
947     bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
948     if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
949     } /* without the cast it is unsigned. */
950     else
951     #endif
952    
953     {
954     in = (FILE *)handle;
955     bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
956     }
957    
958 nigel 77 endptr = buffer + bufflength;
959    
960     /* Loop while the current pointer is not at the end of the file. For large
961     files, endptr will be at the end of the buffer when we are in the middle of the
962     file, but ptr will never get there, because as soon as it gets over 2/3 of the
963     way, the buffer is shifted left and re-filled. */
964    
965     while (ptr < endptr)
966 nigel 49 {
967 ph10 378 int endlinelength;
968 nigel 87 int mrc = 0;
969 ph10 378 BOOL match;
970 ph10 286 char *matchptr = ptr;
971 nigel 77 char *t = ptr;
972     size_t length, linelength;
973 nigel 49
974 nigel 77 /* At this point, ptr is at the start of a line. We need to find the length
975     of the subject string to pass to pcre_exec(). In multiline mode, it is the
976     length remainder of the data in the buffer. Otherwise, it is the length of
977 ph10 378 the next line, excluding the terminating newline. After matching, we always
978     advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979     option is used for compiling, so that any match is constrained to be in the
980     first line. */
981 nigel 77
982 nigel 93 t = end_of_line(t, endptr, &endlinelength);
983     linelength = t - ptr - endlinelength;
984 ph10 199 length = multiline? (size_t)(endptr - ptr) : linelength;
985 nigel 77
986 nigel 89 /* Extra processing for Jeffrey Friedl's debugging. */
987    
988     #ifdef JFRIEDL_DEBUG
989     if (jfriedl_XT || jfriedl_XR)
990     {
991     #include <sys/time.h>
992     #include <time.h>
993     struct timeval start_time, end_time;
994     struct timezone dummy;
995 ph10 378 int i;
996 nigel 89
997     if (jfriedl_XT)
998     {
999     unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1000     const char *orig = ptr;
1001     ptr = malloc(newlen + 1);
1002     if (!ptr) {
1003     printf("out of memory");
1004     exit(2);
1005     }
1006     endptr = ptr;
1007     strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1008     for (i = 0; i < jfriedl_XT; i++) {
1009     strncpy(endptr, orig, length);
1010     endptr += length;
1011     }
1012     strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1013     length = newlen;
1014     }
1015    
1016     if (gettimeofday(&start_time, &dummy) != 0)
1017     perror("bad gettimeofday");
1018    
1019    
1020     for (i = 0; i < jfriedl_XR; i++)
1021 ph10 378 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, OFFSET_SIZE) >= 0);
1022 nigel 89
1023     if (gettimeofday(&end_time, &dummy) != 0)
1024     perror("bad gettimeofday");
1025    
1026     double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1027     -
1028     (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1029    
1030     printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1031     return 0;
1032     }
1033     #endif
1034    
1035 ph10 286 /* We come back here after a match when the -o option (only_matching) is set,
1036 ph10 279 in order to find any further matches in the same line. */
1037 nigel 89
1038 ph10 286 ONLY_MATCHING_RESTART:
1039    
1040 ph10 378 /* Run through all the patterns until one matches or there is an error other
1041     than NOMATCH. This code is in a subroutine so that it can be re-used for
1042     finding subsequent matches when colouring matched lines. */
1043    
1044     match = match_patterns(matchptr, length, offsets, &mrc);
1045 nigel 77
1046 nigel 87 /* If it's a match or a not-match (as required), do what's wanted. */
1047 nigel 77
1048 nigel 49 if (match != invert)
1049     {
1050 nigel 77 BOOL hyphenprinted = FALSE;
1051    
1052 nigel 87 /* We've failed if we want a file that doesn't have any matches. */
1053 nigel 77
1054 nigel 87 if (filenames == FN_NOMATCH_ONLY) return 1;
1055    
1056     /* Just count if just counting is wanted. */
1057    
1058 nigel 49 if (count_only) count++;
1059    
1060 nigel 87 /* If all we want is a file name, there is no need to scan any more lines
1061     in the file. */
1062    
1063     else if (filenames == FN_ONLY)
1064 nigel 49 {
1065 nigel 77 fprintf(stdout, "%s\n", printname);
1066 nigel 49 return 0;
1067     }
1068    
1069 nigel 87 /* Likewise, if all we want is a yes/no answer. */
1070    
1071 nigel 77 else if (quiet) return 0;
1072 nigel 49
1073 nigel 87 /* The --only-matching option prints just the substring that matched, and
1074 ph10 286 the --file-offsets and --line-offsets options output offsets for the
1075 ph10 280 matching substring (they both force --only-matching). None of these options
1076     prints any context. Afterwards, adjust the start and length, and then jump
1077     back to look for further matches in the same line. If we are in invert
1078     mode, however, nothing is printed - this could be still useful because the
1079     return code is set. */
1080 nigel 87
1081     else if (only_matching)
1082     {
1083 ph10 279 if (!invert)
1084 ph10 286 {
1085 ph10 279 if (printname != NULL) fprintf(stdout, "%s:", printname);
1086     if (number) fprintf(stdout, "%d:", linenumber);
1087 ph10 280 if (line_offsets)
1088 ph10 357 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1089 ph10 286 offsets[1] - offsets[0]);
1090 ph10 280 else if (file_offsets)
1091 ph10 357 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1092 ph10 286 offsets[1] - offsets[0]);
1093     else
1094 ph10 377 {
1095     if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1096 ph10 280 fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1097 ph10 377 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1098     }
1099 ph10 279 fprintf(stdout, "\n");
1100     matchptr += offsets[1];
1101     length -= offsets[1];
1102 ph10 286 match = FALSE;
1103     goto ONLY_MATCHING_RESTART;
1104     }
1105 nigel 87 }
1106    
1107     /* This is the default case when none of the above options is set. We print
1108     the matching lines(s), possibly preceded and/or followed by other lines of
1109     context. */
1110    
1111 nigel 49 else
1112     {
1113 nigel 77 /* See if there is a requirement to print some "after" lines from a
1114     previous match. We never print any overlaps. */
1115    
1116     if (after_context > 0 && lastmatchnumber > 0)
1117     {
1118 nigel 93 int ellength;
1119 nigel 77 int linecount = 0;
1120     char *p = lastmatchrestart;
1121    
1122     while (p < ptr && linecount < after_context)
1123     {
1124 nigel 93 p = end_of_line(p, ptr, &ellength);
1125 nigel 77 linecount++;
1126     }
1127    
1128     /* It is important to advance lastmatchrestart during this printing so
1129 nigel 87 that it interacts correctly with any "before" printing below. Print
1130     each line's data using fwrite() in case there are binary zeroes. */
1131 nigel 77
1132     while (lastmatchrestart < p)
1133     {
1134     char *pp = lastmatchrestart;
1135     if (printname != NULL) fprintf(stdout, "%s-", printname);
1136     if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1137 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1138     fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1139     lastmatchrestart = pp;
1140 nigel 77 }
1141     if (lastmatchrestart != ptr) hyphenpending = TRUE;
1142     }
1143    
1144     /* If there were non-contiguous lines printed above, insert hyphens. */
1145    
1146     if (hyphenpending)
1147     {
1148     fprintf(stdout, "--\n");
1149     hyphenpending = FALSE;
1150     hyphenprinted = TRUE;
1151     }
1152    
1153     /* See if there is a requirement to print some "before" lines for this
1154     match. Again, don't print overlaps. */
1155    
1156     if (before_context > 0)
1157     {
1158     int linecount = 0;
1159     char *p = ptr;
1160    
1161     while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1162 nigel 87 linecount < before_context)
1163 nigel 77 {
1164 nigel 87 linecount++;
1165 nigel 93 p = previous_line(p, buffer);
1166 nigel 77 }
1167    
1168     if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1169     fprintf(stdout, "--\n");
1170    
1171     while (p < ptr)
1172     {
1173 nigel 93 int ellength;
1174 nigel 77 char *pp = p;
1175     if (printname != NULL) fprintf(stdout, "%s-", printname);
1176     if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1177 nigel 93 pp = end_of_line(pp, endptr, &ellength);
1178     fwrite(p, 1, pp - p, stdout);
1179     p = pp;
1180 nigel 77 }
1181     }
1182    
1183     /* Now print the matching line(s); ensure we set hyphenpending at the end
1184 nigel 85 of the file if any context lines are being output. */
1185 nigel 77
1186 nigel 85 if (after_context > 0 || before_context > 0)
1187     endhyphenpending = TRUE;
1188    
1189 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1190 nigel 49 if (number) fprintf(stdout, "%d:", linenumber);
1191 nigel 77
1192     /* In multiline mode, we want to print to the end of the line in which
1193     the end of the matched string is found, so we adjust linelength and the
1194 ph10 222 line number appropriately, but only when there actually was a match
1195     (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1196     the match will always be before the first newline sequence. */
1197 nigel 77
1198     if (multiline)
1199     {
1200 nigel 93 int ellength;
1201 ph10 222 char *endmatch = ptr;
1202     if (!invert)
1203 nigel 93 {
1204 ph10 222 endmatch += offsets[1];
1205     t = ptr;
1206     while (t < endmatch)
1207     {
1208     t = end_of_line(t, endptr, &ellength);
1209     if (t <= endmatch) linenumber++; else break;
1210     }
1211 nigel 93 }
1212     endmatch = end_of_line(endmatch, endptr, &ellength);
1213     linelength = endmatch - ptr - ellength;
1214 nigel 77 }
1215    
1216 nigel 87 /*** NOTE: Use only fwrite() to output the data line, so that binary
1217     zeroes are treated as just another data character. */
1218    
1219     /* This extra option, for Jeffrey Friedl's debugging requirements,
1220     replaces the matched string, or a specific captured string if it exists,
1221     with X. When this happens, colouring is ignored. */
1222    
1223     #ifdef JFRIEDL_DEBUG
1224     if (S_arg >= 0 && S_arg < mrc)
1225     {
1226     int first = S_arg * 2;
1227     int last = first + 1;
1228     fwrite(ptr, 1, offsets[first], stdout);
1229     fprintf(stdout, "X");
1230     fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1231     }
1232     else
1233     #endif
1234    
1235 ph10 378 /* We have to split the line(s) up if colouring, and search for further
1236     matches. */
1237 nigel 87
1238     if (do_colour)
1239     {
1240 ph10 378 int last_offset = 0;
1241 nigel 87 fwrite(ptr, 1, offsets[0], stdout);
1242     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1243     fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1244     fprintf(stdout, "%c[00m", 0x1b);
1245 ph10 378 for (;;)
1246     {
1247     last_offset += offsets[1];
1248     matchptr += offsets[1];
1249     length -= offsets[1];
1250     if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1251     fwrite(matchptr, 1, offsets[0], stdout);
1252     fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1253     fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1254     fprintf(stdout, "%c[00m", 0x1b);
1255     }
1256     fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1257 ph10 239 stdout);
1258 nigel 87 }
1259 ph10 378
1260     /* Not colouring; no need to search for further matches */
1261    
1262 nigel 93 else fwrite(ptr, 1, linelength + endlinelength, stdout);
1263 nigel 49 }
1264    
1265 nigel 87 /* End of doing what has to be done for a match */
1266    
1267 nigel 77 rc = 0; /* Had some success */
1268    
1269     /* Remember where the last match happened for after_context. We remember
1270     where we are about to restart, and that line's number. */
1271    
1272 nigel 93 lastmatchrestart = ptr + linelength + endlinelength;
1273 nigel 77 lastmatchnumber = linenumber + 1;
1274 nigel 49 }
1275 nigel 77
1276 ph10 222 /* For a match in multiline inverted mode (which of course did not cause
1277     anything to be printed), we have to move on to the end of the match before
1278     proceeding. */
1279    
1280     if (multiline && invert && match)
1281     {
1282     int ellength;
1283     char *endmatch = ptr + offsets[1];
1284     t = ptr;
1285     while (t < endmatch)
1286     {
1287     t = end_of_line(t, endptr, &ellength);
1288     if (t <= endmatch) linenumber++; else break;
1289     }
1290     endmatch = end_of_line(endmatch, endptr, &ellength);
1291     linelength = endmatch - ptr - ellength;
1292     }
1293    
1294 ph10 286 /* Advance to after the newline and increment the line number. The file
1295 ph10 280 offset to the current line is maintained in filepos. */
1296 nigel 77
1297 nigel 93 ptr += linelength + endlinelength;
1298 ph10 280 filepos += linelength + endlinelength;
1299 nigel 77 linenumber++;
1300    
1301     /* If we haven't yet reached the end of the file (the buffer is full), and
1302     the current point is in the top 1/3 of the buffer, slide the buffer down by
1303     1/3 and refill it. Before we do this, if some unprinted "after" lines are
1304     about to be lost, print them. */
1305    
1306     if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1307     {
1308     if (after_context > 0 &&
1309     lastmatchnumber > 0 &&
1310     lastmatchrestart < buffer + MBUFTHIRD)
1311     {
1312     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1313     lastmatchnumber = 0;
1314     }
1315    
1316     /* Now do the shuffle */
1317    
1318     memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1319     ptr -= MBUFTHIRD;
1320 ph10 286
1321     #ifdef SUPPORT_LIBZ
1322     if (frtype == FR_LIBZ)
1323     bufflength = 2*MBUFTHIRD +
1324     gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1325     else
1326     #endif
1327    
1328     #ifdef SUPPORT_LIBBZ2
1329     if (frtype == FR_LIBBZ2)
1330     bufflength = 2*MBUFTHIRD +
1331     BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1332     else
1333     #endif
1334    
1335 nigel 77 bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1336 ph10 286
1337 nigel 77 endptr = buffer + bufflength;
1338    
1339     /* Adjust any last match point */
1340    
1341     if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1342     }
1343     } /* Loop through the whole file */
1344    
1345     /* End of file; print final "after" lines if wanted; do_after_lines sets
1346     hyphenpending if it prints something. */
1347    
1348 nigel 87 if (!only_matching && !count_only)
1349     {
1350     do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1351     hyphenpending |= endhyphenpending;
1352     }
1353 nigel 77
1354     /* Print the file name if we are looking for those without matches and there
1355     were none. If we found a match, we won't have got this far. */
1356    
1357 nigel 87 if (filenames == FN_NOMATCH_ONLY)
1358 nigel 77 {
1359     fprintf(stdout, "%s\n", printname);
1360     return 0;
1361 nigel 49 }
1362    
1363 nigel 77 /* Print the match count if wanted */
1364    
1365 nigel 49 if (count_only)
1366     {
1367 nigel 77 if (printname != NULL) fprintf(stdout, "%s:", printname);
1368 nigel 49 fprintf(stdout, "%d\n", count);
1369     }
1370    
1371     return rc;
1372     }
1373    
1374    
1375    
1376     /*************************************************
1377 nigel 53 * Grep a file or recurse into a directory *
1378     *************************************************/
1379    
1380 nigel 77 /* Given a path name, if it's a directory, scan all the files if we are
1381     recursing; if it's a file, grep it.
1382    
1383     Arguments:
1384     pathname the path to investigate
1385 nigel 87 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1386 nigel 77 only_one_at_top TRUE if the path is the only one at toplevel
1387    
1388     Returns: 0 if there was at least one match
1389     1 if there were no matches
1390     2 there was some kind of error
1391    
1392     However, file opening failures are suppressed if "silent" is set.
1393     */
1394    
1395 nigel 53 static int
1396 nigel 87 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1397 nigel 53 {
1398     int rc = 1;
1399     int sep;
1400 ph10 286 int frtype;
1401     int pathlen;
1402     void *handle;
1403     FILE *in = NULL; /* Ensure initialized */
1404 nigel 53
1405 ph10 286 #ifdef SUPPORT_LIBZ
1406     gzFile ingz = NULL;
1407     #endif
1408    
1409     #ifdef SUPPORT_LIBBZ2
1410     BZFILE *inbz2 = NULL;
1411     #endif
1412    
1413 nigel 77 /* If the file name is "-" we scan stdin */
1414 nigel 53
1415 nigel 77 if (strcmp(pathname, "-") == 0)
1416 nigel 53 {
1417 ph10 286 return pcregrep(stdin, FR_PLAIN,
1418 nigel 87 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1419 nigel 77 stdin_name : NULL);
1420     }
1421    
1422 nigel 87 /* If the file is a directory, skip if skipping or if we are recursing, scan
1423 ph10 325 each file and directory within it, subject to any include or exclude patterns
1424     that were set. The scanning code is localized so it can be made
1425     system-specific. */
1426 nigel 87
1427     if ((sep = isdirectory(pathname)) != 0)
1428 nigel 77 {
1429 nigel 87 if (dee_action == dee_SKIP) return 1;
1430     if (dee_action == dee_RECURSE)
1431 nigel 53 {
1432 nigel 87 char buffer[1024];
1433     char *nextfile;
1434     directory_type *dir = opendirectory(pathname);
1435 nigel 53
1436 nigel 87 if (dir == NULL)
1437     {
1438     if (!silent)
1439     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1440     strerror(errno));
1441     return 2;
1442     }
1443 nigel 77
1444 nigel 87 while ((nextfile = readdirectory(dir)) != NULL)
1445     {
1446 ph10 324 int frc, nflen;
1447 nigel 87 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1448 ph10 324 nflen = strlen(nextfile);
1449 ph10 345
1450 ph10 325 if (isdirectory(buffer))
1451     {
1452     if (exclude_dir_compiled != NULL &&
1453     pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1454     continue;
1455 ph10 345
1456 ph10 325 if (include_dir_compiled != NULL &&
1457     pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1458     continue;
1459     }
1460 ph10 345 else
1461     {
1462 ph10 324 if (exclude_compiled != NULL &&
1463     pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1464     continue;
1465 ph10 345
1466 ph10 324 if (include_compiled != NULL &&
1467     pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1468     continue;
1469 ph10 345 }
1470 nigel 77
1471 nigel 87 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1472     if (frc > 1) rc = frc;
1473     else if (frc == 0 && rc == 1) rc = 0;
1474     }
1475    
1476     closedirectory(dir);
1477     return rc;
1478 nigel 53 }
1479     }
1480    
1481 nigel 87 /* If the file is not a directory and not a regular file, skip it if that's
1482     been requested. */
1483 nigel 53
1484 nigel 87 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1485    
1486     /* Control reaches here if we have a regular file, or if we have a directory
1487     and recursion or skipping was not requested, or if we have anything else and
1488     skipping was not requested. The scan proceeds. If this is the first and only
1489     argument at top level, we don't show the file name, unless we are only showing
1490     the file name, or the filename was forced (-H). */
1491    
1492 ph10 286 pathlen = strlen(pathname);
1493    
1494     /* Open using zlib if it is supported and the file name ends with .gz. */
1495    
1496     #ifdef SUPPORT_LIBZ
1497     if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1498 nigel 53 {
1499 ph10 286 ingz = gzopen(pathname, "rb");
1500     if (ingz == NULL)
1501     {
1502     if (!silent)
1503     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1504     strerror(errno));
1505     return 2;
1506     }
1507     handle = (void *)ingz;
1508     frtype = FR_LIBZ;
1509     }
1510     else
1511     #endif
1512    
1513     /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1514    
1515     #ifdef SUPPORT_LIBBZ2
1516     if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1517     {
1518     inbz2 = BZ2_bzopen(pathname, "rb");
1519     handle = (void *)inbz2;
1520     frtype = FR_LIBBZ2;
1521     }
1522     else
1523     #endif
1524    
1525     /* Otherwise use plain fopen(). The label is so that we can come back here if
1526     an attempt to read a .bz2 file indicates that it really is a plain file. */
1527    
1528     #ifdef SUPPORT_LIBBZ2
1529     PLAIN_FILE:
1530     #endif
1531     {
1532     in = fopen(pathname, "r");
1533     handle = (void *)in;
1534     frtype = FR_PLAIN;
1535     }
1536    
1537     /* All the opening methods return errno when they fail. */
1538    
1539     if (handle == NULL)
1540     {
1541 nigel 77 if (!silent)
1542     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1543     strerror(errno));
1544 nigel 53 return 2;
1545     }
1546    
1547 ph10 286 /* Now grep the file */
1548    
1549     rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1550 nigel 87 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1551 nigel 77
1552 ph10 286 /* Close in an appropriate manner. */
1553    
1554     #ifdef SUPPORT_LIBZ
1555     if (frtype == FR_LIBZ)
1556     gzclose(ingz);
1557     else
1558     #endif
1559    
1560     /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1561     read failed. If the error indicates that the file isn't in fact bzipped, try
1562     again as a normal file. */
1563    
1564     #ifdef SUPPORT_LIBBZ2
1565     if (frtype == FR_LIBBZ2)
1566     {
1567     if (rc == 2)
1568     {
1569     int errnum;
1570     const char *err = BZ2_bzerror(inbz2, &errnum);
1571     if (errnum == BZ_DATA_ERROR_MAGIC)
1572     {
1573     BZ2_bzclose(inbz2);
1574     goto PLAIN_FILE;
1575     }
1576     else if (!silent)
1577     fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1578     pathname, err);
1579     }
1580     BZ2_bzclose(inbz2);
1581     }
1582     else
1583     #endif
1584    
1585     /* Normal file close */
1586    
1587 nigel 53 fclose(in);
1588 ph10 286
1589     /* Pass back the yield from pcregrep(). */
1590    
1591 nigel 53 return rc;
1592     }
1593    
1594    
1595    
1596    
1597     /*************************************************
1598 nigel 49 * Usage function *
1599     *************************************************/
1600    
1601     static int
1602     usage(int rc)
1603     {
1604 nigel 87 option_item *op;
1605     fprintf(stderr, "Usage: pcregrep [-");
1606     for (op = optionlist; op->one_char != 0; op++)
1607     {
1608     if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1609     }
1610     fprintf(stderr, "] [long options] [pattern] [files]\n");
1611 ph10 280 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1612     "options.\n");
1613 nigel 49 return rc;
1614     }
1615    
1616    
1617    
1618    
1619     /*************************************************
1620 nigel 53 * Help function *
1621     *************************************************/
1622    
1623     static void
1624     help(void)
1625     {
1626     option_item *op;
1627    
1628 nigel 63 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1629 nigel 53 printf("Search for PATTERN in each FILE or standard input.\n");
1630 nigel 87 printf("PATTERN must be present if neither -e nor -f is used.\n");
1631 ph10 286 printf("\"-\" can be used as a file name to mean STDIN.\n");
1632    
1633     #ifdef SUPPORT_LIBZ
1634     printf("Files whose names end in .gz are read using zlib.\n");
1635     #endif
1636    
1637     #ifdef SUPPORT_LIBBZ2
1638     printf("Files whose names end in .bz2 are read using bzlib2.\n");
1639     #endif
1640    
1641     #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1642     printf("Other files and the standard input are read as plain files.\n\n");
1643     #else
1644     printf("All files are read as plain files, without any interpretation.\n\n");
1645     #endif
1646    
1647 nigel 53 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1648     printf("Options:\n");
1649    
1650     for (op = optionlist; op->one_char != 0; op++)
1651     {
1652     int n;
1653     char s[4];
1654     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1655 ph10 296 n = 30 - printf(" %s --%s", s, op->long_name);
1656 nigel 53 if (n < 1) n = 1;
1657     printf("%.*s%s\n", n, " ", op->help_text);
1658     }
1659    
1660 nigel 77 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1661     printf("trailing white space is removed and blank lines are ignored.\n");
1662     printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1663 nigel 53
1664 nigel 77 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1665 nigel 53 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1666     }
1667    
1668    
1669    
1670    
1671     /*************************************************
1672 nigel 77 * Handle a single-letter, no data option *
1673 nigel 53 *************************************************/
1674    
1675     static int
1676     handle_option(int letter, int options)
1677     {
1678     switch(letter)
1679     {
1680 ph10 286 case N_FOFFSETS: file_offsets = TRUE; break;
1681 nigel 87 case N_HELP: help(); exit(0);
1682 ph10 286 case N_LOFFSETS: line_offsets = number = TRUE; break;
1683 nigel 53 case 'c': count_only = TRUE; break;
1684 nigel 87 case 'F': process_options |= PO_FIXED_STRINGS; break;
1685     case 'H': filenames = FN_FORCE; break;
1686     case 'h': filenames = FN_NONE; break;
1687 nigel 53 case 'i': options |= PCRE_CASELESS; break;
1688 nigel 87 case 'l': filenames = FN_ONLY; break;
1689     case 'L': filenames = FN_NOMATCH_ONLY; break;
1690 nigel 77 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1691 nigel 53 case 'n': number = TRUE; break;
1692 nigel 87 case 'o': only_matching = TRUE; break;
1693 nigel 77 case 'q': quiet = TRUE; break;
1694 nigel 87 case 'r': dee_action = dee_RECURSE; break;
1695 nigel 53 case 's': silent = TRUE; break;
1696 nigel 93 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1697 nigel 53 case 'v': invert = TRUE; break;
1698 nigel 87 case 'w': process_options |= PO_WORD_MATCH; break;
1699     case 'x': process_options |= PO_LINE_MATCH; break;
1700 nigel 53
1701     case 'V':
1702 ph10 97 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1703 nigel 53 exit(0);
1704     break;
1705    
1706     default:
1707     fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1708     exit(usage(2));
1709     }
1710    
1711     return options;
1712     }
1713    
1714    
1715    
1716    
/*************************************************
*          Construct printed ordinal             *
*************************************************/

/* This turns a number into "1st", "3rd", etc. The ending is chosen from the
last digit, except that numbers whose last two digits are 11, 12, or 13 take
"th" ("11th", "112th"), as do all numbers that end in any other digit.

Argument:   n    the number
Returns:         pointer to a static buffer holding the text; the buffer is
                 overwritten by the next call, and is big enough for any int
*/

static char *
ordin(int n)
{
static char buffer[16];
const char *ending = "th";
int lasttwo = n % 100;

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n%10)
    {
    case 1:  ending = "st"; break;
    case 2:  ending = "nd"; break;
    case 3:  ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1739    
1740    
1741    
1742     /*************************************************
1743     * Compile a single pattern *
1744     *************************************************/
1745    
1746     /* When the -F option has been used, this is called for each substring.
1747     Otherwise it's called for each supplied pattern.
1748    
1749     Arguments:
1750     pattern the pattern string
1751     options the PCRE options
1752     filename the file name, or NULL for a command-line pattern
1753     count 0 if this is the only command line pattern, or
1754     number of the command line pattern, or
1755     linenumber for a pattern from a file
1756    
1757     Returns: TRUE on success, FALSE after an error
1758     */
1759    
1760     static BOOL
1761     compile_single_pattern(char *pattern, int options, char *filename, int count)
1762     {
1763     char buffer[MBUFTHIRD + 16];
1764     const char *error;
1765     int errptr;
1766    
1767     if (pattern_count >= MAX_PATTERN_COUNT)
1768     {
1769     fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1770     (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1771     return FALSE;
1772     }
1773    
1774     sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1775     suffix[process_options]);
1776     pattern_list[pattern_count] =
1777     pcre_compile(buffer, options, &error, &errptr, pcretables);
1778 ph10 142 if (pattern_list[pattern_count] != NULL)
1779 ph10 141 {
1780 ph10 142 pattern_count++;
1781 ph10 141 return TRUE;
1782 ph10 142 }
1783 nigel 87
1784     /* Handle compile errors */
1785    
1786     errptr -= (int)strlen(prefix[process_options]);
1787     if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1788    
1789     if (filename == NULL)
1790     {
1791     if (count == 0)
1792     fprintf(stderr, "pcregrep: Error in command-line regex "
1793     "at offset %d: %s\n", errptr, error);
1794     else
1795     fprintf(stderr, "pcregrep: Error in %s command-line regex "
1796     "at offset %d: %s\n", ordin(count), errptr, error);
1797     }
1798     else
1799     {
1800     fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1801     "at offset %d: %s\n", count, filename, errptr, error);
1802     }
1803    
1804     return FALSE;
1805     }
1806    
1807    
1808    
1809     /*************************************************
1810     * Compile one supplied pattern *
1811     *************************************************/
1812    
1813     /* When the -F option has been used, each string may be a list of strings,
1814 nigel 91 separated by line breaks. They will be matched literally.
1815 nigel 87
1816     Arguments:
1817     pattern the pattern string
1818     options the PCRE options
1819     filename the file name, or NULL for a command-line pattern
1820     count 0 if this is the only command line pattern, or
1821     number of the command line pattern, or
1822     linenumber for a pattern from a file
1823    
1824     Returns: TRUE on success, FALSE after an error
1825     */
1826    
1827     static BOOL
1828     compile_pattern(char *pattern, int options, char *filename, int count)
1829     {
1830     if ((process_options & PO_FIXED_STRINGS) != 0)
1831     {
1832 nigel 93 char *eop = pattern + strlen(pattern);
1833 nigel 87 char buffer[MBUFTHIRD];
1834     for(;;)
1835     {
1836 nigel 93 int ellength;
1837     char *p = end_of_line(pattern, eop, &ellength);
1838     if (ellength == 0)
1839 nigel 87 return compile_single_pattern(pattern, options, filename, count);
1840 ph10 151 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1841 nigel 93 pattern = p;
1842 nigel 87 if (!compile_single_pattern(buffer, options, filename, count))
1843     return FALSE;
1844     }
1845     }
1846     else return compile_single_pattern(pattern, options, filename, count);
1847     }
1848    
1849    
1850    
1851     /*************************************************
1852 nigel 49 * Main program *
1853     *************************************************/
1854    
1855 nigel 77 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1856    
1857 nigel 49 int
1858     main(int argc, char **argv)
1859     {
1860 nigel 53 int i, j;
1861 nigel 49 int rc = 1;
1862 nigel 87 int pcre_options = 0;
1863     int cmd_pattern_count = 0;
1864 ph10 141 int hint_count = 0;
1865 nigel 49 int errptr;
1866 nigel 87 BOOL only_one_at_top;
1867     char *patterns[MAX_PATTERN_COUNT];
1868     const char *locale_from = "--locale";
1869 nigel 49 const char *error;
1870    
1871 nigel 93 /* Set the default line ending value from the default in the PCRE library;
1872     "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1873     */
1874 nigel 91
1875     (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1876     switch(i)
1877     {
1878     default: newline = (char *)"lf"; break;
1879     case '\r': newline = (char *)"cr"; break;
1880     case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1881 nigel 93 case -1: newline = (char *)"any"; break;
1882 ph10 150 case -2: newline = (char *)"anycrlf"; break;
1883 nigel 91 }
1884    
1885 nigel 49 /* Process the options */
1886    
1887     for (i = 1; i < argc; i++)
1888     {
1889 nigel 77 option_item *op = NULL;
1890     char *option_data = (char *)""; /* default to keep compiler happy */
1891     BOOL longop;
1892     BOOL longopwasequals = FALSE;
1893    
1894 nigel 49 if (argv[i][0] != '-') break;
1895 nigel 53
1896 nigel 77 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1897 nigel 87 but only if we have previously had -e or -f to define the patterns. */
1898 nigel 63
1899 nigel 77 if (argv[i][1] == 0)
1900     {
1901 nigel 87 if (pattern_filename != NULL || pattern_count > 0) break;
1902 nigel 77 else exit(usage(2));
1903     }
1904 nigel 63
1905 nigel 77 /* Handle a long name option, or -- to terminate the options */
1906 nigel 53
1907     if (argv[i][1] == '-')
1908 nigel 49 {
1909 nigel 77 char *arg = argv[i] + 2;
1910     char *argequals = strchr(arg, '=');
1911 nigel 53
1912 nigel 77 if (*arg == 0) /* -- terminates options */
1913 nigel 49 {
1914 nigel 77 i++;
1915     break; /* out of the options-handling loop */
1916 nigel 53 }
1917 nigel 49
1918 nigel 77 longop = TRUE;
1919    
1920     /* Some long options have data that follows after =, for example file=name.
1921     Some options have variations in the long name spelling: specifically, we
1922     allow "regexp" because GNU grep allows it, though I personally go along
1923 nigel 87 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1924     These options are entered in the table as "regex(p)". No option is in both
1925     these categories, fortunately. */
1926 nigel 77
1927 nigel 53 for (op = optionlist; op->one_char != 0; op++)
1928     {
1929 nigel 77 char *opbra = strchr(op->long_name, '(');
1930     char *equals = strchr(op->long_name, '=');
1931     if (opbra == NULL) /* Not a (p) case */
1932 nigel 53 {
1933 nigel 77 if (equals == NULL) /* Not thing=data case */
1934     {
1935     if (strcmp(arg, op->long_name) == 0) break;
1936     }
1937     else /* Special case xxx=data */
1938     {
1939     int oplen = equals - op->long_name;
1940 ph10 199 int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1941 nigel 77 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1942     {
1943     option_data = arg + arglen;
1944     if (*option_data == '=')
1945     {
1946     option_data++;
1947     longopwasequals = TRUE;
1948     }
1949     break;
1950     }
1951     }
1952 nigel 53 }
1953 nigel 77 else /* Special case xxxx(p) */
1954     {
1955     char buff1[24];
1956     char buff2[24];
1957     int baselen = opbra - op->long_name;
1958     sprintf(buff1, "%.*s", baselen, op->long_name);
1959 ph10 152 sprintf(buff2, "%s%.*s", buff1,
1960 ph10 151 (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1961 nigel 77 if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1962     break;
1963     }
1964 nigel 53 }
1965 nigel 77
1966 nigel 53 if (op->one_char == 0)
1967     {
1968     fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1969     exit(usage(2));
1970     }
1971     }
1972 nigel 49
1973 nigel 89
1974     /* Jeffrey Friedl's debugging harness uses these additional options which
1975     are not in the right form for putting in the option table because they use
1976     only one hyphen, yet are more than one character long. By putting them
1977     separately here, they will not get displayed as part of the help() output,
1978     but I don't think Jeffrey will care about that. */
1979    
1980     #ifdef JFRIEDL_DEBUG
1981     else if (strcmp(argv[i], "-pre") == 0) {
1982     jfriedl_prefix = argv[++i];
1983     continue;
1984     } else if (strcmp(argv[i], "-post") == 0) {
1985     jfriedl_postfix = argv[++i];
1986     continue;
1987     } else if (strcmp(argv[i], "-XT") == 0) {
1988     sscanf(argv[++i], "%d", &jfriedl_XT);
1989     continue;
1990     } else if (strcmp(argv[i], "-XR") == 0) {
1991     sscanf(argv[++i], "%d", &jfriedl_XR);
1992     continue;
1993     }
1994     #endif
1995    
1996    
1997 nigel 77 /* One-char options; many that have no data may be in a single argument; we
1998     continue till we hit the last one or one that needs data. */
1999 nigel 53
2000     else
2001     {
2002     char *s = argv[i] + 1;
2003 nigel 77 longop = FALSE;
2004 nigel 53 while (*s != 0)
2005     {
2006 nigel 77 for (op = optionlist; op->one_char != 0; op++)
2007     { if (*s == op->one_char) break; }
2008     if (op->one_char == 0)
2009 nigel 53 {
2010 nigel 77 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2011     *s, argv[i]);
2012     exit(usage(2));
2013     }
2014     if (op->type != OP_NODATA || s[1] == 0)
2015     {
2016     option_data = s+1;
2017 nigel 53 break;
2018     }
2019 nigel 87 pcre_options = handle_option(*s++, pcre_options);
2020 nigel 49 }
2021     }
2022 nigel 77
2023 nigel 87 /* At this point we should have op pointing to a matched option. If the type
2024     is NO_DATA, it means that there is no data, and the option might set
2025     something in the PCRE options. */
2026 nigel 77
2027     if (op->type == OP_NODATA)
2028     {
2029 nigel 87 pcre_options = handle_option(op->one_char, pcre_options);
2030     continue;
2031     }
2032    
2033     /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2034     either has a value or defaults to something. It cannot have data in a
2035     separate item. At the moment, the only such options are "colo(u)r" and
2036 nigel 89 Jeffrey Friedl's special -S debugging option. */
2037 nigel 87
2038     if (*option_data == 0 &&
2039     (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2040     {
2041     switch (op->one_char)
2042 nigel 77 {
2043 nigel 87 case N_COLOUR:
2044     colour_option = (char *)"auto";
2045     break;
2046     #ifdef JFRIEDL_DEBUG
2047     case 'S':
2048     S_arg = 0;
2049     break;
2050     #endif
2051 nigel 77 }
2052 nigel 87 continue;
2053     }
2054 nigel 77
2055 nigel 87 /* Otherwise, find the data string for the option. */
2056    
2057     if (*option_data == 0)
2058     {
2059     if (i >= argc - 1 || longopwasequals)
2060 nigel 77 {
2061 nigel 87 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2062     exit(usage(2));
2063     }
2064     option_data = argv[++i];
2065     }
2066    
2067     /* If the option type is OP_PATLIST, it's the -e option, which can be called
2068     multiple times to create a list of patterns. */
2069    
2070     if (op->type == OP_PATLIST)
2071     {
2072     if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2073     {
2074     fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2075     MAX_PATTERN_COUNT);
2076     return 2;
2077     }
2078     patterns[cmd_pattern_count++] = option_data;
2079     }
2080    
2081     /* Otherwise, deal with single string or numeric data values. */
2082    
2083     else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2084     {
2085     *((char **)op->dataptr) = option_data;
2086     }
2087     else
2088     {
2089     char *endptr;
2090     int n = strtoul(option_data, &endptr, 10);
2091     if (*endptr != 0)
2092     {
2093     if (longop)
2094 nigel 77 {
2095 nigel 87 char *equals = strchr(op->long_name, '=');
2096     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2097     equals - op->long_name;
2098     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2099     option_data, nlen, op->long_name);
2100 nigel 77 }
2101 nigel 87 else
2102     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2103     option_data, op->one_char);
2104     exit(usage(2));
2105 nigel 77 }
2106 nigel 87 *((int *)op->dataptr) = n;
2107 nigel 77 }
2108 nigel 49 }
2109    
2110 nigel 77 /* Options have been decoded. If -C was used, its value is used as a default
2111     for -A and -B. */
2112    
2113     if (both_context > 0)
2114     {
2115     if (after_context == 0) after_context = both_context;
2116     if (before_context == 0) before_context = both_context;
2117     }
2118 ph10 286
2119     /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2120 ph10 280 However, the latter two set the only_matching flag. */
2121 nigel 77
2122 ph10 280 if ((only_matching && (file_offsets || line_offsets)) ||
2123 ph10 286 (file_offsets && line_offsets))
2124 ph10 280 {
2125     fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2126     "and/or --line-offsets\n");
2127     exit(usage(2));
2128     }
2129    
2130 ph10 286 if (file_offsets || line_offsets) only_matching = TRUE;
2131    
2132 nigel 87 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2133     LC_ALL environment variable is set, and if so, use it. */
2134 nigel 49
2135 nigel 87 if (locale == NULL)
2136 nigel 53 {
2137 nigel 87 locale = getenv("LC_ALL");
2138     locale_from = "LCC_ALL";
2139 nigel 53 }
2140 nigel 49
2141 nigel 87 if (locale == NULL)
2142     {
2143     locale = getenv("LC_CTYPE");
2144     locale_from = "LC_CTYPE";
2145     }
2146 nigel 49
2147 nigel 87 /* If a locale has been provided, set it, and generate the tables the PCRE
2148     needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2149    
2150     if (locale != NULL)
2151 nigel 49 {
2152 nigel 87 if (setlocale(LC_CTYPE, locale) == NULL)
2153 nigel 53 {
2154 nigel 87 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2155     locale, locale_from);
2156 nigel 53 return 2;
2157     }
2158 nigel 87 pcretables = pcre_maketables();
2159     }
2160 nigel 77
2161 nigel 87 /* Sort out colouring */
2162    
2163     if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2164     {
2165     if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2166     else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2167     else
2168 nigel 53 {
2169 nigel 87 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2170     colour_option);
2171     return 2;
2172 nigel 77 }
2173 nigel 87 if (do_colour)
2174 nigel 77 {
2175 nigel 87 char *cs = getenv("PCREGREP_COLOUR");
2176     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2177     if (cs != NULL) colour_string = cs;
2178 nigel 77 }
2179 nigel 87 }
2180 nigel 77
2181 nigel 91 /* Interpret the newline type; the default settings are Unix-like. */
2182    
2183     if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2184     {
2185     pcre_options |= PCRE_NEWLINE_CR;
2186 nigel 93 endlinetype = EL_CR;
2187 nigel 91 }
2188     else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2189     {
2190     pcre_options |= PCRE_NEWLINE_LF;
2191 nigel 93 endlinetype = EL_LF;
2192 nigel 91 }
2193     else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2194     {
2195     pcre_options |= PCRE_NEWLINE_CRLF;
2196 nigel 93 endlinetype = EL_CRLF;
2197 nigel 91 }
2198 nigel 93 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2199     {
2200     pcre_options |= PCRE_NEWLINE_ANY;
2201     endlinetype = EL_ANY;
2202     }
2203 ph10 149 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2204     {
2205     pcre_options |= PCRE_NEWLINE_ANYCRLF;
2206     endlinetype = EL_ANYCRLF;
2207     }
2208 nigel 91 else
2209     {
2210     fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2211     return 2;
2212     }
2213    
2214 nigel 87 /* Interpret the text values for -d and -D */
2215    
2216     if (dee_option != NULL)
2217     {
2218     if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2219     else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2220     else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2221     else
2222 nigel 77 {
2223 nigel 87 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2224     return 2;
2225 nigel 53 }
2226 nigel 49 }
2227    
2228 nigel 87 if (DEE_option != NULL)
2229     {
2230     if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2231     else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2232     else
2233     {
2234     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2235     return 2;
2236     }
2237     }
2238 nigel 49
2239 nigel 89 /* Check the values for Jeffrey Friedl's debugging options. */
2240 nigel 87
2241     #ifdef JFRIEDL_DEBUG
2242     if (S_arg > 9)
2243 nigel 49 {
2244 nigel 87 fprintf(stderr, "pcregrep: bad value for -S option\n");
2245     return 2;
2246     }
2247 nigel 89 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2248     {
2249     if (jfriedl_XT == 0) jfriedl_XT = 1;
2250     if (jfriedl_XR == 0) jfriedl_XR = 1;
2251     }
2252 nigel 87 #endif
2253 nigel 77
2254 nigel 87 /* Get memory to store the pattern and hints lists. */
2255    
2256     pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2257     hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2258    
2259     if (pattern_list == NULL || hints_list == NULL)
2260     {
2261     fprintf(stderr, "pcregrep: malloc failed\n");
2262 ph10 123 goto EXIT2;
2263 nigel 87 }
2264    
2265     /* If no patterns were provided by -e, and there is no file provided by -f,
2266     the first argument is the one and only pattern, and it must exist. */
2267    
2268     if (cmd_pattern_count == 0 && pattern_filename == NULL)
2269     {
2270 nigel 63 if (i >= argc) return usage(2);
2271 nigel 87 patterns[cmd_pattern_count++] = argv[i++];
2272     }
2273 nigel 77
2274 nigel 87 /* Compile the patterns that were provided on the command line, either by
2275     multiple uses of -e or as a single unkeyed pattern. */
2276    
2277     for (j = 0; j < cmd_pattern_count; j++)
2278     {
2279     if (!compile_pattern(patterns[j], pcre_options, NULL,
2280     (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2281 ph10 123 goto EXIT2;
2282 nigel 87 }
2283    
2284     /* Compile the regular expressions that are provided in a file. */
2285    
2286     if (pattern_filename != NULL)
2287     {
2288     int linenumber = 0;
2289     FILE *f;
2290     char *filename;
2291     char buffer[MBUFTHIRD];
2292    
2293     if (strcmp(pattern_filename, "-") == 0)
2294 nigel 77 {
2295 nigel 87 f = stdin;
2296     filename = stdin_name;
2297 nigel 77 }
2298 nigel 87 else
2299 nigel 77 {
2300 nigel 87 f = fopen(pattern_filename, "r");
2301     if (f == NULL)
2302     {
2303     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2304     strerror(errno));
2305 ph10 123 goto EXIT2;
2306 nigel 87 }
2307     filename = pattern_filename;
2308 nigel 77 }
2309    
2310 nigel 87 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2311 nigel 53 {
2312 nigel 87 char *s = buffer + (int)strlen(buffer);
2313     while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2314     *s = 0;
2315     linenumber++;
2316     if (buffer[0] == 0) continue; /* Skip blank lines */
2317     if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2318 ph10 121 goto EXIT2;
2319 nigel 53 }
2320 nigel 87
2321     if (f != stdin) fclose(f);
2322 nigel 49 }
2323    
2324 nigel 77 /* Study the regular expressions, as we will be running them many times */
2325 nigel 53
2326     for (j = 0; j < pattern_count; j++)
2327     {
2328     hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2329     if (error != NULL)
2330     {
2331     char s[16];
2332     if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2333     fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2334 ph10 121 goto EXIT2;
2335 nigel 53 }
2336 ph10 142 hint_count++;
2337 nigel 53 }
2338    
2339 nigel 77 /* If there are include or exclude patterns, compile them. */
2340    
2341     if (exclude_pattern != NULL)
2342     {
2343 nigel 87 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2344     pcretables);
2345 nigel 77 if (exclude_compiled == NULL)
2346     {
2347     fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2348     errptr, error);
2349 ph10 121 goto EXIT2;
2350 nigel 77 }
2351     }
2352    
2353     if (include_pattern != NULL)
2354     {
2355 nigel 87 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2356     pcretables);
2357 nigel 77 if (include_compiled == NULL)
2358     {
2359     fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2360     errptr, error);
2361 ph10 121 goto EXIT2;
2362 nigel 77 }
2363     }
2364    
2365 ph10 325 if (exclude_dir_pattern != NULL)
2366     {
2367     exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2368     pcretables);
2369     if (exclude_dir_compiled == NULL)
2370     {
2371     fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2372     errptr, error);
2373     goto EXIT2;
2374     }
2375     }
2376    
2377     if (include_dir_pattern != NULL)
2378     {
2379     include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2380     pcretables);
2381     if (include_dir_compiled == NULL)
2382     {
2383     fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2384     errptr, error);
2385     goto EXIT2;
2386     }
2387     }
2388    
2389 nigel 87 /* If there are no further arguments, do the business on stdin and exit. */
2390 nigel 49
2391 nigel 87 if (i >= argc)
2392 ph10 121 {
2393 ph10 286 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2394 ph10 121 goto EXIT;
2395 ph10 123 }
2396 nigel 49
2397 nigel 53 /* Otherwise, work through the remaining arguments as files or directories.
2398     Pass in the fact that there is only one argument at top level - this suppresses
2399 nigel 87 the file name if the argument is not a directory and filenames are not
2400     otherwise forced. */
2401 nigel 49
2402 nigel 87 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2403 nigel 49
2404     for (; i < argc; i++)
2405     {
2406 nigel 87 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2407     only_one_at_top);
2408 nigel 77 if (frc > 1) rc = frc;
2409     else if (frc == 0 && rc == 1) rc = 0;
2410 nigel 49 }
2411    
2412 ph10 121 EXIT:
2413     if (pattern_list != NULL)
2414     {
2415 ph10 123 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2416 ph10 121 free(pattern_list);
2417 ph10 123 }
2418 ph10 121 if (hints_list != NULL)
2419     {
2420 ph10 141 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2421 ph10 121 free(hints_list);
2422 ph10 123 }
2423 nigel 49 return rc;
2424 ph10 121
2425     EXIT2:
2426     rc = 2;
2427     goto EXIT;
2428 nigel 49 }
2429    
2430 nigel 77 /* End of pcregrep */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12