/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2006 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
45  #include <errno.h>  #include <errno.h>
46    
47    #include <sys/types.h>
48    #include <sys/stat.h>
49    #include <unistd.h>
50    
51  #include "config.h"  #include "config.h"
52  #include "pcre.h"  #include "pcre.h"
53    
# Line 17  its pattern matching. */ Line 56  its pattern matching. */
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59    #define VERSION "4.3 01-Jun-2006"
60    #define MAX_PATTERN_COUNT 100
61    
62    #if BUFSIZ > 8192
63    #define MBUFTHIRD BUFSIZ
64    #else
65    #define MBUFTHIRD 8192
66    #endif
67    
68    
69    /* Values for the "filenames" variable, which specifies options for file name
70    output. The order is important; it is assumed that a file name is wanted for
71    all values greater than FN_DEFAULT. */
72    
73    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
74    
75    /* Actions for the -d and -D options */
76    
77    enum { dee_READ, dee_SKIP, dee_RECURSE };
78    enum { DEE_READ, DEE_SKIP };
79    
80    /* Actions for special processing options (flag bits) */
81    
82    #define PO_WORD_MATCH     0x0001
83    #define PO_LINE_MATCH     0x0002
84    #define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88  /*************************************************  /*************************************************
89  *               Global variables                 *  *               Global variables                 *
90  *************************************************/  *************************************************/
91    
92  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
93  static pcre_extra *hints;  regular code. */
94    
95    #ifdef JFRIEDL_DEBUG
96    static int S_arg = -1;
97    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99    static const char *jfriedl_prefix = "";
100    static const char *jfriedl_postfix = "";
101    #endif
102    
103    static int  endlinebyte = '\n';     /* Last byte of endline sequence */
104    static int  endlineextra = 0;       /* Extra bytes for endline sequence */
105    
106    static char *colour_string = (char *)"1;31";
107    static char *colour_option = NULL;
108    static char *dee_option = NULL;
109    static char *DEE_option = NULL;
110    static char *newline = NULL;
111    static char *pattern_filename = NULL;
112    static char *stdin_name = (char *)"(standard input)";
113    static char *locale = NULL;
114    
115    static const unsigned char *pcretables = NULL;
116    
117    static int  pattern_count = 0;
118    static pcre **pattern_list;
119    static pcre_extra **hints_list;
120    
121    static char *include_pattern = NULL;
122    static char *exclude_pattern = NULL;
123    
124    static pcre *include_compiled = NULL;
125    static pcre *exclude_compiled = NULL;
126    
127    static int after_context = 0;
128    static int before_context = 0;
129    static int both_context = 0;
130    static int dee_action = dee_READ;
131    static int DEE_action = DEE_READ;
132    static int error_count = 0;
133    static int filenames = FN_DEFAULT;
134    static int process_options = 0;
135    
136  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
137  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
138    static BOOL hyphenpending = FALSE;
139  static BOOL invert = FALSE;  static BOOL invert = FALSE;
140    static BOOL multiline = FALSE;
141  static BOOL number = FALSE;  static BOOL number = FALSE;
142    static BOOL only_matching = FALSE;
143    static BOOL quiet = FALSE;
144  static BOOL silent = FALSE;  static BOOL silent = FALSE;
145  static BOOL whole_lines = FALSE;  
146    /* Structure for options and list of them */
147    
148    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
149           OP_PATLIST };
150    
151    typedef struct option_item {
152      int type;
153      int one_char;
154      void *dataptr;
155      const char *long_name;
156      const char *help_text;
157    } option_item;
158    
159    /* Options without a single-letter equivalent get a negative value. This can be
160    used to identify them. */
161    
162    #define N_COLOUR    (-1)
163    #define N_EXCLUDE   (-2)
164    #define N_HELP      (-3)
165    #define N_INCLUDE   (-4)
166    #define N_LABEL     (-5)
167    #define N_LOCALE    (-6)
168    #define N_NULL      (-7)
169    
170    static option_item optionlist[] = {
171      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
172      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
173      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
174      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
175      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
176      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
177      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
178      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
179      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
180      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
181      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
182      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
183      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
184      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
185      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
186      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
187      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
188      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
189      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
190      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
191      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
192      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
193      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
194      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
195      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
196      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
197      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
198      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
199    #ifdef JFRIEDL_DEBUG
200      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
201    #endif
202      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
203      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
204      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
205      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
206      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
207      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
208      { OP_NODATA,    0,        NULL,               NULL,            NULL }
209    };
210    
211    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
212    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
213    that the combination of -w and -x has the same effect as -x on its own, so we
214    can treat them as the same. */
215    
/* Indexed by the low three bits of process_options:
   bit 1 = word match (-w), bit 2 = line match (-x), bit 4 = fixed strings (-F).
   Entries with both -w and -x set repeat the -x entry. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
221    
222    
223    
224    /*************************************************
225    *            OS-specific functions               *
226    *************************************************/
227    
228    /* These functions are defined so that they can be made system specific,
229    although at present the only ones are for Unix, Win32, and for "no support". */
230    
231    
232    /************* Directory scanning in Unix ***********/
233    
234    #if IS_UNIX
235    #include <sys/types.h>
236    #include <sys/stat.h>
237    #include <dirent.h>
238    
239    typedef DIR directory_type;
240    
/* Test whether a path names a directory (Unix version).

Argument:   filename   the path to test
Returns:    '/' if stat() succeeds and reports a directory
            0 otherwise, including when stat() fails
*/

static int
isdirectory(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 0;        /* In the expectation that opening as a file will fail */
/* S_ISDIR() is the portable POSIX test; the raw S_IFMT/S_IFDIR masks are not
visible under strict feature-test settings. */
return S_ISDIR(statbuf.st_mode)? '/' : 0;
}
249    
250    static directory_type *
251    opendirectory(char *filename)
252    {
253    return opendir(filename);
254    }
255    
256    static char *
257    readdirectory(directory_type *dir)
258    {
259    for (;;)
260      {
261      struct dirent *dent = readdir(dir);
262      if (dent == NULL) return NULL;
263      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
264        return dent->d_name;
265      }
266    return NULL;   /* Keep compiler happy; never executed */
267    }
268    
269    static void
270    closedirectory(directory_type *dir)
271    {
272    closedir(dir);
273    }
274    
275    
276    /************* Test for regular file in Unix **********/
277    
/* Test whether a path names a regular file (Unix version).

Argument:   filename   the path to test
Returns:    non-zero if stat() reports a regular file, or if stat() fails
            0 if stat() succeeds and reports any other file type
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */
/* S_ISREG() is the portable POSIX test; the raw S_IFMT/S_IFREG masks are not
visible under strict feature-test settings. */
return S_ISREG(statbuf.st_mode) != 0;
}
286    
287    
288    /************* Test stdout for being a terminal in Unix **********/
289    
290    static BOOL
291    is_stdout_tty(void)
292    {
293    return isatty(fileno(stdout));
294    }
295    
296    
297    /************* Directory scanning in Win32 ***********/
298    
299    /* I (Philip Hazel) have no means of testing this code. It was contributed by
300    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
301    when it did not exist. */
302    
303    
304    #elif HAVE_WIN32API
305    
306    #ifndef STRICT
307    # define STRICT
308    #endif
309    #ifndef WIN32_LEAN_AND_MEAN
310    # define WIN32_LEAN_AND_MEAN
311    #endif
312    #ifndef INVALID_FILE_ATTRIBUTES
313    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
314    #endif
315    
316    #include <windows.h>
317    
318    typedef struct directory_type
319    {
320    HANDLE handle;
321    BOOL first;
322    WIN32_FIND_DATA data;
323    } directory_type;
324    
325    int
326    isdirectory(char *filename)
327    {
328    DWORD attr = GetFileAttributes(filename);
329    if (attr == INVALID_FILE_ATTRIBUTES)
330      return 0;
331    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
332    }
333    
334    directory_type *
335    opendirectory(char *filename)
336    {
337    size_t len;
338    char *pattern;
339    directory_type *dir;
340    DWORD err;
341    len = strlen(filename);
342    pattern = (char *) malloc(len + 3);
343    dir = (directory_type *) malloc(sizeof(*dir));
344    if ((pattern == NULL) || (dir == NULL))
345      {
346      fprintf(stderr, "pcregrep: malloc failed\n");
347      exit(2);
348      }
349    memcpy(pattern, filename, len);
350    memcpy(&(pattern[len]), "\\*", 3);
351    dir->handle = FindFirstFile(pattern, &(dir->data));
352    if (dir->handle != INVALID_HANDLE_VALUE)
353      {
354      free(pattern);
355      dir->first = TRUE;
356      return dir;
357      }
358    err = GetLastError();
359    free(pattern);
360    free(dir);
361    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
362    return NULL;
363    }
364    
365    char *
366    readdirectory(directory_type *dir)
367    {
368    for (;;)
369      {
370      if (!dir->first)
371        {
372        if (!FindNextFile(dir->handle, &(dir->data)))
373          return NULL;
374        }
375      else
376        {
377        dir->first = FALSE;
378        }
379      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
380        return dir->data.cFileName;
381      }
382    #ifndef _MSC_VER
383    return NULL;   /* Keep compiler happy; never executed */
384    #endif
385    }
386    
387    void
388    closedirectory(directory_type *dir)
389    {
390    FindClose(dir->handle);
391    free(dir);
392    }
393    
394    
395    /************* Test for regular file in Win32 **********/
396    
397    /* I don't know how to do this, or if it can be done; assume all paths are
398    regular if they are not directories. */
399    
/* Win32 version: every path that is not a directory is treated as a regular
file. Returns 1 for a non-directory and 0 for a directory. */

int isregfile(char *filename)
{
return !isdirectory(filename);   /* the terminating semicolon was missing */
}
404    
405    
406    /************* Test stdout for being a terminal in Win32 **********/
407    
408    /* I don't know how to do this; assume never */
409    
410    static BOOL
411    is_stdout_tty(void)
412    {
413    FALSE;
414    }
415    
416    
417    /************* Directory scanning when we can't do it ***********/
418    
419    /* The type is void, and apart from isdirectory(), the functions do nothing. */
420    
421    #else
422    
typedef void directory_type;

/* Stubs for systems with no directory support. Nothing is ever reported as a
directory. opendirectory() and readdirectory() return null pointers
explicitly: falling off the end of a non-void function leaves the result
undefined if a caller uses it. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return (directory_type *)0; }
char *readdirectory(directory_type *dir) { (void)dir; return (char *)0; }
void closedirectory(directory_type *dir) { (void)dir; }
429    
430    
431    /************* Test for regular when we can't do it **********/
432    
433    /* Assume all files are regular. */
434    
/* No way to test on this system: report every path as a regular file. */

int
isregfile(char *filename)
{
return 1;
}
436    
437    
438    /************* Test stdout for being a terminal when we can't do it **********/
439    
440    static BOOL
441    is_stdout_tty(void)
442    {
443    return FALSE;
444    }
445    
446    
447    #endif
448    
449    
450    
# Line 58  return sys_errlist[n]; Line 471  return sys_errlist[n];
471    
472    
473  /*************************************************  /*************************************************
474  *              Grep an individual file           *  *       Print the previous "after" lines         *
475  *************************************************/  *************************************************/
476    
477    /* This is called if we are about to lose said lines because of buffer filling,
478    and at the end of the file. The data in the line is written using fwrite() so
479    that a binary zero does not terminate it.
480    
481    Arguments:
482      lastmatchnumber   the number of the last matching line, plus one
483      lastmatchrestart  where we restarted after the last match
484      endptr            end of available data
485      printname         filename for printing
486    
487    Returns:            nothing
488    */
489    
490    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
491      char *endptr, char *printname)
492    {
493    if (after_context > 0 && lastmatchnumber > 0)
494      {
495      int count = 0;
496      while (lastmatchrestart < endptr && count++ < after_context)
497        {
498        char *pp = lastmatchrestart;
499        if (printname != NULL) fprintf(stdout, "%s-", printname);
500        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
501        while (*pp != endlinebyte) pp++;
502        fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),
503          stdout);
504        lastmatchrestart = pp + 1;
505        }
506      hyphenpending = TRUE;
507      }
508    }
509    
510    
511    
512    /*************************************************
513    *            Grep an individual file             *
514    *************************************************/
515    
516    /* This is called from grep_or_recurse() below. It uses a buffer that is three
517    times the value of MBUFTHIRD. The matching point is never allowed to stray into
518    the top third of the buffer, thus keeping more of the file available for
519    context printing or for multiline scanning. For large files, the pointer will
520    be in the middle third most of the time, so the bottom third is available for
521    "before" context printing.
522    
523    Arguments:
524      in           the fopened FILE stream
525      printname    the file name if it is to be printed for each match
526                   or NULL if the file name is not to be printed
527                   it cannot be NULL if filenames[_nomatch]_only is set
528    
529    Returns:       0 if there was at least one match
530                   1 otherwise (no matches)
531    */
532    
533  static int  static int
534  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
535  {  {
536  int rc = 1;  int rc = 1;
537  int linenumber = 0;  int linenumber = 1;
538    int lastmatchnumber = 0;
539  int count = 0;  int count = 0;
540  int offsets[99];  int offsets[99];
541  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
542    char buffer[3*MBUFTHIRD];
543    char *ptr = buffer;
544    char *endptr;
545    size_t bufflength;
546    BOOL endhyphenpending = FALSE;
547    
548    /* Do the first read into the start of the buffer and set up the pointer to
549    end of what we have. */
550    
551    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
552    endptr = buffer + bufflength;
553    
554    /* Loop while the current pointer is not at the end of the file. For large
555    files, endptr will be at the end of the buffer when we are in the middle of the
556    file, but ptr will never get there, because as soon as it gets over 2/3 of the
557    way, the buffer is shifted left and re-filled. */
558    
559  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
560    {    {
561    BOOL match;    int i;
562    int length = (int)strlen(buffer);    int mrc = 0;
563    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    BOOL match = FALSE;
564    linenumber++;    char *t = ptr;
565      size_t length, linelength;
566    
567      /* At this point, ptr is at the start of a line. We need to find the length
568      of the subject string to pass to pcre_exec(). In multiline mode, it is the
569      length of the remainder of the data in the buffer. Otherwise, it is the length of
570      the next line. After matching, we always advance by the length of the next
571      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
572      that any match is constrained to be in the first line. */
573    
574      linelength = 0;
575      while (t < endptr && *t++ != endlinebyte) linelength++;
576      length = multiline? endptr - ptr : linelength;
577    
578    
579      /* Extra processing for Jeffrey Friedl's debugging. */
580    
581    #ifdef JFRIEDL_DEBUG
582      if (jfriedl_XT || jfriedl_XR)
583      {
584          #include <sys/time.h>
585          #include <time.h>
586          struct timeval start_time, end_time;
587          struct timezone dummy;
588    
589          if (jfriedl_XT)
590          {
591              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
592              const char *orig = ptr;
593              ptr = malloc(newlen + 1);
594              if (!ptr) {
595                      printf("out of memory");
596                      exit(2);
597              }
598              endptr = ptr;
599              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
600              for (i = 0; i < jfriedl_XT; i++) {
601                      strncpy(endptr, orig,  length);
602                      endptr += length;
603              }
604              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
605              length = newlen;
606          }
607    
608          if (gettimeofday(&start_time, &dummy) != 0)
609                  perror("bad gettimeofday");
610    
611    
612          for (i = 0; i < jfriedl_XR; i++)
613              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
614    
615          if (gettimeofday(&end_time, &dummy) != 0)
616                  perror("bad gettimeofday");
617    
618          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
619                          -
620                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
621    
622          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
623          return 0;
624      }
625    #endif
626    
627    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;  
628    if (match && whole_lines && offsets[1] != length) match = FALSE;    /* Run through all the patterns until one matches. Note that we don't include
629      the final newline in the subject string. */
630    
631      for (i = 0; i < pattern_count; i++)
632        {
633        mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
634          offsets, 99);
635        if (mrc >= 0) { match = TRUE; break; }
636        if (mrc != PCRE_ERROR_NOMATCH)
637          {
638          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
639          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
640          fprintf(stderr, "this line:\n");
641          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
642          fprintf(stderr, "\n");
643          if (error_count == 0 &&
644              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
645            {
646            fprintf(stderr, "pcregrep: error %d means that a resource limit "
647              "was exceeded\n", mrc);
648            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
649            }
650          if (error_count++ > 20)
651            {
652            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
653            exit(2);
654            }
655          match = invert;    /* No more matching; don't show the line again */
656          break;
657          }
658        }
659    
660      /* If it's a match or a not-match (as required), do what's wanted. */
661    
662    if (match != invert)    if (match != invert)
663      {      {
664        BOOL hyphenprinted = FALSE;
665    
666        /* We've failed if we want a file that doesn't have any matches. */
667    
668        if (filenames == FN_NOMATCH_ONLY) return 1;
669    
670        /* Just count if just counting is wanted. */
671    
672      if (count_only) count++;      if (count_only) count++;
673    
674      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
675        in the file. */
676    
677        else if (filenames == FN_ONLY)
678        {        {
679        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
680        return 0;        return 0;
681        }        }
682    
683      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
684    
685        else if (quiet) return 0;
686    
687        /* The --only-matching option prints just the substring that matched, and
688        does not print any context. */
689    
690        else if (only_matching)
691          {
692          if (printname != NULL) fprintf(stdout, "%s:", printname);
693          if (number) fprintf(stdout, "%d:", linenumber);
694          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
695          fprintf(stdout, "\n");
696          }
697    
698        /* This is the default case when none of the above options is set. We print
699        the matching line(s), possibly preceded and/or followed by other lines of
700        context. */
701    
702      else      else
703        {        {
704        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
705          previous match. We never print any overlaps. */
706    
707          if (after_context > 0 && lastmatchnumber > 0)
708            {
709            int linecount = 0;
710            char *p = lastmatchrestart;
711    
712            while (p < ptr && linecount < after_context)
713              {
714              while (*p != endlinebyte) p++;
715              p++;
716              linecount++;
717              }
718    
719            /* It is important to advance lastmatchrestart during this printing so
720            that it interacts correctly with any "before" printing below. Print
721            each line's data using fwrite() in case there are binary zeroes. */
722    
723            while (lastmatchrestart < p)
724              {
725              char *pp = lastmatchrestart;
726              if (printname != NULL) fprintf(stdout, "%s-", printname);
727              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
728              while (*pp != endlinebyte) pp++;
729              fwrite(lastmatchrestart, 1, pp - lastmatchrestart +
730                (1 + endlineextra), stdout);
731              lastmatchrestart = pp + 1;
732              }
733            if (lastmatchrestart != ptr) hyphenpending = TRUE;
734            }
735    
736          /* If there were non-contiguous lines printed above, insert hyphens. */
737    
738          if (hyphenpending)
739            {
740            fprintf(stdout, "--\n");
741            hyphenpending = FALSE;
742            hyphenprinted = TRUE;
743            }
744    
745          /* See if there is a requirement to print some "before" lines for this
746          match. Again, don't print overlaps. */
747    
748          if (before_context > 0)
749            {
750            int linecount = 0;
751            char *p = ptr;
752    
753            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
754                   linecount < before_context)
755              {
756              linecount++;
757              p--;
758              while (p > buffer && p[-1] != endlinebyte) p--;
759              }
760    
761            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
762              fprintf(stdout, "--\n");
763    
764            while (p < ptr)
765              {
766              char *pp = p;
767              if (printname != NULL) fprintf(stdout, "%s-", printname);
768              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
769              while (*pp != endlinebyte) pp++;
770              fwrite(p, 1, pp - p + (1 + endlineextra), stdout);
771              p = pp + 1;
772              }
773            }
774    
775          /* Now print the matching line(s); ensure we set hyphenpending at the end
776          of the file if any context lines are being output. */
777    
778          if (after_context > 0 || before_context > 0)
779            endhyphenpending = TRUE;
780    
781          if (printname != NULL) fprintf(stdout, "%s:", printname);
782        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
783        fprintf(stdout, "%s\n", buffer);  
784          /* In multiline mode, we want to print to the end of the line in which
785          the end of the matched string is found, so we adjust linelength and the
786          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
787          start of the match will always be before the first newline sequence. */
788    
789          if (multiline)
790            {
791            char *endmatch = ptr + offsets[1];
792            t = ptr;
793            while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }
794            while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;
795            linelength = endmatch - ptr;
796            }
797    
798          /*** NOTE: Use only fwrite() to output the data line, so that binary
799          zeroes are treated as just another data character. */
800    
801          /* This extra option, for Jeffrey Friedl's debugging requirements,
802          replaces the matched string, or a specific captured string if it exists,
803          with X. When this happens, colouring is ignored. */
804    
805    #ifdef JFRIEDL_DEBUG
806          if (S_arg >= 0 && S_arg < mrc)
807            {
808            int first = S_arg * 2;
809            int last  = first + 1;
810            fwrite(ptr, 1, offsets[first], stdout);
811            fprintf(stdout, "X");
812            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
813            }
814          else
815    #endif
816    
817          /* We have to split the line(s) up if colouring. */
818    
819          if (do_colour)
820            {
821            fwrite(ptr, 1, offsets[0], stdout);
822            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
823            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
824            fprintf(stdout, "%c[00m", 0x1b);
825            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
826            }
827          else fwrite(ptr, 1, linelength, stdout);
828    
829          fprintf(stdout, "\n");
830          }
831    
832        /* End of doing what has to be done for a match */
833    
834        rc = 0;    /* Had some success */
835    
836        /* Remember where the last match happened for after_context. We remember
837        where we are about to restart, and that line's number. */
838    
839        lastmatchrestart = ptr + linelength + 1;
840        lastmatchnumber = linenumber + 1;
841        }
842    
843      /* Advance to after the newline and increment the line number. */
844    
845      ptr += linelength + 1;
846      linenumber++;
847    
848      /* If we haven't yet reached the end of the file (the buffer is full), and
849      the current point is in the top 1/3 of the buffer, slide the buffer down by
850      1/3 and refill it. Before we do this, if some unprinted "after" lines are
851      about to be lost, print them. */
852    
853      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
854        {
855        if (after_context > 0 &&
856            lastmatchnumber > 0 &&
857            lastmatchrestart < buffer + MBUFTHIRD)
858          {
859          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
860          lastmatchnumber = 0;
861        }        }
862    
863      rc = 0;      /* Now do the shuffle */
864    
865        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
866        ptr -= MBUFTHIRD;
867        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
868        endptr = buffer + bufflength;
869    
870        /* Adjust any last match point */
871    
872        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
873      }      }
874      }     /* Loop through the whole file */
875    
876    /* End of file; print final "after" lines if wanted; do_after_lines sets
877    hyphenpending if it prints something. */
878    
879    if (!only_matching && !count_only)
880      {
881      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
882      hyphenpending |= endhyphenpending;
883      }
884    
885    /* Print the file name if we are looking for those without matches and there
886    were none. If we found a match, we won't have got this far. */
887    
888    if (filenames == FN_NOMATCH_ONLY)
889      {
890      fprintf(stdout, "%s\n", printname);
891      return 0;
892    }    }
893    
894    /* Print the match count if wanted */
895    
896  if (count_only)  if (count_only)
897    {    {
898    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
899    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
900    }    }
901    
# Line 114  return rc; Line 904  return rc;
904    
905    
906    
907    /*************************************************
908    *     Grep a file or recurse into a directory    *
909    *************************************************/
910    
911    /* Given a path name, if it's a directory, scan all the files if we are
912    recursing; if it's a file, grep it.
913    
914    Arguments:
915      pathname          the path to investigate
916      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
917      only_one_at_top   TRUE if the path is the only one at toplevel
918    
919    Returns:   0 if there was at least one match
920               1 if there were no matches
921               2 if there was some kind of error
922    
923    However, file opening failures are suppressed if "silent" is set.
924    */
925    
926    static int
927    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
928    {
929    int rc = 1;
930    int sep;
931    FILE *in;
932    
933    /* If the file name is "-" we scan stdin */
934    
935    if (strcmp(pathname, "-") == 0)
936      {
937      return pcregrep(stdin,
938        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
939          stdin_name : NULL);
940      }
941    
942    
943    /* If the file is a directory, skip if skipping or if we are recursing, scan
944    each file within it, subject to any include or exclude patterns that were set.
945    The scanning code is localized so it can be made system-specific. */
946    
947    if ((sep = isdirectory(pathname)) != 0)
948      {
949      if (dee_action == dee_SKIP) return 1;
950      if (dee_action == dee_RECURSE)
951        {
952        char buffer[1024];
953        char *nextfile;
954        directory_type *dir = opendirectory(pathname);
955    
956        if (dir == NULL)
957          {
958          if (!silent)
959            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
960              strerror(errno));
961          return 2;
962          }
963    
964        while ((nextfile = readdirectory(dir)) != NULL)
965          {
966          int frc, blen;
967          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
968          blen = strlen(buffer);
969    
970          if (exclude_compiled != NULL &&
971              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
972            continue;
973    
974          if (include_compiled != NULL &&
975              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
976            continue;
977    
978          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
979          if (frc > 1) rc = frc;
980           else if (frc == 0 && rc == 1) rc = 0;
981          }
982    
983        closedirectory(dir);
984        return rc;
985        }
986      }
987    
988    /* If the file is not a directory and not a regular file, skip it if that's
989    been requested. */
990    
991    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
992    
993    /* Control reaches here if we have a regular file, or if we have a directory
994    and recursion or skipping was not requested, or if we have anything else and
995    skipping was not requested. The scan proceeds. If this is the first and only
996    argument at top level, we don't show the file name, unless we are only showing
997    the file name, or the filename was forced (-H). */
998    
999    in = fopen(pathname, "r");
1000    if (in == NULL)
1001      {
1002      if (!silent)
1003        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1004          strerror(errno));
1005      return 2;
1006      }
1007    
1008    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1009      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1010    
1011    fclose(in);
1012    return rc;
1013    }
1014    
1015    
1016    
1017    
1018  /*************************************************  /*************************************************
1019  *                Usage function                  *  *                Usage function                  *
# Line 122  return rc; Line 1022  return rc;
1022  static int  static int
1023  usage(int rc)  usage(int rc)
1024  {  {
1025  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  option_item *op;
1026    fprintf(stderr, "Usage: pcregrep [-");
1027    for (op = optionlist; op->one_char != 0; op++)
1028      {
1029      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1030      }
1031    fprintf(stderr, "] [long options] [pattern] [files]\n");
1032    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1033  return rc;  return rc;
1034  }  }
1035    
# Line 130  return rc; Line 1037  return rc;
1037    
1038    
1039  /*************************************************  /*************************************************
1040    *                Help function                   *
1041    *************************************************/
1042    
1043    static void
1044    help(void)
1045    {
1046    option_item *op;
1047    
1048    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1049    printf("Search for PATTERN in each FILE or standard input.\n");
1050    printf("PATTERN must be present if neither -e nor -f is used.\n");
1051    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1052    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1053    
1054    printf("Options:\n");
1055    
1056    for (op = optionlist; op->one_char != 0; op++)
1057      {
1058      int n;
1059      char s[4];
1060      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1061      printf("  %s --%s%n", s, op->long_name, &n);
1062      n = 30 - n;
1063      if (n < 1) n = 1;
1064      printf("%.*s%s\n", n, "                    ", op->help_text);
1065      }
1066    
1067    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1068    printf("trailing white space is removed and blank lines are ignored.\n");
1069    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1070    
1071    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1072    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1073    }
1074    
1075    
1076    
1077    
1078    /*************************************************
1079    *    Handle a single-letter, no data option      *
1080    *************************************************/
1081    
1082    static int
1083    handle_option(int letter, int options)
1084    {
1085    switch(letter)
1086      {
1087      case N_HELP: help(); exit(0);
1088      case 'c': count_only = TRUE; break;
1089      case 'F': process_options |= PO_FIXED_STRINGS; break;
1090      case 'H': filenames = FN_FORCE; break;
1091      case 'h': filenames = FN_NONE; break;
1092      case 'i': options |= PCRE_CASELESS; break;
1093      case 'l': filenames = FN_ONLY; break;
1094      case 'L': filenames = FN_NOMATCH_ONLY; break;
1095      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1096      case 'n': number = TRUE; break;
1097      case 'o': only_matching = TRUE; break;
1098      case 'q': quiet = TRUE; break;
1099      case 'r': dee_action = dee_RECURSE; break;
1100      case 's': silent = TRUE; break;
1101      case 'u': options |= PCRE_UTF8; break;
1102      case 'v': invert = TRUE; break;
1103      case 'w': process_options |= PO_WORD_MATCH; break;
1104      case 'x': process_options |= PO_LINE_MATCH; break;
1105    
1106      case 'V':
1107      fprintf(stderr, "pcregrep version %s using ", VERSION);
1108      fprintf(stderr, "PCRE version %s\n", pcre_version());
1109      exit(0);
1110      break;
1111    
1112      default:
1113      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1114      exit(usage(2));
1115      }
1116    
1117    return options;
1118    }
1119    
1120    
1121    
1122    
1123    /*************************************************
1124    *          Construct printed ordinal             *
1125    *************************************************/
1126    
1127    /* This turns a number into "1st", "3rd", etc. */
1128    
static char *
ordin(int n)
{
/* The result lives in a static buffer, so it is overwritten by the next call.
Three characters per byte of an int is an upper bound on its decimal digits;
the extra four bytes are for a sign, the two-letter ending, and the
terminating zero. */

static char buffer[sizeof(int) * 3 + 4];
const char *ending = "th";

/* Numbers whose tens digit is 1 (11, 12, 13, 111, ...) always take "th";
otherwise the ending is chosen by the final digit. A negative n never matches
any of the cases and so also gets "th". */

if ((n / 10) % 10 != 1)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1145    
1146    
1147    
1148    /*************************************************
1149    *          Compile a single pattern              *
1150    *************************************************/
1151    
1152    /* When the -F option has been used, this is called for each substring.
1153    Otherwise it's called for each supplied pattern.
1154    
1155    Arguments:
1156      pattern        the pattern string
1157      options        the PCRE options
1158      filename       the file name, or NULL for a command-line pattern
1159      count          0 if this is the only command line pattern, or
1160                     number of the command line pattern, or
1161                     linenumber for a pattern from a file
1162    
1163    Returns:         TRUE on success, FALSE after an error
1164    */
1165    
1166    static BOOL
1167    compile_single_pattern(char *pattern, int options, char *filename, int count)
1168    {
1169    char buffer[MBUFTHIRD + 16];
1170    const char *error;
1171    int errptr;
1172    
1173    if (pattern_count >= MAX_PATTERN_COUNT)
1174      {
1175      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1176        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1177      return FALSE;
1178      }
1179    
1180    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1181      suffix[process_options]);
1182    pattern_list[pattern_count] =
1183      pcre_compile(buffer, options, &error, &errptr, pcretables);
1184    if (pattern_list[pattern_count++] != NULL) return TRUE;
1185    
1186    /* Handle compile errors */
1187    
1188    errptr -= (int)strlen(prefix[process_options]);
1189    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1190    
1191    if (filename == NULL)
1192      {
1193      if (count == 0)
1194        fprintf(stderr, "pcregrep: Error in command-line regex "
1195          "at offset %d: %s\n", errptr, error);
1196      else
1197        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1198          "at offset %d: %s\n", ordin(count), errptr, error);
1199      }
1200    else
1201      {
1202      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1203        "at offset %d: %s\n", count, filename, errptr, error);
1204      }
1205    
1206    return FALSE;
1207    }
1208    
1209    
1210    
1211    /*************************************************
1212    *           Compile one supplied pattern         *
1213    *************************************************/
1214    
1215    /* When the -F option has been used, each string may be a list of strings,
1216    separated by line breaks. They will be matched literally.
1217    
1218    Arguments:
1219      pattern        the pattern string
1220      options        the PCRE options
1221      filename       the file name, or NULL for a command-line pattern
1222      count          0 if this is the only command line pattern, or
1223                     number of the command line pattern, or
1224                     linenumber for a pattern from a file
1225    
1226    Returns:         TRUE on success, FALSE after an error
1227    */
1228    
1229    static BOOL
1230    compile_pattern(char *pattern, int options, char *filename, int count)
1231    {
1232    if ((process_options & PO_FIXED_STRINGS) != 0)
1233      {
1234      char buffer[MBUFTHIRD];
1235      for(;;)
1236        {
1237        char *p = strchr(pattern, endlinebyte);
1238        if (p == NULL)
1239          return compile_single_pattern(pattern, options, filename, count);
1240        sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);
1241        pattern = p + 1;
1242        if (!compile_single_pattern(buffer, options, filename, count))
1243          return FALSE;
1244        }
1245      }
1246    else return compile_single_pattern(pattern, options, filename, count);
1247    }
1248    
1249    
1250    
1251    /*************************************************
1252  *                Main program                    *  *                Main program                    *
1253  *************************************************/  *************************************************/
1254    
1255    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1256    
1257  int  int
1258  main(int argc, char **argv)  main(int argc, char **argv)
1259  {  {
1260  int i;  int i, j;
1261  int rc = 1;  int rc = 1;
1262  int options = 0;  int pcre_options = 0;
1263    int cmd_pattern_count = 0;
1264  int errptr;  int errptr;
1265    BOOL only_one_at_top;
1266    char *patterns[MAX_PATTERN_COUNT];
1267    const char *locale_from = "--locale";
1268  const char *error;  const char *error;
1269  BOOL filenames = TRUE;  
1270    /* Set the default line ending value from the default in the PCRE library. */
1271    
1272    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1273    switch(i)
1274      {
1275      default:                 newline = (char *)"lf"; break;
1276      case '\r':               newline = (char *)"cr"; break;
1277      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1278      }
1279    
1280  /* Process the options */  /* Process the options */
1281    
1282  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1283    {    {
1284    char *s;    option_item *op = NULL;
1285      char *option_data = (char *)"";    /* default to keep compiler happy */
1286      BOOL longop;
1287      BOOL longopwasequals = FALSE;
1288    
1289    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1290    s = argv[i] + 1;  
1291    while (*s != 0)    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1292      but only if we have previously had -e or -f to define the patterns. */
1293    
1294      if (argv[i][1] == 0)
1295        {
1296        if (pattern_filename != NULL || pattern_count > 0) break;
1297          else exit(usage(2));
1298        }
1299    
1300      /* Handle a long name option, or -- to terminate the options */
1301    
1302      if (argv[i][1] == '-')
1303      {      {
1304      switch (*s++)      char *arg = argv[i] + 2;
1305        char *argequals = strchr(arg, '=');
1306    
1307        if (*arg == 0)    /* -- terminates options */
1308        {        {
1309        case 'c': count_only = TRUE; break;        i++;
1310        case 'h': filenames = FALSE; break;        break;                /* out of the options-handling loop */
1311        case 'i': options |= PCRE_CASELESS; break;        }
1312        case 'l': filenames_only = TRUE;  
1313        case 'n': number = TRUE; break;      longop = TRUE;
1314        case 's': silent = TRUE; break;  
1315        case 'v': invert = TRUE; break;      /* Some long options have data that follows after =, for example file=name.
1316        case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;      Some options have variations in the long name spelling: specifically, we
1317        allow "regexp" because GNU grep allows it, though I personally go along
1318        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1319        These options are entered in the table as "regex(p)". No option is in both
1320        these categories, fortunately. */
1321    
1322        for (op = optionlist; op->one_char != 0; op++)
1323          {
1324          char *opbra = strchr(op->long_name, '(');
1325          char *equals = strchr(op->long_name, '=');
1326          if (opbra == NULL)     /* Not a (p) case */
1327            {
1328            if (equals == NULL)  /* Not thing=data case */
1329              {
1330              if (strcmp(arg, op->long_name) == 0) break;
1331              }
1332            else                 /* Special case xxx=data */
1333              {
1334              int oplen = equals - op->long_name;
1335              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1336              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1337                {
1338                option_data = arg + arglen;
1339                if (*option_data == '=')
1340                  {
1341                  option_data++;
1342                  longopwasequals = TRUE;
1343                  }
1344                break;
1345                }
1346              }
1347            }
1348          else                   /* Special case xxxx(p) */
1349            {
1350            char buff1[24];
1351            char buff2[24];
1352            int baselen = opbra - op->long_name;
1353            sprintf(buff1, "%.*s", baselen, op->long_name);
1354            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1355              opbra + 1);
1356            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1357              break;
1358            }
1359          }
1360    
1361        case 'V':      if (op->one_char == 0)
1362        fprintf(stderr, "PCRE version %s\n", pcre_version());        {
1363          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1364          exit(usage(2));
1365          }
1366        }
1367    
1368    
1369      /* Jeffrey Friedl's debugging harness uses these additional options which
1370      are not in the right form for putting in the option table because they use
1371      only one hyphen, yet are more than one character long. By putting them
1372      separately here, they will not get displayed as part of the help() output,
1373      but I don't think Jeffrey will care about that. */
1374    
1375    #ifdef JFRIEDL_DEBUG
1376      else if (strcmp(argv[i], "-pre") == 0) {
1377              jfriedl_prefix = argv[++i];
1378              continue;
1379      } else if (strcmp(argv[i], "-post") == 0) {
1380              jfriedl_postfix = argv[++i];
1381              continue;
1382      } else if (strcmp(argv[i], "-XT") == 0) {
1383              sscanf(argv[++i], "%d", &jfriedl_XT);
1384              continue;
1385      } else if (strcmp(argv[i], "-XR") == 0) {
1386              sscanf(argv[++i], "%d", &jfriedl_XR);
1387              continue;
1388      }
1389    #endif
1390    
1391    
1392      /* One-char options; many that have no data may be in a single argument; we
1393      continue till we hit the last one or one that needs data. */
1394    
1395      else
1396        {
1397        char *s = argv[i] + 1;
1398        longop = FALSE;
1399        while (*s != 0)
1400          {
1401          for (op = optionlist; op->one_char != 0; op++)
1402            { if (*s == op->one_char) break; }
1403          if (op->one_char == 0)
1404            {
1405            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1406              *s, argv[i]);
1407            exit(usage(2));
1408            }
1409          if (op->type != OP_NODATA || s[1] == 0)
1410            {
1411            option_data = s+1;
1412            break;
1413            }
1414          pcre_options = handle_option(*s++, pcre_options);
1415          }
1416        }
1417    
1418      /* At this point we should have op pointing to a matched option. If the type
1419      is NO_DATA, it means that there is no data, and the option might set
1420      something in the PCRE options. */
1421    
1422      if (op->type == OP_NODATA)
1423        {
1424        pcre_options = handle_option(op->one_char, pcre_options);
1425        continue;
1426        }
1427    
1428      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1429      either has a value or defaults to something. It cannot have data in a
1430      separate item. At the moment, the only such options are "colo(u)r" and
1431      Jeffrey Friedl's special -S debugging option. */
1432    
1433      if (*option_data == 0 &&
1434          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1435        {
1436        switch (op->one_char)
1437          {
1438          case N_COLOUR:
1439          colour_option = (char *)"auto";
1440        break;        break;
1441    #ifdef JFRIEDL_DEBUG
1442          case 'S':
1443          S_arg = 0;
1444          break;
1445    #endif
1446          }
1447        continue;
1448        }
1449    
1450      /* Otherwise, find the data string for the option. */
1451    
1452      if (*option_data == 0)
1453        {
1454        if (i >= argc - 1 || longopwasequals)
1455          {
1456          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1457          exit(usage(2));
1458          }
1459        option_data = argv[++i];
1460        }
1461    
1462        default:    /* If the option type is OP_PATLIST, it's the -e option, which can be called
1463        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);    multiple times to create a list of patterns. */
1464        return usage(2);  
1465      if (op->type == OP_PATLIST)
1466        {
1467        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1468          {
1469          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1470            MAX_PATTERN_COUNT);
1471          return 2;
1472        }        }
1473        patterns[cmd_pattern_count++] = option_data;
1474      }      }
1475    
1476      /* Otherwise, deal with single string or numeric data values. */
1477    
1478      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1479        {
1480        *((char **)op->dataptr) = option_data;
1481        }
1482      else
1483        {
1484        char *endptr;
1485        int n = strtoul(option_data, &endptr, 10);
1486        if (*endptr != 0)
1487          {
1488          if (longop)
1489            {
1490            char *equals = strchr(op->long_name, '=');
1491            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1492              equals - op->long_name;
1493            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1494              option_data, nlen, op->long_name);
1495            }
1496          else
1497            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1498              option_data, op->one_char);
1499          exit(usage(2));
1500          }
1501        *((int *)op->dataptr) = n;
1502        }
1503      }
1504    
1505    /* Options have been decoded. If -C was used, its value is used as a default
1506    for -A and -B. */
1507    
1508    if (both_context > 0)
1509      {
1510      if (after_context == 0) after_context = both_context;
1511      if (before_context == 0) before_context = both_context;
1512      }
1513    
1514    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1515    LC_ALL environment variable is set, and if so, use it. */
1516    
1517    if (locale == NULL)
1518      {
1519      locale = getenv("LC_ALL");
1520      locale_from = "LCC_ALL";
1521      }
1522    
1523    if (locale == NULL)
1524      {
1525      locale = getenv("LC_CTYPE");
1526      locale_from = "LC_CTYPE";
1527      }
1528    
1529    /* If a locale has been provided, set it, and generate the tables the PCRE
1530    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1531    
1532    if (locale != NULL)
1533      {
1534      if (setlocale(LC_CTYPE, locale) == NULL)
1535        {
1536        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1537          locale, locale_from);
1538        return 2;
1539        }
1540      pcretables = pcre_maketables();
1541      }
1542    
1543    /* Sort out colouring */
1544    
1545    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1546      {
1547      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1548      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1549      else
1550        {
1551        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1552          colour_option);
1553        return 2;
1554        }
1555      if (do_colour)
1556        {
1557        char *cs = getenv("PCREGREP_COLOUR");
1558        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1559        if (cs != NULL) colour_string = cs;
1560        }
1561      }
1562    
1563    /* Interpret the newline type; the default settings are Unix-like. */
1564    
1565    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1566      {
1567      pcre_options |= PCRE_NEWLINE_CR;
1568      endlinebyte = '\r';
1569      }
1570    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1571      {
1572      pcre_options |= PCRE_NEWLINE_LF;
1573      }
1574    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1575      {
1576      pcre_options |= PCRE_NEWLINE_CRLF;
1577      endlineextra = 1;
1578      }
1579    else
1580      {
1581      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1582      return 2;
1583    }    }
1584    
1585  /* There must be at least a regexp argument */  /* Interpret the text values for -d and -D */
1586    
1587  if (i >= argc) return usage(0);  if (dee_option != NULL)
1588      {
1589      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1590      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1591      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1592      else
1593        {
1594        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1595        return 2;
1596        }
1597      }
1598    
1599    if (DEE_option != NULL)
1600      {
1601      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1602      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1603      else
1604        {
1605        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1606        return 2;
1607        }
1608      }
1609    
1610  /* Compile the regular expression. */  /* Check the values for Jeffrey Friedl's debugging options. */
1611    
1612  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  #ifdef JFRIEDL_DEBUG
1613  if (pattern == NULL)  if (S_arg > 9)
1614    {    {
1615    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    fprintf(stderr, "pcregrep: bad value for -S option\n");
1616    return 2;    return 2;
1617    }    }
1618    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1619      {
1620      if (jfriedl_XT == 0) jfriedl_XT = 1;
1621      if (jfriedl_XR == 0) jfriedl_XR = 1;
1622      }
1623    #endif
1624    
1625    /* Get memory to store the pattern and hints lists. */
1626    
1627  /* Study the regular expression, as we will be running it may times */  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1628    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1629    
1630  hints = pcre_study(pattern, 0, &error);  if (pattern_list == NULL || hints_list == NULL)
 if (error != NULL)  
1631    {    {
1632    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    fprintf(stderr, "pcregrep: malloc failed\n");
1633    return 2;    return 2;
1634    }    }
1635    
1636  /* If there are no further arguments, do the business on stdin and exit */  /* If no patterns were provided by -e, and there is no file provided by -f,
1637    the first argument is the one and only pattern, and it must exist. */
1638    
1639  if (i >= argc) return pcregrep(stdin, NULL);  if (cmd_pattern_count == 0 && pattern_filename == NULL)
1640      {
1641      if (i >= argc) return usage(2);
1642      patterns[cmd_pattern_count++] = argv[i++];
1643      }
1644    
1645  /* Otherwise, work through the remaining arguments as files. If there is only  /* Compile the patterns that were provided on the command line, either by
1646  one, don't give its name on the output. */  multiple uses of -e or as a single unkeyed pattern. */
1647    
1648  if (i == argc - 1) filenames = FALSE;  for (j = 0; j < cmd_pattern_count; j++)
1649  if (filenames_only) filenames = TRUE;    {
1650      if (!compile_pattern(patterns[j], pcre_options, NULL,
1651           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1652        return 2;
1653      }
1654    
1655  for (; i < argc; i++)  /* Compile the regular expressions that are provided in a file. */
1656    
1657    if (pattern_filename != NULL)
1658    {    {
1659    FILE *in = fopen(argv[i], "r");    int linenumber = 0;
1660    if (in == NULL)    FILE *f;
1661      char *filename;
1662      char buffer[MBUFTHIRD];
1663    
1664      if (strcmp(pattern_filename, "-") == 0)
1665      {      {
1666      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));      f = stdin;
1667      rc = 2;      filename = stdin_name;
1668      }      }
1669    else    else
1670      {      {
1671      int frc = pcregrep(in, filenames? argv[i] : NULL);      f = fopen(pattern_filename, "r");
1672      if (frc == 0 && rc == 1) rc = 0;      if (f == NULL)
1673      fclose(in);        {
1674          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1675            strerror(errno));
1676          return 2;
1677          }
1678        filename = pattern_filename;
1679      }      }
1680    
1681      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1682        {
1683        char *s = buffer + (int)strlen(buffer);
1684        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1685        *s = 0;
1686        linenumber++;
1687        if (buffer[0] == 0) continue;   /* Skip blank lines */
1688        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1689          return 2;
1690        }
1691    
1692      if (f != stdin) fclose(f);
1693      }
1694    
1695    /* Study the regular expressions, as we will be running them many times */
1696    
1697    for (j = 0; j < pattern_count; j++)
1698      {
1699      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1700      if (error != NULL)
1701        {
1702        char s[16];
1703        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1704        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1705        return 2;
1706        }
1707      }
1708    
1709    /* If there are include or exclude patterns, compile them. */
1710    
1711    if (exclude_pattern != NULL)
1712      {
1713      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1714        pcretables);
1715      if (exclude_compiled == NULL)
1716        {
1717        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1718          errptr, error);
1719        return 2;
1720        }
1721      }
1722    
1723    if (include_pattern != NULL)
1724      {
1725      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1726        pcretables);
1727      if (include_compiled == NULL)
1728        {
1729        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1730          errptr, error);
1731        return 2;
1732        }
1733      }
1734    
1735    /* If there are no further arguments, do the business on stdin and exit. */
1736    
1737    if (i >= argc)
1738      return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1739    
1740    /* Otherwise, work through the remaining arguments as files or directories.
1741    Pass in the fact that there is only one argument at top level - this suppresses
1742    the file name if the argument is not a directory and filenames are not
1743    otherwise forced. */
1744    
1745    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
1746    
1747    for (; i < argc; i++)
1748      {
1749      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1750        only_one_at_top);
1751      if (frc > 1) rc = frc;
1752        else if (frc == 0 && rc == 1) rc = 0;
1753    }    }
1754    
1755  return rc;  return rc;
1756  }  }
1757    
1758  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.91

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12