/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 67 by nigel, Sat Feb 24 21:40:13 2007 UTC revision 89 by nigel, Sat Feb 24 21:41:27 2007 UTC
# Line 4  Line 4 
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories. */  directories.
8    
9               Copyright (c) 1997-2006 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40  #include <ctype.h>  #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
45  #include <errno.h>  #include <errno.h>
46    
47    #include <sys/types.h>
48    #include <sys/stat.h>
49    #include <unistd.h>
50    
51  #include "config.h"  #include "config.h"
52  #include "pcre.h"  #include "pcre.h"
53    
# Line 19  directories. */ Line 56  directories. */
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "3.0 14-Jan-2003"  #define VERSION "4.2 09-Jan-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62    #if BUFSIZ > 8192
63    #define MBUFTHIRD BUFSIZ
64    #else
65    #define MBUFTHIRD 8192
66    #endif
67    
68    
69    /* Values for the "filenames" variable, which specifies options for file name
70    output. The order is important; it is assumed that a file name is wanted for
71    all values greater than FN_DEFAULT. */
72    
73    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
74    
75    /* Actions for the -d and -D options */
76    
77    enum { dee_READ, dee_SKIP, dee_RECURSE };
78    enum { DEE_READ, DEE_SKIP };
79    
80    /* Actions for special processing options (flag bits) */
81    
82    #define PO_WORD_MATCH     0x0001
83    #define PO_LINE_MATCH     0x0002
84    #define PO_FIXED_STRINGS  0x0004
85    
86    
87    
88  /*************************************************  /*************************************************
89  *               Global variables                 *  *               Global variables                 *
90  *************************************************/  *************************************************/
91    
92    /* Jeffrey Friedl has some debugging requirements that are not part of the
93    regular code. */
94    
95    #ifdef JFRIEDL_DEBUG
96    static int S_arg = -1;
97    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
98    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
99    static const char *jfriedl_prefix = "";
100    static const char *jfriedl_postfix = "";
101    #endif
102    
103    static char *colour_string = (char *)"1;31";
104    static char *colour_option = NULL;
105    static char *dee_option = NULL;
106    static char *DEE_option = NULL;
107  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
108    static char *stdin_name = (char *)"(standard input)";
109    static char *locale = NULL;
110    
111    static const unsigned char *pcretables = NULL;
112    
113  static int  pattern_count = 0;  static int  pattern_count = 0;
114  static pcre **pattern_list;  static pcre **pattern_list;
115  static pcre_extra **hints_list;  static pcre_extra **hints_list;
116    
117    static char *include_pattern = NULL;
118    static char *exclude_pattern = NULL;
119    
120    static pcre *include_compiled = NULL;
121    static pcre *exclude_compiled = NULL;
122    
123    static int after_context = 0;
124    static int before_context = 0;
125    static int both_context = 0;
126    static int dee_action = dee_READ;
127    static int DEE_action = DEE_READ;
128    static int error_count = 0;
129    static int filenames = FN_DEFAULT;
130    static int process_options = 0;
131    
132  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
133  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
134  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
135  static BOOL invert = FALSE;  static BOOL invert = FALSE;
136    static BOOL multiline = FALSE;
137  static BOOL number = FALSE;  static BOOL number = FALSE;
138  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
139    static BOOL quiet = FALSE;
140  static BOOL silent = FALSE;  static BOOL silent = FALSE;
 static BOOL whole_lines = FALSE;  
141    
142  /* Structure for options and list of them */  /* Structure for options and list of them */
143    
144    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
145           OP_PATLIST };
146    
147  typedef struct option_item {  typedef struct option_item {
148      int type;
149    int one_char;    int one_char;
150      void *dataptr;
151    const char *long_name;    const char *long_name;
152    const char *help_text;    const char *help_text;
153  } option_item;  } option_item;
154    
155    /* Options without a single-letter equivalent get a negative value. This can be
156    used to identify them. */
157    
158    #define N_COLOUR    (-1)
159    #define N_EXCLUDE   (-2)
160    #define N_HELP      (-3)
161    #define N_INCLUDE   (-4)
162    #define N_LABEL     (-5)
163    #define N_LOCALE    (-6)
164    #define N_NULL      (-7)
165    
166  static option_item optionlist[] = {  static option_item optionlist[] = {
167    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
168    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
169    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
170    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
171    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
172    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
173    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
174    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
175    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
176    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
177    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
178    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
179    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
180    { 0,    NULL,           NULL }    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
181      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
182      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
183      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
184      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
185      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
186      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
187      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
188      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
189      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
190      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
191      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
192      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
193      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
194    #ifdef JFRIEDL_DEBUG
195      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
196    #endif
197      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
198      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
199      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
200      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
201      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
202      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
203      { OP_NODATA,    0,        NULL,               NULL,            NULL }
204  };  };
205    
206    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
207    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
208    that the combination of -w and -x has the same effect as -x on its own, so we
209    can treat them as the same. */
210    
/* Text placed before each pattern. The array index is the value of
process_options: bit 1 is -w, bit 2 is -x, bit 4 is -F. Entries with both -w
and -x set repeat the -x entry. */

static const char *prefix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  "^(?:",        /* 2: -x */
  "^(?:",        /* 3: -w -x */
  "\\Q",         /* 4: -F */
  "\\b\\Q",      /* 5: -F -w */
  "^(?:\\Q",     /* 6: -F -x */
  "^(?:\\Q"      /* 7: -F -w -x */
};

/* Text placed after each pattern; indexed in the same way as prefix[]. */

static const char *suffix[] = {
  "",            /* 0: no special processing */
  "\\b",         /* 1: -w */
  ")$",          /* 2: -x */
  ")$",          /* 3: -w -x */
  "\\E",         /* 4: -F */
  "\\E\\b",      /* 5: -F -w */
  "\\E)$",       /* 6: -F -x */
  "\\E)$"        /* 7: -F -w -x */
};
216    
217    
218    
219  /*************************************************  /*************************************************
220  *       Functions for directory scanning         *  *            OS-specific functions               *
221  *************************************************/  *************************************************/
222    
223  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
224  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
225    
226    
227  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
# Line 120  closedir(dir); Line 268  closedir(dir);
268  }  }
269    
270    
271    /************* Test for regular file in Unix **********/
272    
/* Test whether a path names a regular file.

Argument:
  filename    the path to examine

Returns:      1 if stat() reports a regular file, or if stat() itself fails
                (in the expectation that opening as a file will then fail)
              0 for any other kind of file
*/

static int
isregfile(char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) < 0)
  return 1;        /* In the expectation that opening as a file will fail */

/* S_ISREG() is the POSIX way of testing the file type; normalize its result
so that the function yields exactly 0 or 1. */

return S_ISREG(statbuf.st_mode) != 0;
}
281    
282    
283    /************* Test stdout for being a terminal in Unix **********/
284    
285    static BOOL
286    is_stdout_tty(void)
287    {
288    return isatty(fileno(stdout));
289    }
290    
291    
292  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
293    
294  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
295  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
296    when it did not exist. */
297    
298    
299  #elif HAVE_WIN32API  #elif HAVE_WIN32API
# Line 134  Lionel Fourquaux. */ Line 304  Lionel Fourquaux. */
304  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
305  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
306  #endif  #endif
307    #ifndef INVALID_FILE_ATTRIBUTES
308    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
309    #endif
310    
311  #include <windows.h>  #include <windows.h>
312    
313  typedef struct directory_type  typedef struct directory_type
# Line 213  free(dir); Line 387  free(dir);
387  }  }
388    
389    
390    /************* Test for regular file in Win32 **********/
391    
392    /* I don't know how to do this, or if it can be done; assume all paths are
393    regular if they are not directories. */
394    
/* Win32 version: there is no real test here; every path that is not a
directory is assumed to be a regular file.

Argument:
  filename    the path to examine

Returns:      non-zero if isdirectory() says the path is not a directory
              zero otherwise
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
399    
400    
401    /************* Test stdout for being a terminal in Win32 **********/
402    
403    /* I don't know how to do this; assume never */
404    
405    static BOOL
406    is_stdout_tty(void)
407    {
408    FALSE;
409    }
410    
411    
412  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
413    
414  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 221  free(dir); Line 417  free(dir);
417    
/* Stand-ins used when directory scanning is not available. Nothing is ever
reported as a directory; the remaining functions do nothing, but every
function that is declared to return a value now returns a defined one. */

typedef void directory_type;

/* No path is ever treated as a directory. */
int isdirectory(char *filename) { (void)filename; return 0; }

/* There is nothing to open: always yields NULL. */
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }

/* There are no entries to read: always yields NULL. */
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }

/* Nothing to release. */
void closedirectory(directory_type *dir) { (void)dir; }
424    
425    
426    /************* Test for regular file when we can't do it **********/
427    
428    /* Assume all files are regular. */
429    
/* Fallback version: the path is not examined, and every file is reported as
regular. */

int
isregfile(char *filename)
{
(void)filename;
return 1;
}
431    
432    
433    /************* Test stdout for being a terminal when we can't do it **********/
434    
435    static BOOL
436    is_stdout_tty(void)
437    {
438    return FALSE;
439    }
440    
441    
442  #endif  #endif
443    
444    
# Line 253  return sys_errlist[n]; Line 466  return sys_errlist[n];
466    
467    
468  /*************************************************  /*************************************************
469  *              Grep an individual file           *  *       Print the previous "after" lines         *
470    *************************************************/
471    
472    /* This is called if we are about to lose said lines because of buffer filling,
473    and at the end of the file. The data in the line is written using fwrite() so
474    that a binary zero does not terminate it.
475    
476    Arguments:
477      lastmatchnumber   the number of the last matching line, plus one
478      lastmatchrestart  where we restarted after the last match
479      endptr            end of available data
480      printname         filename for printing
481    
482    Returns:            nothing
483    */
484    
485    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
486      char *endptr, char *printname)
487    {
488    if (after_context > 0 && lastmatchnumber > 0)
489      {
490      int count = 0;
491      while (lastmatchrestart < endptr && count++ < after_context)
492        {
493        char *pp = lastmatchrestart;
494        if (printname != NULL) fprintf(stdout, "%s-", printname);
495        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
496        while (*pp != '\n') pp++;
497        fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
498        lastmatchrestart = pp + 1;
499        }
500      hyphenpending = TRUE;
501      }
502    }
503    
504    
505    
506    /*************************************************
507    *            Grep an individual file             *
508  *************************************************/  *************************************************/
509    
510    /* This is called from grep_or_recurse() below. It uses a buffer that is three
511    times the value of MBUFTHIRD. The matching point is never allowed to stray into
512    the top third of the buffer, thus keeping more of the file available for
513    context printing or for multiline scanning. For large files, the pointer will
514    be in the middle third most of the time, so the bottom third is available for
515    "before" context printing.
516    
517    Arguments:
518      in           the fopened FILE stream
519      printname    the file name if it is to be printed for each match
520                   or NULL if the file name is not to be printed
521                   it cannot be NULL if filenames[_nomatch]_only is set
522    
523    Returns:       0 if there was at least one match
524                   1 otherwise (no matches)
525    */
526    
527  static int  static int
528  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
529  {  {
530  int rc = 1;  int rc = 1;
531  int linenumber = 0;  int linenumber = 1;
532    int lastmatchnumber = 0;
533  int count = 0;  int count = 0;
534  int offsets[99];  int offsets[99];
535  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
536    char buffer[3*MBUFTHIRD];
537    char *ptr = buffer;
538    char *endptr;
539    size_t bufflength;
540    BOOL endhyphenpending = FALSE;
541    
542    /* Do the first read into the start of the buffer and set up the pointer to
543    end of what we have. */
544    
545    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
546    endptr = buffer + bufflength;
547    
548    /* Loop while the current pointer is not at the end of the file. For large
549    files, endptr will be at the end of the buffer when we are in the middle of the
550    file, but ptr will never get there, because as soon as it gets over 2/3 of the
551    way, the buffer is shifted left and re-filled. */
552    
553  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
554    {    {
   BOOL match = FALSE;  
555    int i;    int i;
556    int length = (int)strlen(buffer);    int mrc = 0;
557    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    BOOL match = FALSE;
558    linenumber++;    char *t = ptr;
559      size_t length, linelength;
560    
561      /* At this point, ptr is at the start of a line. We need to find the length
562      of the subject string to pass to pcre_exec(). In multiline mode, it is the
563      length of the remainder of the data in the buffer. Otherwise, it is the length of
564      the next line. After matching, we always advance by the length of the next
565      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
566      that any match is constrained to be in the first line. */
567    
568      linelength = 0;
569      while (t < endptr && *t++ != '\n') linelength++;
570      length = multiline? endptr - ptr : linelength;
571    
572    
573    for (i = 0; !match && i < pattern_count; i++)    /* Extra processing for Jeffrey Friedl's debugging. */
574    
575    #ifdef JFRIEDL_DEBUG
576      if (jfriedl_XT || jfriedl_XR)
577      {
578          #include <sys/time.h>
579          #include <time.h>
580          struct timeval start_time, end_time;
581          struct timezone dummy;
582    
583          if (jfriedl_XT)
584          {
585              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
586              const char *orig = ptr;
587              ptr = malloc(newlen + 1);
588              if (!ptr) {
589                      printf("out of memory");
590                      exit(2);
591              }
592              endptr = ptr;
593              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
594              for (i = 0; i < jfriedl_XT; i++) {
595                      strncpy(endptr, orig,  length);
596                      endptr += length;
597              }
598              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
599              length = newlen;
600          }
601    
602          if (gettimeofday(&start_time, &dummy) != 0)
603                  perror("bad gettimeofday");
604    
605    
606          for (i = 0; i < jfriedl_XR; i++)
607              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
608    
609          if (gettimeofday(&end_time, &dummy) != 0)
610                  perror("bad gettimeofday");
611    
612          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
613                          -
614                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
615    
616          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
617          return 0;
618      }
619    #endif
620    
621    
622      /* Run through all the patterns until one matches. Note that we don't include
623      the final newline in the subject string. */
624    
625      for (i = 0; i < pattern_count; i++)
626      {      {
627      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
628        offsets, 99) >= 0;        offsets, 99);
629      if (match && whole_lines && offsets[1] != length) match = FALSE;      if (mrc >= 0) { match = TRUE; break; }
630        if (mrc != PCRE_ERROR_NOMATCH)
631          {
632          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
633          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
634          fprintf(stderr, "this line:\n");
635          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
636          fprintf(stderr, "\n");
637          if (error_count == 0 &&
638              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
639            {
640            fprintf(stderr, "pcregrep: error %d means that a resource limit "
641              "was exceeded\n", mrc);
642            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
643            }
644          if (error_count++ > 20)
645            {
646            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
647            exit(2);
648            }
649          match = invert;    /* No more matching; don't show the line again */
650          break;
651          }
652      }      }
653    
654      /* If it's a match or a not-match (as required), do what's wanted. */
655    
656    if (match != invert)    if (match != invert)
657      {      {
658        BOOL hyphenprinted = FALSE;
659    
660        /* We've failed if we want a file that doesn't have any matches. */
661    
662        if (filenames == FN_NOMATCH_ONLY) return 1;
663    
664        /* Just count if just counting is wanted. */
665    
666      if (count_only) count++;      if (count_only) count++;
667    
668      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
669        in the file. */
670    
671        else if (filenames == FN_ONLY)
672        {        {
673        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
674        return 0;        return 0;
675        }        }
676    
677      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
678    
679        else if (quiet) return 0;
680    
681        /* The --only-matching option prints just the substring that matched, and
682        does not print any context. */
683    
684        else if (only_matching)
685          {
686          if (printname != NULL) fprintf(stdout, "%s:", printname);
687          if (number) fprintf(stdout, "%d:", linenumber);
688          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
689          fprintf(stdout, "\n");
690          }
691    
692        /* This is the default case when none of the above options is set. We print
693        the matching line(s), possibly preceded and/or followed by other lines of
694        context. */
695    
696      else      else
697        {        {
698        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
699          previous match. We never print any overlaps. */
700    
701          if (after_context > 0 && lastmatchnumber > 0)
702            {
703            int linecount = 0;
704            char *p = lastmatchrestart;
705    
706            while (p < ptr && linecount < after_context)
707              {
708              while (*p != '\n') p++;
709              p++;
710              linecount++;
711              }
712    
713            /* It is important to advance lastmatchrestart during this printing so
714            that it interacts correctly with any "before" printing below. Print
715            each line's data using fwrite() in case there are binary zeroes. */
716    
717            while (lastmatchrestart < p)
718              {
719              char *pp = lastmatchrestart;
720              if (printname != NULL) fprintf(stdout, "%s-", printname);
721              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
722              while (*pp != '\n') pp++;
723              fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);
724              lastmatchrestart = pp + 1;
725              }
726            if (lastmatchrestart != ptr) hyphenpending = TRUE;
727            }
728    
729          /* If there were non-contiguous lines printed above, insert hyphens. */
730    
731          if (hyphenpending)
732            {
733            fprintf(stdout, "--\n");
734            hyphenpending = FALSE;
735            hyphenprinted = TRUE;
736            }
737    
738          /* See if there is a requirement to print some "before" lines for this
739          match. Again, don't print overlaps. */
740    
741          if (before_context > 0)
742            {
743            int linecount = 0;
744            char *p = ptr;
745    
746            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
747                   linecount < before_context)
748              {
749              linecount++;
750              p--;
751              while (p > buffer && p[-1] != '\n') p--;
752              }
753    
754            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
755              fprintf(stdout, "--\n");
756    
757            while (p < ptr)
758              {
759              char *pp = p;
760              if (printname != NULL) fprintf(stdout, "%s-", printname);
761              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
762              while (*pp != '\n') pp++;
763              fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */
764              p = pp + 1;
765              }
766            }
767    
768          /* Now print the matching line(s); ensure we set hyphenpending at the end
769          of the file if any context lines are being output. */
770    
771          if (after_context > 0 || before_context > 0)
772            endhyphenpending = TRUE;
773    
774          if (printname != NULL) fprintf(stdout, "%s:", printname);
775        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
776        fprintf(stdout, "%s\n", buffer);  
777          /* In multiline mode, we want to print to the end of the line in which
778          the end of the matched string is found, so we adjust linelength and the
779          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
780          start of the match will always be before the first \n character. */
781    
782          if (multiline)
783            {
784            char *endmatch = ptr + offsets[1];
785            t = ptr;
786            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
787            while (endmatch < endptr && *endmatch != '\n') endmatch++;
788            linelength = endmatch - ptr;
789            }
790    
791          /*** NOTE: Use only fwrite() to output the data line, so that binary
792          zeroes are treated as just another data character. */
793    
794          /* This extra option, for Jeffrey Friedl's debugging requirements,
795          replaces the matched string, or a specific captured string if it exists,
796          with X. When this happens, colouring is ignored. */
797    
798    #ifdef JFRIEDL_DEBUG
799          if (S_arg >= 0 && S_arg < mrc)
800            {
801            int first = S_arg * 2;
802            int last  = first + 1;
803            fwrite(ptr, 1, offsets[first], stdout);
804            fprintf(stdout, "X");
805            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
806            }
807          else
808    #endif
809    
810          /* We have to split the line(s) up if colouring. */
811    
812          if (do_colour)
813            {
814            fwrite(ptr, 1, offsets[0], stdout);
815            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
816            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
817            fprintf(stdout, "%c[00m", 0x1b);
818            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
819            }
820          else fwrite(ptr, 1, linelength, stdout);
821    
822          fprintf(stdout, "\n");
823          }
824    
825        /* End of doing what has to be done for a match */
826    
827        rc = 0;    /* Had some success */
828    
829        /* Remember where the last match happened for after_context. We remember
830        where we are about to restart, and that line's number. */
831    
832        lastmatchrestart = ptr + linelength + 1;
833        lastmatchnumber = linenumber + 1;
834        }
835    
836      /* Advance to after the newline and increment the line number. */
837    
838      ptr += linelength + 1;
839      linenumber++;
840    
841      /* If we haven't yet reached the end of the file (the buffer is full), and
842      the current point is in the top 1/3 of the buffer, slide the buffer down by
843      1/3 and refill it. Before we do this, if some unprinted "after" lines are
844      about to be lost, print them. */
845    
846      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
847        {
848        if (after_context > 0 &&
849            lastmatchnumber > 0 &&
850            lastmatchrestart < buffer + MBUFTHIRD)
851          {
852          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
853          lastmatchnumber = 0;
854        }        }
855    
856      rc = 0;      /* Now do the shuffle */
857    
858        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
859        ptr -= MBUFTHIRD;
860        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
861        endptr = buffer + bufflength;
862    
863        /* Adjust any last match point */
864    
865        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
866      }      }
867      }     /* Loop through the whole file */
868    
869    /* End of file; print final "after" lines if wanted; do_after_lines sets
870    hyphenpending if it prints something. */
871    
872    if (!only_matching && !count_only)
873      {
874      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
875      hyphenpending |= endhyphenpending;
876    }    }
877    
878    /* Print the file name if we are looking for those without matches and there
879    were none. If we found a match, we won't have got this far. */
880    
881    if (filenames == FN_NOMATCH_ONLY)
882      {
883      fprintf(stdout, "%s\n", printname);
884      return 0;
885      }
886    
887    /* Print the match count if wanted */
888    
889  if (count_only)  if (count_only)
890    {    {
891    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
892    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
893    }    }
894    
# Line 314  return rc; Line 897  return rc;
897    
898    
899    
   
900  /*************************************************  /*************************************************
901  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
902  *************************************************/  *************************************************/
903    
904    /* Given a path name, if it's a directory, scan all the files if we are
905    recursing; if it's a file, grep it.
906    
907    Arguments:
908      pathname          the path to investigate
909      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
910      only_one_at_top   TRUE if the path is the only one at toplevel
911    
912    Returns:   0 if there was at least one match
913               1 if there were no matches
914               2 there was some kind of error
915    
916    However, file opening failures are suppressed if "silent" is set.
917    */
918    
919  static int  static int
920  grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
921  {  {
922  int rc = 1;  int rc = 1;
923  int sep;  int sep;
924  FILE *in;  FILE *in;
925    
926  /* If the file is a directory and we are recursing, scan each file within it.  /* If the file name is "-" we scan stdin */
 The scanning code is localized so it can be made system-specific. */  
927    
928  if ((sep = isdirectory(filename)) != 0 && dir_recurse)  if (strcmp(pathname, "-") == 0)
929    {    {
930    char buffer[1024];    return pcregrep(stdin,
931    char *nextfile;      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
932    directory_type *dir = opendirectory(filename);        stdin_name : NULL);
933      }
934    
   if (dir == NULL)  
     {  
     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  
       strerror(errno));  
     return 2;  
     }  
935    
936    while ((nextfile = readdirectory(dir)) != NULL)  /* If the file is a directory, skip if skipping or if we are recursing, scan
937    each file within it, subject to any include or exclude patterns that were set.
938    The scanning code is localized so it can be made system-specific. */
939    
940    if ((sep = isdirectory(pathname)) != 0)
941      {
942      if (dee_action == dee_SKIP) return 1;
943      if (dee_action == dee_RECURSE)
944      {      {
945      int frc;      char buffer[1024];
946      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      char *nextfile;
947      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      directory_type *dir = opendirectory(pathname);
     if (frc == 0 && rc == 1) rc = 0;  
     }  
948    
949    closedirectory(dir);      if (dir == NULL)
950    return rc;        {
951          if (!silent)
952            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
953              strerror(errno));
954          return 2;
955          }
956    
957        while ((nextfile = readdirectory(dir)) != NULL)
958          {
959          int frc, blen;
960          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
961          blen = strlen(buffer);
962    
963          if (exclude_compiled != NULL &&
964              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
965            continue;
966    
967          if (include_compiled != NULL &&
968              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
969            continue;
970    
971          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
972          if (frc > 1) rc = frc;
973           else if (frc == 0 && rc == 1) rc = 0;
974          }
975    
976        closedirectory(dir);
977        return rc;
978        }
979    }    }
980    
981  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
982  the first and only argument at top level, we don't show the file name (unless  been requested. */
 we are only showing the file name). Otherwise, control is via the  
 show_filenames variable. */  
983    
984  in = fopen(filename, "r");  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
985    
986    /* Control reaches here if we have a regular file, or if we have a directory
987    and recursion or skipping was not requested, or if we have anything else and
988    skipping was not requested. The scan proceeds. If this is the first and only
989    argument at top level, we don't show the file name, unless we are only showing
990    the file name, or the filename was forced (-H). */
991    
992    in = fopen(pathname, "r");
993  if (in == NULL)  if (in == NULL)
994    {    {
995    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
996        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
997          strerror(errno));
998    return 2;    return 2;
999    }    }
1000    
1001  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1002    filename : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1003    
1004  fclose(in);  fclose(in);
1005  return rc;  return rc;
1006  }  }
# Line 383  return rc; Line 1015  return rc;
1015  static int  static int
1016  usage(int rc)  usage(int rc)
1017  {  {
1018  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  option_item *op;
1019    fprintf(stderr, "Usage: pcregrep [-");
1020    for (op = optionlist; op->one_char != 0; op++)
1021      {
1022      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1023      }
1024    fprintf(stderr, "] [long options] [pattern] [files]\n");
1025  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1026  return rc;  return rc;
1027  }  }
# Line 402  option_item *op; Line 1040  option_item *op;
1040    
1041  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1042  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1043  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1044    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1045  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1046    
1047  printf("Options:\n");  printf("Options:\n");
# Line 418  for (op = optionlist; op->one_char != 0; Line 1057  for (op = optionlist; op->one_char != 0;
1057    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1058    }    }
1059    
1060  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1061  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1062  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1063    
1064  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1065  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1066  }  }
1067    
# Line 431  printf("Exit status is 0 if any matches, Line 1069  printf("Exit status is 0 if any matches,
1069    
1070    
1071  /*************************************************  /*************************************************
1072  *                Handle an option                *  *    Handle a single-letter, no data option      *
1073  *************************************************/  *************************************************/
1074    
1075  static int  static int
# Line 439  handle_option(int letter, int options) Line 1077  handle_option(int letter, int options)
1077  {  {
1078  switch(letter)  switch(letter)
1079    {    {
1080    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1081    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1082    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1083      case 'H': filenames = FN_FORCE; break;
1084      case 'h': filenames = FN_NONE; break;
1085    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1086    case 'l': filenames_only = TRUE;    case 'l': filenames = FN_ONLY; break;
1087      case 'L': filenames = FN_NOMATCH_ONLY; break;
1088      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1089    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1090    case 'r': recurse = TRUE; break;    case 'o': only_matching = TRUE; break;
1091      case 'q': quiet = TRUE; break;
1092      case 'r': dee_action = dee_RECURSE; break;
1093    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1094    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; break;
1095    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1096    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1097      case 'x': process_options |= PO_LINE_MATCH; break;
1098    
1099    case 'V':    case 'V':
1100    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 469  return options; Line 1114  return options;
1114    
1115    
1116  /*************************************************  /*************************************************
1117    *          Construct printed ordinal             *
1118    *************************************************/
1119    
/* This turns a number into its English ordinal: "1st", "2nd", "3rd", "4th",
and so on. The suffix depends on the last two digits, not just the last one,
because 11, 12 and 13 (and 111, 212, ...) take "th" rather than "st", "nd",
"rd".

Argument:   n   the number
Returns:    pointer to a static buffer holding the text; the buffer is
            overwritten by the next call
*/

static char *
ordin(int n)
{
static char buffer[16];     /* room for any 32-bit int, a suffix, and a zero */
const char *ending = "th";
int lasttwo = n % 100;

if (lasttwo < 0) lasttwo = -lasttwo;   /* the sign does not affect the suffix */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (lasttwo % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, ending);
return buffer;
}
1138    
1139    
1140    
1141    /*************************************************
1142    *          Compile a single pattern              *
1143    *************************************************/
1144    
1145    /* When the -F option has been used, this is called for each substring.
1146    Otherwise it's called for each supplied pattern.
1147    
1148    Arguments:
1149      pattern        the pattern string
1150      options        the PCRE options
1151      filename       the file name, or NULL for a command-line pattern
1152      count          0 if this is the only command line pattern, or
1153                     number of the command line pattern, or
1154                     linenumber for a pattern from a file
1155    
1156    Returns:         TRUE on success, FALSE after an error
1157    */
1158    
1159    static BOOL
1160    compile_single_pattern(char *pattern, int options, char *filename, int count)
1161    {
1162    char buffer[MBUFTHIRD + 16];
1163    const char *error;
1164    int errptr;
1165    
1166    if (pattern_count >= MAX_PATTERN_COUNT)
1167      {
1168      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1169        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1170      return FALSE;
1171      }
1172    
1173    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1174      suffix[process_options]);
1175    pattern_list[pattern_count] =
1176      pcre_compile(buffer, options, &error, &errptr, pcretables);
1177    if (pattern_list[pattern_count++] != NULL) return TRUE;
1178    
1179    /* Handle compile errors */
1180    
1181    errptr -= (int)strlen(prefix[process_options]);
1182    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1183    
1184    if (filename == NULL)
1185      {
1186      if (count == 0)
1187        fprintf(stderr, "pcregrep: Error in command-line regex "
1188          "at offset %d: %s\n", errptr, error);
1189      else
1190        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1191          "at offset %d: %s\n", ordin(count), errptr, error);
1192      }
1193    else
1194      {
1195      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1196        "at offset %d: %s\n", count, filename, errptr, error);
1197      }
1198    
1199    return FALSE;
1200    }
1201    
1202    
1203    
1204    /*************************************************
1205    *           Compile one supplied pattern         *
1206    *************************************************/
1207    
1208    /* When the -F option has been used, each string may be a list of strings,
1209    separated by newlines. They will be matched literally.
1210    
1211    Arguments:
1212      pattern        the pattern string
1213      options        the PCRE options
1214      filename       the file name, or NULL for a command-line pattern
1215      count          0 if this is the only command line pattern, or
1216                     number of the command line pattern, or
1217                     linenumber for a pattern from a file
1218    
1219    Returns:         TRUE on success, FALSE after an error
1220    */
1221    
1222    static BOOL
1223    compile_pattern(char *pattern, int options, char *filename, int count)
1224    {
1225    if ((process_options & PO_FIXED_STRINGS) != 0)
1226      {
1227      char buffer[MBUFTHIRD];
1228      for(;;)
1229        {
1230        char *p = strchr(pattern, '\n');
1231        if (p == NULL)
1232          return compile_single_pattern(pattern, options, filename, count);
1233        sprintf(buffer, "%.*s", p - pattern, pattern);
1234        pattern = p + 1;
1235        if (!compile_single_pattern(buffer, options, filename, count))
1236          return FALSE;
1237        }
1238      }
1239    else return compile_single_pattern(pattern, options, filename, count);
1240    }
1241    
1242    
1243    
1244    /*************************************************
1245  *                Main program                    *  *                Main program                    *
1246  *************************************************/  *************************************************/
1247    
1248    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1249    
1250  int  int
1251  main(int argc, char **argv)  main(int argc, char **argv)
1252  {  {
1253  int i, j;  int i, j;
1254  int rc = 1;  int rc = 1;
1255  int options = 0;  int pcre_options = 0;
1256    int cmd_pattern_count = 0;
1257  int errptr;  int errptr;
 const char *error;  
1258  BOOL only_one_at_top;  BOOL only_one_at_top;
1259    char *patterns[MAX_PATTERN_COUNT];
1260    const char *locale_from = "--locale";
1261    const char *error;
1262    
1263  /* Process the options */  /* Process the options */
1264    
1265  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1266    {    {
1267      option_item *op = NULL;
1268      char *option_data = (char *)"";    /* default to keep compiler happy */
1269      BOOL longop;
1270      BOOL longopwasequals = FALSE;
1271    
1272    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1273    
1274    /* Missing options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1275      but only if we have previously had -e or -f to define the patterns. */
1276    
1277    if (argv[i][1] == 0) exit(usage(2));    if (argv[i][1] == 0)
1278        {
1279        if (pattern_filename != NULL || pattern_count > 0) break;
1280          else exit(usage(2));
1281        }
1282    
1283    /* Long name options */    /* Handle a long name option, or -- to terminate the options */
1284    
1285    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1286      {      {
1287      option_item *op;      char *arg = argv[i] + 2;
1288        char *argequals = strchr(arg, '=');
1289    
1290      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1291        {        {
1292        pattern_filename = argv[i] + 7;        i++;
1293        continue;        break;                /* out of the options-handling loop */
1294        }        }
1295    
1296        longop = TRUE;
1297    
1298        /* Some long options have data that follows after =, for example file=name.
1299        Some options have variations in the long name spelling: specifically, we
1300        allow "regexp" because GNU grep allows it, though I personally go along
1301        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1302        These options are entered in the table as "regex(p)". No option is in both
1303        these categories, fortunately. */
1304    
1305      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1306        {        {
1307        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1308          char *equals = strchr(op->long_name, '=');
1309          if (opbra == NULL)     /* Not a (p) case */
1310          {          {
1311          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1312          break;            {
1313              if (strcmp(arg, op->long_name) == 0) break;
1314              }
1315            else                 /* Special case xxx=data */
1316              {
1317              int oplen = equals - op->long_name;
1318              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1319              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1320                {
1321                option_data = arg + arglen;
1322                if (*option_data == '=')
1323                  {
1324                  option_data++;
1325                  longopwasequals = TRUE;
1326                  }
1327                break;
1328                }
1329              }
1330            }
1331          else                   /* Special case xxxx(p) */
1332            {
1333            char buff1[24];
1334            char buff2[24];
1335            int baselen = opbra - op->long_name;
1336            sprintf(buff1, "%.*s", baselen, op->long_name);
1337            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1338              opbra + 1);
1339            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1340              break;
1341          }          }
1342        }        }
1343    
1344      if (op->one_char == 0)      if (op->one_char == 0)
1345        {        {
1346        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 519  for (i = 1; i < argc; i++) Line 1348  for (i = 1; i < argc; i++)
1348        }        }
1349      }      }
1350    
1351    /* One-char options */  
1352      /* Jeffrey Friedl's debugging harness uses these additional options which
1353      are not in the right form for putting in the option table because they use
1354      only one hyphen, yet are more than one character long. By putting them
1355      separately here, they will not get displayed as part of the help() output,
1356      but I don't think Jeffrey will care about that. */
1357    
1358    #ifdef JFRIEDL_DEBUG
1359      else if (strcmp(argv[i], "-pre") == 0) {
1360              jfriedl_prefix = argv[++i];
1361              continue;
1362      } else if (strcmp(argv[i], "-post") == 0) {
1363              jfriedl_postfix = argv[++i];
1364              continue;
1365      } else if (strcmp(argv[i], "-XT") == 0) {
1366              sscanf(argv[++i], "%d", &jfriedl_XT);
1367              continue;
1368      } else if (strcmp(argv[i], "-XR") == 0) {
1369              sscanf(argv[++i], "%d", &jfriedl_XR);
1370              continue;
1371      }
1372    #endif
1373    
1374    
1375      /* One-char options; many that have no data may be in a single argument; we
1376      continue till we hit the last one or one that needs data. */
1377    
1378    else    else
1379      {      {
1380      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1381        longop = FALSE;
1382      while (*s != 0)      while (*s != 0)
1383        {        {
1384        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1385            { if (*s == op->one_char) break; }
1386          if (op->one_char == 0)
1387          {          {
1388          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1389          if (pattern_filename[0] == 0)            *s, argv[i]);
1390            {          exit(usage(2));
1391            if (i >= argc - 1)          }
1392              {        if (op->type != OP_NODATA || s[1] == 0)
1393              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1394              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1395          break;          break;
1396          }          }
1397        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1398        }        }
1399      }      }
1400    
1401      /* At this point we should have op pointing to a matched option. If the type
1402      is OP_NODATA, it means that there is no data, and the option might set
1403      something in the PCRE options. */
1404    
1405      if (op->type == OP_NODATA)
1406        {
1407        pcre_options = handle_option(op->one_char, pcre_options);
1408        continue;
1409        }
1410    
1411      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1412      either has a value or defaults to something. It cannot have data in a
1413      separate item. At the moment, the only such options are "colo(u)r" and
1414      Jeffrey Friedl's special -S debugging option. */
1415    
1416      if (*option_data == 0 &&
1417          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1418        {
1419        switch (op->one_char)
1420          {
1421          case N_COLOUR:
1422          colour_option = (char *)"auto";
1423          break;
1424    #ifdef JFRIEDL_DEBUG
1425          case 'S':
1426          S_arg = 0;
1427          break;
1428    #endif
1429          }
1430        continue;
1431        }
1432    
1433      /* Otherwise, find the data string for the option. */
1434    
1435      if (*option_data == 0)
1436        {
1437        if (i >= argc - 1 || longopwasequals)
1438          {
1439          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1440          exit(usage(2));
1441          }
1442        option_data = argv[++i];
1443        }
1444    
1445      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1446      multiple times to create a list of patterns. */
1447    
1448      if (op->type == OP_PATLIST)
1449        {
1450        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1451          {
1452          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1453            MAX_PATTERN_COUNT);
1454          return 2;
1455          }
1456        patterns[cmd_pattern_count++] = option_data;
1457        }
1458    
1459      /* Otherwise, deal with single string or numeric data values. */
1460    
1461      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1462        {
1463        *((char **)op->dataptr) = option_data;
1464        }
1465      else
1466        {
1467        char *endptr;
1468        int n = strtoul(option_data, &endptr, 10);
1469        if (*endptr != 0)
1470          {
1471          if (longop)
1472            {
1473            char *equals = strchr(op->long_name, '=');
1474            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1475              equals - op->long_name;
1476            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1477              option_data, nlen, op->long_name);
1478            }
1479          else
1480            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1481              option_data, op->one_char);
1482          exit(usage(2));
1483          }
1484        *((int *)op->dataptr) = n;
1485        }
1486      }
1487    
1488    /* Options have been decoded. If -C was used, its value is used as a default
1489    for -A and -B. */
1490    
1491    if (both_context > 0)
1492      {
1493      if (after_context == 0) after_context = both_context;
1494      if (before_context == 0) before_context = both_context;
1495      }
1496    
1497    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1498    LC_ALL environment variable is set, and if so, use it. */
1499    
1500    if (locale == NULL)
1501      {
1502      locale = getenv("LC_ALL");
1503      locale_from = "LCC_ALL";
1504      }
1505    
1506    if (locale == NULL)
1507      {
1508      locale = getenv("LC_CTYPE");
1509      locale_from = "LC_CTYPE";
1510      }
1511    
1512    /* If a locale has been provided, set it, and generate the tables the PCRE
1513    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1514    
1515    if (locale != NULL)
1516      {
1517      if (setlocale(LC_CTYPE, locale) == NULL)
1518        {
1519        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1520          locale, locale_from);
1521        return 2;
1522        }
1523      pcretables = pcre_maketables();
1524      }
1525    
1526    /* Sort out colouring */
1527    
1528    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1529      {
1530      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1531      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1532      else
1533        {
1534        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1535          colour_option);
1536        return 2;
1537        }
1538      if (do_colour)
1539        {
1540        char *cs = getenv("PCREGREP_COLOUR");
1541        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1542        if (cs != NULL) colour_string = cs;
1543        }
1544      }
1545    
1546    /* Interpret the text values for -d and -D */
1547    
1548    if (dee_option != NULL)
1549      {
1550      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1551      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1552      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1553      else
1554        {
1555        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1556        return 2;
1557        }
1558      }
1559    
1560    if (DEE_option != NULL)
1561      {
1562      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1563      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1564      else
1565        {
1566        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1567        return 2;
1568        }
1569      }
1570    
1571    /* Check the values for Jeffrey Friedl's debugging options. */
1572    
1573    #ifdef JFRIEDL_DEBUG
1574    if (S_arg > 9)
1575      {
1576      fprintf(stderr, "pcregrep: bad value for -S option\n");
1577      return 2;
1578      }
1579    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1580      {
1581      if (jfriedl_XT == 0) jfriedl_XT = 1;
1582      if (jfriedl_XR == 0) jfriedl_XR = 1;
1583    }    }
1584    #endif
1585    
1586  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  /* Get memory to store the pattern and hints lists. */
1587  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  
1588    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1589    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1590    
1591  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1592    {    {
# Line 554  if (pattern_list == NULL || hints_list = Line 1594  if (pattern_list == NULL || hints_list =
1594    return 2;    return 2;
1595    }    }
1596    
1597  /* Compile the regular expression(s). */  /* If no patterns were provided by -e, and there is no file provided by -f,
1598    the first argument is the one and only pattern, and it must exist. */
1599    
1600    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1601      {
1602      if (i >= argc) return usage(2);
1603      patterns[cmd_pattern_count++] = argv[i++];
1604      }
1605    
1606    /* Compile the patterns that were provided on the command line, either by
1607    multiple uses of -e or as a single unkeyed pattern. */
1608    
1609    for (j = 0; j < cmd_pattern_count; j++)
1610      {
1611      if (!compile_pattern(patterns[j], pcre_options, NULL,
1612           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1613        return 2;
1614      }
1615    
1616    /* Compile the regular expressions that are provided in a file. */
1617    
1618  if (pattern_filename != NULL)  if (pattern_filename != NULL)
1619    {    {
1620    FILE *f = fopen(pattern_filename, "r");    int linenumber = 0;
1621    char buffer[BUFSIZ];    FILE *f;
1622    if (f == NULL)    char *filename;
1623      char buffer[MBUFTHIRD];
1624    
1625      if (strcmp(pattern_filename, "-") == 0)
1626      {      {
1627      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      f = stdin;
1628        strerror(errno));      filename = stdin_name;
     return 2;  
1629      }      }
1630    while (fgets(buffer, sizeof(buffer), f) != NULL)    else
1631      {      {
1632      char *s = buffer + (int)strlen(buffer);      f = fopen(pattern_filename, "r");
1633      if (pattern_count >= MAX_PATTERN_COUNT)      if (f == NULL)
1634        {        {
1635        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1636          MAX_PATTERN_COUNT);          strerror(errno));
1637        return 2;        return 2;
1638        }        }
1639        filename = pattern_filename;
1640        }
1641    
1642      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1643        {
1644        char *s = buffer + (int)strlen(buffer);
1645      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
     if (s == buffer) continue;  
1646      *s = 0;      *s = 0;
1647      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      linenumber++;
1648        &errptr, NULL);      if (buffer[0] == 0) continue;   /* Skip blank lines */
1649      if (pattern_list[pattern_count++] == NULL)      if (!compile_pattern(buffer, pcre_options, filename, linenumber))
       {  
       fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",  
         pattern_count, errptr, error);  
1650        return 2;        return 2;
       }  
1651      }      }
   fclose(f);  
   }  
1652    
1653  /* If no file name, a single regex must be given inline */    if (f != stdin) fclose(f);
   
 else  
   {  
   if (i >= argc) return usage(2);  
   pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
   if (pattern_list[0] == NULL)  
     {  
     fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,  
       error);  
     return 2;  
     }  
   pattern_count++;  
1654    }    }
1655    
1656  /* Study the regular expressions, as we will be running them may times */  /* Study the regular expressions, as we will be running them many times */
1657    
1658  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1659    {    {
# Line 619  for (j = 0; j < pattern_count; j++) Line 1667  for (j = 0; j < pattern_count; j++)
1667      }      }
1668    }    }
1669    
1670  /* If there are no further arguments, do the business on stdin and exit */  /* If there are include or exclude patterns, compile them. */
1671    
1672    if (exclude_pattern != NULL)
1673      {
1674      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1675        pcretables);
1676      if (exclude_compiled == NULL)
1677        {
1678        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1679          errptr, error);
1680        return 2;
1681        }
1682      }
1683    
1684    if (include_pattern != NULL)
1685      {
1686      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1687        pcretables);
1688      if (include_compiled == NULL)
1689        {
1690        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1691          errptr, error);
1692        return 2;
1693        }
1694      }
1695    
1696    /* If there are no further arguments, do the business on stdin and exit. */
1697    
1698  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
1699      return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1700    
1701  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1702  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1703  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
1704    otherwise forced. */
1705    
1706  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
1707    
1708  for (; i < argc; i++)  for (; i < argc; i++)
1709    {    {
1710    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1711    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
1712      if (frc > 1) rc = frc;
1713        else if (frc == 0 && rc == 1) rc = 0;
1714    }    }
1715    
1716  return rc;  return rc;
1717  }  }
1718    
1719  /* End */  /* End of pcregrep */

Legend:
Removed from v.67  
changed lines
  Added in v.89

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12