/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 150 by ph10, Tue Apr 17 08:22:40 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2004 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51  #include "pcre.h"  #include <sys/types.h>
52    #include <sys/stat.h>
53    #ifdef HAVE_UNISTD_H
54    #  include <unistd.h>
55    #endif
56    
57    #include <pcre.h>
58    
59  #define FALSE 0  #define FALSE 0
60  #define TRUE 1  #define TRUE 1
61    
62  typedef int BOOL;  typedef int BOOL;
63    
 #define VERSION "3.0 14-Jan-2003"  
64  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
65    
66    #if BUFSIZ > 8192
67    #define MBUFTHIRD BUFSIZ
68    #else
69    #define MBUFTHIRD 8192
70    #endif
71    
72    /* Values for the "filenames" variable, which specifies options for file name
73    output. The order is important; it is assumed that a file name is wanted for
74    all values greater than FN_DEFAULT. */
75    
76    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
77    
78    /* Actions for the -d and -D options */
79    
80    enum { dee_READ, dee_SKIP, dee_RECURSE };
81    enum { DEE_READ, DEE_SKIP };
82    
83    /* Actions for special processing options (flag bits) */
84    
85    #define PO_WORD_MATCH     0x0001
86    #define PO_LINE_MATCH     0x0002
87    #define PO_FIXED_STRINGS  0x0004
88    
89    /* Line ending types */
90    
91    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
92    
93    
94    
95  /*************************************************  /*************************************************
96  *               Global variables                 *  *               Global variables                 *
97  *************************************************/  *************************************************/
98    
99    /* Jeffrey Friedl has some debugging requirements that are not part of the
100    regular code. */
101    
102    #ifdef JFRIEDL_DEBUG
103    static int S_arg = -1;
104    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106    static const char *jfriedl_prefix = "";
107    static const char *jfriedl_postfix = "";
108    #endif
109    
110    static int  endlinetype;
111    
112    static char *colour_string = (char *)"1;31";
113    static char *colour_option = NULL;
114    static char *dee_option = NULL;
115    static char *DEE_option = NULL;
116    static char *newline = NULL;
117  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
118    static char *stdin_name = (char *)"(standard input)";
119    static char *locale = NULL;
120    
121    static const unsigned char *pcretables = NULL;
122    
123  static int  pattern_count = 0;  static int  pattern_count = 0;
124  static pcre **pattern_list;  static pcre **pattern_list = NULL;
125  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
126    
127    static char *include_pattern = NULL;
128    static char *exclude_pattern = NULL;
129    
130    static pcre *include_compiled = NULL;
131    static pcre *exclude_compiled = NULL;
132    
133    static int after_context = 0;
134    static int before_context = 0;
135    static int both_context = 0;
136    static int dee_action = dee_READ;
137    static int DEE_action = DEE_READ;
138    static int error_count = 0;
139    static int filenames = FN_DEFAULT;
140    static int process_options = 0;
141    
142  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
143  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
144  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
145  static BOOL invert = FALSE;  static BOOL invert = FALSE;
146    static BOOL multiline = FALSE;
147  static BOOL number = FALSE;  static BOOL number = FALSE;
148  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
149    static BOOL quiet = FALSE;
150  static BOOL silent = FALSE;  static BOOL silent = FALSE;
151  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
152    
153  /* Structure for options and list of them */  /* Structure for options and list of them */
154    
155    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
156           OP_PATLIST };
157    
158  typedef struct option_item {  typedef struct option_item {
159      int type;
160    int one_char;    int one_char;
161      void *dataptr;
162    const char *long_name;    const char *long_name;
163    const char *help_text;    const char *help_text;
164  } option_item;  } option_item;
165    
166    /* Options without a single-letter equivalent get a negative value. This can be
167    used to identify them. */
168    
169    #define N_COLOUR    (-1)
170    #define N_EXCLUDE   (-2)
171    #define N_HELP      (-3)
172    #define N_INCLUDE   (-4)
173    #define N_LABEL     (-5)
174    #define N_LOCALE    (-6)
175    #define N_NULL      (-7)
176    
177  static option_item optionlist[] = {  static option_item optionlist[] = {
178    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
179    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
180    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
181    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
182    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
183    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
184    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
185    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
186    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
187    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
188    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
189    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
190    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
191    { 0,    NULL,           NULL }    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
192      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
193      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
194      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
195      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
196      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
197      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
198      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
199      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
200      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
201      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
202      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
203      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
204      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
205      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
206    #ifdef JFRIEDL_DEBUG
207      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
208    #endif
209      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
210      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
211      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
212      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
213      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
214      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
215      { OP_NODATA,    0,        NULL,               NULL,            NULL }
216  };  };
217    
218    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
219    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
220    that the combination of -w and -x has the same effect as -x on its own, so we
221    can treat them as the same. */
222    
223    static const char *prefix[] = {
224      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
225    
226    static const char *suffix[] = {
227      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
228    
229    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
230    
231    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232    
233    const char utf8_table4[] = {
234      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238    
239    
240    
241  /*************************************************  /*************************************************
242  *       Functions for directory scanning         *  *            OS-specific functions               *
243  *************************************************/  *************************************************/
244    
245  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
246  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
247    
248    
249  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
250    
251  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252  #include <sys/types.h>  #include <sys/types.h>
253  #include <sys/stat.h>  #include <sys/stat.h>
254  #include <dirent.h>  #include <dirent.h>
# Line 151  closedir(dir); Line 290  closedir(dir);
290  }  }
291    
292    
293    /************* Test for regular file in Unix **********/
294    
295    static int
296    isregfile(char *filename)
297    {
298    struct stat statbuf;
299    if (stat(filename, &statbuf) < 0)
300      return 1;        /* In the expectation that opening as a file will fail */
301    return (statbuf.st_mode & S_IFMT) == S_IFREG;
302    }
303    
304    
305    /************* Test stdout for being a terminal in Unix **********/
306    
307    static BOOL
308    is_stdout_tty(void)
309    {
310    return isatty(fileno(stdout));
311    }
312    
313    
314  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
315    
316  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
317  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318    when it did not exist. */
319    
320    
321  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
322    
323  #ifndef STRICT  #ifndef STRICT
324  # define STRICT  # define STRICT
# Line 165  Lionel Fourquaux. */ Line 326  Lionel Fourquaux. */
326  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
327  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
328  #endif  #endif
329    #ifndef INVALID_FILE_ATTRIBUTES
330    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331    #endif
332    
333  #include <windows.h>  #include <windows.h>
334    
335  typedef struct directory_type  typedef struct directory_type
# Line 244  free(dir); Line 409  free(dir);
409  }  }
410    
411    
412    /************* Test for regular file in Win32 **********/
413    
414    /* I don't know how to do this, or if it can be done; assume all paths are
415    regular if they are not directories. */
416    
417    int isregfile(char *filename)
418    {
419    return !isdirectory(filename)
420    }
421    
422    
423    /************* Test stdout for being a terminal in Win32 **********/
424    
425    /* I don't know how to do this; assume never */
426    
427    static BOOL
428    is_stdout_tty(void)
429    {
430    FALSE;
431    }
432    
433    
434  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
435    
436  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 252  free(dir); Line 439  free(dir);
439    
440  typedef void directory_type;  typedef void directory_type;
441    
442  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
443  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
444  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
445  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
446    
447    
448    /************* Test for regular file when we can't do it **********/
449    
450    /* Assume all files are regular. */
451    
452    int isregfile(char *filename) { return 1; }
453    
454    
455    /************* Test stdout for being a terminal when we can't do it **********/
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464  #endif  #endif
465    
466    
467    
468  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
469  /*************************************************  /*************************************************
470  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
471  *************************************************/  *************************************************/
# Line 284  return sys_errlist[n]; Line 488  return sys_errlist[n];
488    
489    
490  /*************************************************  /*************************************************
491  *              Grep an individual file           *  *             Find end of line                   *
492  *************************************************/  *************************************************/
493    
494  static int  /* The length of the endline sequence that is found is set via lenptr. This may
495  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
496    
497  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
498      p         current position in line
499      endptr    end of available data
500      lenptr    where to put the length of the eol sequence
501    
502    Returns:    pointer after the last byte of the line, including the newline byte(s)
503    */
504    
505    static char *
506    end_of_line(char *p, char *endptr, int *lenptr)
507    {
508    switch(endlinetype)
509    {    {
510    BOOL match = FALSE;    default:      /* Just in case */
511    int i;    case EL_LF:
512    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
513    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
514    linenumber++;      {
515        *lenptr = 1;
516        return p + 1;
517        }
518      *lenptr = 0;
519      return endptr;
520    
521    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
522      while (p < endptr && *p != '\r') p++;
523      if (p < endptr)
524      {      {
525      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
526        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
527      }      }
528      *lenptr = 0;
529      return endptr;
530    
531    if (match != invert)    case EL_CRLF:
532      for (;;)
533      {      {
534      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
535        if (++p >= endptr)
536          {
537          *lenptr = 0;
538          return endptr;
539          }
540        if (*p == '\n')
541          {
542          *lenptr = 2;
543          return p + 1;
544          }
545        }
546      break;
547    
548      case EL_ANYCRLF:
549      while (p < endptr)
550        {
551        int extra = 0;
552        register int c = *((unsigned char *)p);
553    
554      else if (filenames_only)      if (utf8 && c >= 0xc0)
555        {        {
556        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
557        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
558          gcss = 6*extra;
559          c = (c & utf8_table3[extra]) << gcss;
560          for (gcii = 1; gcii <= extra; gcii++)
561            {
562            gcss -= 6;
563            c |= (p[gcii] & 0x3f) << gcss;
564            }
565        }        }
566    
567      else if (silent) return 0;      p += 1 + extra;
568    
569      else      switch (c)
570        {        {
571        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
572        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
573        fprintf(stdout, "%s\n", buffer);        return p;
574    
575          case 0x0d:    /* CR */
576          if (p < endptr && *p == 0x0a)
577            {
578            *lenptr = 2;
579            p++;
580            }
581          else *lenptr = 1;
582          return p;
583    
584          default:
585          break;
586        }        }
587        }   /* End of loop for ANYCRLF case */
588    
589      rc = 0;    *lenptr = 0;  /* Must have hit the end */
590      }    return endptr;
   }  
591    
592  if (count_only)    case EL_ANY:
593    {    while (p < endptr)
594    if (name != NULL) fprintf(stdout, "%s:", name);      {
595    fprintf(stdout, "%d\n", count);      int extra = 0;
596    }      register int c = *((unsigned char *)p);
597    
598  return rc;      if (utf8 && c >= 0xc0)
599  }        {
600          int gcii, gcss;
601          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
602          gcss = 6*extra;
603          c = (c & utf8_table3[extra]) << gcss;
604          for (gcii = 1; gcii <= extra; gcii++)
605            {
606            gcss -= 6;
607            c |= (p[gcii] & 0x3f) << gcss;
608            }
609          }
610    
611        p += 1 + extra;
612    
613        switch (c)
614          {
615          case 0x0a:    /* LF */
616          case 0x0b:    /* VT */
617          case 0x0c:    /* FF */
618          *lenptr = 1;
619          return p;
620    
621          case 0x0d:    /* CR */
622          if (p < endptr && *p == 0x0a)
623            {
624            *lenptr = 2;
625            p++;
626            }
627          else *lenptr = 1;
628          return p;
629    
630          case 0x85:    /* NEL */
631          *lenptr = utf8? 2 : 1;
632          return p;
633    
634          case 0x2028:  /* LS */
635          case 0x2029:  /* PS */
636          *lenptr = 3;
637          return p;
638    
639          default:
640          break;
641          }
642        }   /* End of loop for ANY case */
643    
644      *lenptr = 0;  /* Must have hit the end */
645      return endptr;
646      }     /* End of overall switch */
647    }
648    
649    
650    
651  /*************************************************  /*************************************************
652  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
653  *************************************************/  *************************************************/
654    
655  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
656    
657  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
658  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
659      startptr  start of available data
660    
661  if ((sep = isdirectory(filename)) != 0 && dir_recurse)  Returns:    pointer to the start of the previous line
662    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
663    
664    if (dir == NULL)  static char *
665      {  previous_line(char *p, char *startptr)
666      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
667        strerror(errno));  switch(endlinetype)
668      return 2;    {
669      }    default:      /* Just in case */
670      case EL_LF:
671      p--;
672      while (p > startptr && p[-1] != '\n') p--;
673      return p;
674    
675      case EL_CR:
676      p--;
677      while (p > startptr && p[-1] != '\n') p--;
678      return p;
679    
680    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
681      for (;;)
682      {      {
683      int frc;      p -= 2;
684      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
685      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
686      }      }
687      return p;   /* But control should never get here */
688    
689    closedirectory(dir);    case EL_ANY:
690    return rc;    case EL_ANYCRLF:
691    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
692      if (utf8) while ((*p & 0xc0) == 0x80) p--;
693    
694  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
695  the first and only argument at top level, we don't show the file name (unless      {
696  we are only showing the file name). Otherwise, control is via the      register int c;
697  show_filenames variable. */      char *pp = p - 1;
698    
699  in = fopen(filename, "r");      if (utf8)
700  if (in == NULL)        {
701    {        int extra = 0;
702    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
703    return 2;        c = *((unsigned char *)pp);
704    }        if (c >= 0xc0)
705            {
706            int gcii, gcss;
707            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
708            gcss = 6*extra;
709            c = (c & utf8_table3[extra]) << gcss;
710            for (gcii = 1; gcii <= extra; gcii++)
711              {
712              gcss -= 6;
713              c |= (pp[gcii] & 0x3f) << gcss;
714              }
715            }
716          }
717        else c = *((unsigned char *)pp);
718    
719  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
720    filename : NULL);        {
721  fclose(in);        case 0x0a:    /* LF */
722  return rc;        case 0x0d:    /* CR */
723  }        return p;
724    
725          default:
726          break;
727          }
728    
729        else switch (c)
730          {
731          case 0x0a:    /* LF */
732          case 0x0b:    /* VT */
733          case 0x0c:    /* FF */
734          case 0x0d:    /* CR */
735          case 0x85:    /* NEL */
736          case 0x2028:  /* LS */
737          case 0x2029:  /* PS */
738          return p;
739    
740          default:
741          break;
742          }
743    
744  /*************************************************      p = pp;  /* Back one character */
745  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
746    
747  static int    return startptr;  /* Hit start of data */
748  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
749  }  }
750    
751    
752    
753    
754    
755  /*************************************************  /*************************************************
756  *                Help function                   *  *       Print the previous "after" lines         *
757  *************************************************/  *************************************************/
758    
759  static void  /* This is called if we are about to lose said lines because of buffer filling,
760  help(void)  and at the end of the file. The data in the line is written using fwrite() so
761  {  that a binary zero does not terminate it.
762  option_item *op;  
763    Arguments:
764  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
765  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
766  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
767  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
768    
769  printf("Options:\n");  Returns:            nothing
770    */
771    
772  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
773      char *endptr, char *printname)
774    {
775    if (after_context > 0 && lastmatchnumber > 0)
776    {    {
777    int n;    int count = 0;
778    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
779    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
780    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
781    n = 30 - n;      char *pp = lastmatchrestart;
782    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
783    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
784        pp = end_of_line(pp, endptr, &ellength);
785        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
786        lastmatchrestart = pp;
787        }
788      hyphenpending = TRUE;
789    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
790  }  }
791    
792    
793    
   
794  /*************************************************  /*************************************************
795  *                Handle an option                *  *            Grep an individual file             *
796  *************************************************/  *************************************************/
797    
798    /* This is called from grep_or_recurse() below. It uses a buffer that is three
799    times the value of MBUFTHIRD. The matching point is never allowed to stray into
800    the top third of the buffer, thus keeping more of the file available for
801    context printing or for multiline scanning. For large files, the pointer will
802    be in the middle third most of the time, so the bottom third is available for
803    "before" context printing.
804    
805    Arguments:
806      in           the fopened FILE stream
807      printname    the file name if it is to be printed for each match
808                   or NULL if the file name is not to be printed
809                   it cannot be NULL if filenames[_nomatch]_only is set
810    
811    Returns:       0 if there was at least one match
812                   1 otherwise (no matches)
813    */
814    
815  static int  static int
816  handle_option(int letter, int options)  pcregrep(FILE *in, char *printname)
817  {  {
818  switch(letter)  int rc = 1;
819    {  int linenumber = 1;
820    case -1:  help(); exit(0);  int lastmatchnumber = 0;
821    case 'c': count_only = TRUE; break;  int count = 0;
822    case 'h': filenames = FALSE; break;  int offsets[99];
823    case 'i': options |= PCRE_CASELESS; break;  char *lastmatchrestart = NULL;
824    case 'l': filenames_only = TRUE;  char buffer[3*MBUFTHIRD];
825    case 'n': number = TRUE; break;  char *ptr = buffer;
826    case 'r': recurse = TRUE; break;  char *endptr;
827    case 's': silent = TRUE; break;  size_t bufflength;
828    case 'u': options |= PCRE_UTF8; break;  BOOL endhyphenpending = FALSE;
829    case 'v': invert = TRUE; break;  
830    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  /* Do the first read into the start of the buffer and set up the pointer to
831    the end of what we have. */
832    
833    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
834    endptr = buffer + bufflength;
835    
836    /* Loop while the current pointer is not at the end of the file. For large
837    files, endptr will be at the end of the buffer when we are in the middle of the
838    file, but ptr will never get there, because as soon as it gets over 2/3 of the
839    way, the buffer is shifted left and re-filled. */
840    
841    case 'V':  while (ptr < endptr)
842    fprintf(stderr, "pcregrep version %s using ", VERSION);    {
843    fprintf(stderr, "PCRE version %s\n", pcre_version());    int i, endlinelength;
844    exit(0);    int mrc = 0;
845    break;    BOOL match = FALSE;
846      char *t = ptr;
847      size_t length, linelength;
848    
849    default:    /* At this point, ptr is at the start of a line. We need to find the length
850    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    of the subject string to pass to pcre_exec(). In multiline mode, it is the
851    exit(usage(2));    length of the remainder of the data in the buffer. Otherwise, it is the length of
852    }    the next line. After matching, we always advance by the length of the next
853      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
854      that any match is constrained to be in the first line. */
855    
856      t = end_of_line(t, endptr, &endlinelength);
857      linelength = t - ptr - endlinelength;
858      length = multiline? endptr - ptr : linelength;
859    
860  return options;    /* Extra processing for Jeffrey Friedl's debugging. */
 }  
861    
862    #ifdef JFRIEDL_DEBUG
863      if (jfriedl_XT || jfriedl_XR)
864      {
865          #include <sys/time.h>
866          #include <time.h>
867          struct timeval start_time, end_time;
868          struct timezone dummy;
869    
870          if (jfriedl_XT)
871          {
872              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
873              const char *orig = ptr;
874              ptr = malloc(newlen + 1);
875              if (!ptr) {
876                      printf("out of memory");
877                      exit(2);
878              }
879              endptr = ptr;
880              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
881              for (i = 0; i < jfriedl_XT; i++) {
882                      strncpy(endptr, orig,  length);
883                      endptr += length;
884              }
885              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
886              length = newlen;
887          }
888    
889          if (gettimeofday(&start_time, &dummy) != 0)
890                  perror("bad gettimeofday");
891    
 /*************************************************  
 *                Main program                    *  
 *************************************************/  
892    
893  int        for (i = 0; i < jfriedl_XR; i++)
894  main(int argc, char **argv)            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
 {  
 int i, j;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL only_one_at_top;  
895    
896  /* Process the options */        if (gettimeofday(&end_time, &dummy) != 0)
897                  perror("bad gettimeofday");
898    
899  for (i = 1; i < argc; i++)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
900    {                        -
901    if (argv[i][0] != '-') break;                        (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
902    
903    /* Missing options */        printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
904          return 0;
905      }
906    #endif
907    
   if (argv[i][1] == 0) exit(usage(2));  
908    
909    /* Long name options */    /* Run through all the patterns until one matches. Note that we don't include
910      the final newline in the subject string. */
911    
912    if (argv[i][1] == '-')    for (i = 0; i < pattern_count; i++)
913      {      {
914      option_item *op;      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
915          offsets, 99);
916      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (mrc >= 0) { match = TRUE; break; }
917        {      if (mrc != PCRE_ERROR_NOMATCH)
       pattern_filename = argv[i] + 7;  
       continue;  
       }  
   
     for (op = optionlist; op->one_char != 0; op++)  
918        {        {
919        if (strcmp(argv[i]+2, op->long_name) == 0)        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
920          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
921          fprintf(stderr, "this line:\n");
922          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
923          fprintf(stderr, "\n");
924          if (error_count == 0 &&
925              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
926          {          {
927          options = handle_option(op->one_char, options);          fprintf(stderr, "pcregrep: error %d means that a resource limit "
928          break;            "was exceeded\n", mrc);
929            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
930          }          }
931          if (error_count++ > 20)
932            {
933            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
934            exit(2);
935            }
936          match = invert;    /* No more matching; don't show the line again */
937          break;
938        }        }
939      if (op->one_char == 0)      }
940        {  
941      /* If it's a match or a not-match (as required), do what's wanted. */
942    
943      if (match != invert)
944        {
945        BOOL hyphenprinted = FALSE;
946    
947        /* We've failed if we want a file that doesn't have any matches. */
948    
949        if (filenames == FN_NOMATCH_ONLY) return 1;
950    
951        /* Just count if just counting is wanted. */
952    
953        if (count_only) count++;
954    
955        /* If all we want is a file name, there is no need to scan any more lines
956        in the file. */
957    
958        else if (filenames == FN_ONLY)
959          {
960          fprintf(stdout, "%s\n", printname);
961          return 0;
962          }
963    
964        /* Likewise, if all we want is a yes/no answer. */
965    
966        else if (quiet) return 0;
967    
968        /* The --only-matching option prints just the substring that matched, and
969        does not print any context. */
970    
971        else if (only_matching)
972          {
973          if (printname != NULL) fprintf(stdout, "%s:", printname);
974          if (number) fprintf(stdout, "%d:", linenumber);
975          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
976          fprintf(stdout, "\n");
977          }
978    
979        /* This is the default case when none of the above options is set. We print
980        the matching line(s), possibly preceded and/or followed by other lines of
981        context. */
982    
983        else
984          {
985          /* See if there is a requirement to print some "after" lines from a
986          previous match. We never print any overlaps. */
987    
988          if (after_context > 0 && lastmatchnumber > 0)
989            {
990            int ellength;
991            int linecount = 0;
992            char *p = lastmatchrestart;
993    
994            while (p < ptr && linecount < after_context)
995              {
996              p = end_of_line(p, ptr, &ellength);
997              linecount++;
998              }
999    
1000            /* It is important to advance lastmatchrestart during this printing so
1001            that it interacts correctly with any "before" printing below. Print
1002            each line's data using fwrite() in case there are binary zeroes. */
1003    
1004            while (lastmatchrestart < p)
1005              {
1006              char *pp = lastmatchrestart;
1007              if (printname != NULL) fprintf(stdout, "%s-", printname);
1008              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1009              pp = end_of_line(pp, endptr, &ellength);
1010              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1011              lastmatchrestart = pp;
1012              }
1013            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1014            }
1015    
1016          /* If there were non-contiguous lines printed above, insert hyphens. */
1017    
1018          if (hyphenpending)
1019            {
1020            fprintf(stdout, "--\n");
1021            hyphenpending = FALSE;
1022            hyphenprinted = TRUE;
1023            }
1024    
1025          /* See if there is a requirement to print some "before" lines for this
1026          match. Again, don't print overlaps. */
1027    
1028          if (before_context > 0)
1029            {
1030            int linecount = 0;
1031            char *p = ptr;
1032    
1033            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1034                   linecount < before_context)
1035              {
1036              linecount++;
1037              p = previous_line(p, buffer);
1038              }
1039    
1040            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1041              fprintf(stdout, "--\n");
1042    
1043            while (p < ptr)
1044              {
1045              int ellength;
1046              char *pp = p;
1047              if (printname != NULL) fprintf(stdout, "%s-", printname);
1048              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1049              pp = end_of_line(pp, endptr, &ellength);
1050              fwrite(p, 1, pp - p, stdout);
1051              p = pp;
1052              }
1053            }
1054    
1055          /* Now print the matching line(s); ensure we set hyphenpending at the end
1056          of the file if any context lines are being output. */
1057    
1058          if (after_context > 0 || before_context > 0)
1059            endhyphenpending = TRUE;
1060    
1061          if (printname != NULL) fprintf(stdout, "%s:", printname);
1062          if (number) fprintf(stdout, "%d:", linenumber);
1063    
1064          /* In multiline mode, we want to print to the end of the line in which
1065          the end of the matched string is found, so we adjust linelength and the
1066          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1067          start of the match will always be before the first newline sequence. */
1068    
1069          if (multiline)
1070            {
1071            int ellength;
1072            char *endmatch = ptr + offsets[1];
1073            t = ptr;
1074            while (t < endmatch)
1075              {
1076              t = end_of_line(t, endptr, &ellength);
1077              if (t <= endmatch) linenumber++; else break;
1078              }
1079            endmatch = end_of_line(endmatch, endptr, &ellength);
1080            linelength = endmatch - ptr - ellength;
1081            }
1082    
1083          /*** NOTE: Use only fwrite() to output the data line, so that binary
1084          zeroes are treated as just another data character. */
1085    
1086          /* This extra option, for Jeffrey Friedl's debugging requirements,
1087          replaces the matched string, or a specific captured string if it exists,
1088          with X. When this happens, colouring is ignored. */
1089    
1090    #ifdef JFRIEDL_DEBUG
1091          if (S_arg >= 0 && S_arg < mrc)
1092            {
1093            int first = S_arg * 2;
1094            int last  = first + 1;
1095            fwrite(ptr, 1, offsets[first], stdout);
1096            fprintf(stdout, "X");
1097            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1098            }
1099          else
1100    #endif
1101    
1102          /* We have to split the line(s) up if colouring. */
1103    
1104          if (do_colour)
1105            {
1106            fwrite(ptr, 1, offsets[0], stdout);
1107            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1108            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1109            fprintf(stdout, "%c[00m", 0x1b);
1110            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1111            }
1112          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1113          }
1114    
1115        /* End of doing what has to be done for a match */
1116    
1117        rc = 0;    /* Had some success */
1118    
1119        /* Remember where the last match happened for after_context. We remember
1120        where we are about to restart, and that line's number. */
1121    
1122        lastmatchrestart = ptr + linelength + endlinelength;
1123        lastmatchnumber = linenumber + 1;
1124        }
1125    
1126      /* Advance to after the newline and increment the line number. */
1127    
1128      ptr += linelength + endlinelength;
1129      linenumber++;
1130    
1131      /* If we haven't yet reached the end of the file (the buffer is full), and
1132      the current point is in the top 1/3 of the buffer, slide the buffer down by
1133      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1134      about to be lost, print them. */
1135    
1136      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1137        {
1138        if (after_context > 0 &&
1139            lastmatchnumber > 0 &&
1140            lastmatchrestart < buffer + MBUFTHIRD)
1141          {
1142          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1143          lastmatchnumber = 0;
1144          }
1145    
1146        /* Now do the shuffle */
1147    
1148        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1149        ptr -= MBUFTHIRD;
1150        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1151        endptr = buffer + bufflength;
1152    
1153        /* Adjust any last match point */
1154    
1155        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1156        }
1157      }     /* Loop through the whole file */
1158    
1159    /* End of file; print final "after" lines if wanted; do_after_lines sets
1160    hyphenpending if it prints something. */
1161    
1162    if (!only_matching && !count_only)
1163      {
1164      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1165      hyphenpending |= endhyphenpending;
1166      }
1167    
1168    /* Print the file name if we are looking for those without matches and there
1169    were none. If we found a match, we won't have got this far. */
1170    
1171    if (filenames == FN_NOMATCH_ONLY)
1172      {
1173      fprintf(stdout, "%s\n", printname);
1174      return 0;
1175      }
1176    
1177    /* Print the match count if wanted */
1178    
1179    if (count_only)
1180      {
1181      if (printname != NULL) fprintf(stdout, "%s:", printname);
1182      fprintf(stdout, "%d\n", count);
1183      }
1184    
1185    return rc;
1186    }
1187    
1188    
1189    
1190    /*************************************************
1191    *     Grep a file or recurse into a directory    *
1192    *************************************************/
1193    
1194    /* Given a path name, if it's a directory, scan all the files if we are
1195    recursing; if it's a file, grep it.
1196    
1197    Arguments:
1198      pathname          the path to investigate
1199      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1200      only_one_at_top   TRUE if the path is the only one at toplevel
1201    
1202    Returns:   0 if there was at least one match
1203               1 if there were no matches
1204               2 if there was some kind of error
1205    
1206    However, file opening failures are suppressed if "silent" is set.
1207    */
1208    
1209    static int
1210    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1211    {
1212    int rc = 1;
1213    int sep;
1214    FILE *in;
1215    
1216    /* If the file name is "-" we scan stdin */
1217    
1218    if (strcmp(pathname, "-") == 0)
1219      {
1220      return pcregrep(stdin,
1221        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1222          stdin_name : NULL);
1223      }
1224    
1225    
1226    /* If the file is a directory, skip it if skipping was requested or, if we are recursing, scan
1227    each file within it, subject to any include or exclude patterns that were set.
1228    The scanning code is localized so it can be made system-specific. */
1229    
1230    if ((sep = isdirectory(pathname)) != 0)
1231      {
1232      if (dee_action == dee_SKIP) return 1;
1233      if (dee_action == dee_RECURSE)
1234        {
1235        char buffer[1024];
1236        char *nextfile;
1237        directory_type *dir = opendirectory(pathname);
1238    
1239        if (dir == NULL)
1240          {
1241          if (!silent)
1242            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1243              strerror(errno));
1244          return 2;
1245          }
1246    
1247        while ((nextfile = readdirectory(dir)) != NULL)
1248          {
1249          int frc, blen;
1250          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1251          blen = strlen(buffer);
1252    
1253          if (exclude_compiled != NULL &&
1254              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1255            continue;
1256    
1257          if (include_compiled != NULL &&
1258              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1259            continue;
1260    
1261          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1262          if (frc > 1) rc = frc;
1263           else if (frc == 0 && rc == 1) rc = 0;
1264          }
1265    
1266        closedirectory(dir);
1267        return rc;
1268        }
1269      }
1270    
1271    /* If the file is not a directory and not a regular file, skip it if that's
1272    been requested. */
1273    
1274    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1275    
1276    /* Control reaches here if we have a regular file, or if we have a directory
1277    and recursion or skipping was not requested, or if we have anything else and
1278    skipping was not requested. The scan proceeds. If this is the first and only
1279    argument at top level, we don't show the file name, unless we are only showing
1280    the file name, or the filename was forced (-H). */
1281    
1282    in = fopen(pathname, "r");
1283    if (in == NULL)
1284      {
1285      if (!silent)
1286        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1287          strerror(errno));
1288      return 2;
1289      }
1290    
1291    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1292      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1293    
1294    fclose(in);
1295    return rc;
1296    }
1297    
1298    
1299    
1300    
1301    /*************************************************
1302    *                Usage function                  *
1303    *************************************************/
1304    
1305    static int
1306    usage(int rc)
1307    {
1308    option_item *op;
1309    fprintf(stderr, "Usage: pcregrep [-");
1310    for (op = optionlist; op->one_char != 0; op++)
1311      {
1312      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1313      }
1314    fprintf(stderr, "] [long options] [pattern] [files]\n");
1315    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1316    return rc;
1317    }
1318    
1319    
1320    
1321    
1322    /*************************************************
1323    *                Help function                   *
1324    *************************************************/
1325    
1326    static void
1327    help(void)
1328    {
1329    option_item *op;
1330    
1331    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1332    printf("Search for PATTERN in each FILE or standard input.\n");
1333    printf("PATTERN must be present if neither -e nor -f is used.\n");
1334    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1335    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1336    
1337    printf("Options:\n");
1338    
1339    for (op = optionlist; op->one_char != 0; op++)
1340      {
1341      int n;
1342      char s[4];
1343      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1344      printf("  %s --%s%n", s, op->long_name, &n);
1345      n = 30 - n;
1346      if (n < 1) n = 1;
1347      printf("%.*s%s\n", n, "                    ", op->help_text);
1348      }
1349    
1350    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1351    printf("trailing white space is removed and blank lines are ignored.\n");
1352    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1353    
1354    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1355    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1356    }
1357    
1358    
1359    
1360    
1361    /*************************************************
1362    *    Handle a single-letter, no data option      *
1363    *************************************************/
1364    
1365    static int
1366    handle_option(int letter, int options)
1367    {
1368    switch(letter)
1369      {
1370      case N_HELP: help(); exit(0);
1371      case 'c': count_only = TRUE; break;
1372      case 'F': process_options |= PO_FIXED_STRINGS; break;
1373      case 'H': filenames = FN_FORCE; break;
1374      case 'h': filenames = FN_NONE; break;
1375      case 'i': options |= PCRE_CASELESS; break;
1376      case 'l': filenames = FN_ONLY; break;
1377      case 'L': filenames = FN_NOMATCH_ONLY; break;
1378      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1379      case 'n': number = TRUE; break;
1380      case 'o': only_matching = TRUE; break;
1381      case 'q': quiet = TRUE; break;
1382      case 'r': dee_action = dee_RECURSE; break;
1383      case 's': silent = TRUE; break;
1384      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1385      case 'v': invert = TRUE; break;
1386      case 'w': process_options |= PO_WORD_MATCH; break;
1387      case 'x': process_options |= PO_LINE_MATCH; break;
1388    
1389      case 'V':
1390      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1391      exit(0);
1392      break;
1393    
1394      default:
1395      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1396      exit(usage(2));
1397      }
1398    
1399    return options;
1400    }
1401    
1402    
1403    
1404    
1405    /*************************************************
1406    *          Construct printed ordinal             *
1407    *************************************************/
1408    
1409    /* This turns a number into "1st", "3rd", etc. */
1410    
1411    static char *
1412    ordin(int n)
1413    {
1414    static char buffer[8];
1415    char *p = buffer;
1416    sprintf(p, "%d", n);
1417    while (*p != 0) p++;
1418    switch (n%10)
1419      {
1420      case 1: strcpy(p, "st"); break;
1421      case 2: strcpy(p, "nd"); break;
1422      case 3: strcpy(p, "rd"); break;
1423      default: strcpy(p, "th"); break;
1424      }
1425    return buffer;
1426    }
1427    
1428    
1429    
1430    /*************************************************
1431    *          Compile a single pattern              *
1432    *************************************************/
1433    
1434    /* When the -F option has been used, this is called for each substring.
1435    Otherwise it's called for each supplied pattern.
1436    
1437    Arguments:
1438      pattern        the pattern string
1439      options        the PCRE options
1440      filename       the file name, or NULL for a command-line pattern
1441      count          0 if this is the only command line pattern, or
1442                     number of the command line pattern, or
1443                     linenumber for a pattern from a file
1444    
1445    Returns:         TRUE on success, FALSE after an error
1446    */
1447    
1448    static BOOL
1449    compile_single_pattern(char *pattern, int options, char *filename, int count)
1450    {
1451    char buffer[MBUFTHIRD + 16];
1452    const char *error;
1453    int errptr;
1454    
1455    if (pattern_count >= MAX_PATTERN_COUNT)
1456      {
1457      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1458        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1459      return FALSE;
1460      }
1461    
1462    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1463      suffix[process_options]);
1464    pattern_list[pattern_count] =
1465      pcre_compile(buffer, options, &error, &errptr, pcretables);
1466    if (pattern_list[pattern_count] != NULL)
1467      {
1468      pattern_count++;
1469      return TRUE;
1470      }
1471    
1472    /* Handle compile errors */
1473    
1474    errptr -= (int)strlen(prefix[process_options]);
1475    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1476    
1477    if (filename == NULL)
1478      {
1479      if (count == 0)
1480        fprintf(stderr, "pcregrep: Error in command-line regex "
1481          "at offset %d: %s\n", errptr, error);
1482      else
1483        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1484          "at offset %d: %s\n", ordin(count), errptr, error);
1485      }
1486    else
1487      {
1488      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1489        "at offset %d: %s\n", count, filename, errptr, error);
1490      }
1491    
1492    return FALSE;
1493    }
1494    
1495    
1496    
1497    /*************************************************
1498    *           Compile one supplied pattern         *
1499    *************************************************/
1500    
1501    /* When the -F option has been used, each string may be a list of strings,
1502    separated by line breaks. They will be matched literally.
1503    
1504    Arguments:
1505      pattern        the pattern string
1506      options        the PCRE options
1507      filename       the file name, or NULL for a command-line pattern
1508      count          0 if this is the only command line pattern, or
1509                     number of the command line pattern, or
1510                     linenumber for a pattern from a file
1511    
1512    Returns:         TRUE on success, FALSE after an error
1513    */
1514    
1515    static BOOL
1516    compile_pattern(char *pattern, int options, char *filename, int count)
1517    {
1518    if ((process_options & PO_FIXED_STRINGS) != 0)
1519      {
1520      char *eop = pattern + strlen(pattern);
1521      char buffer[MBUFTHIRD];
1522      for(;;)
1523        {
1524        int ellength;
1525        char *p = end_of_line(pattern, eop, &ellength);
1526        if (ellength == 0)
1527          return compile_single_pattern(pattern, options, filename, count);
1528        sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1529        pattern = p;
1530        if (!compile_single_pattern(buffer, options, filename, count))
1531          return FALSE;
1532        }
1533      }
1534    else return compile_single_pattern(pattern, options, filename, count);
1535    }
1536    
1537    
1538    
1539    /*************************************************
1540    *                Main program                    *
1541    *************************************************/
1542    
1543    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1544    
1545    int
1546    main(int argc, char **argv)
1547    {
1548    int i, j;
1549    int rc = 1;
1550    int pcre_options = 0;
1551    int cmd_pattern_count = 0;
1552    int hint_count = 0;
1553    int errptr;
1554    BOOL only_one_at_top;
1555    char *patterns[MAX_PATTERN_COUNT];
1556    const char *locale_from = "--locale";
1557    const char *error;
1558    
1559    /* Set the default line ending value from the default in the PCRE library;
1560    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1561    */
1562    
1563    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1564    switch(i)
1565      {
1566      default:                 newline = (char *)"lf"; break;
1567      case '\r':               newline = (char *)"cr"; break;
1568      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1569      case -1:                 newline = (char *)"any"; break;
1570      case -2:                 newline = (char *)"anycrlf"; break;
1571      }
1572    
1573    /* Process the options */
1574    
1575    for (i = 1; i < argc; i++)
1576      {
1577      option_item *op = NULL;
1578      char *option_data = (char *)"";    /* default to keep compiler happy */
1579      BOOL longop;
1580      BOOL longopwasequals = FALSE;
1581    
1582      if (argv[i][0] != '-') break;
1583    
1584      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1585      but only if we have previously had -e or -f to define the patterns. */
1586    
1587      if (argv[i][1] == 0)
1588        {
1589        if (pattern_filename != NULL || pattern_count > 0) break;
1590          else exit(usage(2));
1591        }
1592    
1593      /* Handle a long name option, or -- to terminate the options */
1594    
1595      if (argv[i][1] == '-')
1596        {
1597        char *arg = argv[i] + 2;
1598        char *argequals = strchr(arg, '=');
1599    
1600        if (*arg == 0)    /* -- terminates options */
1601          {
1602          i++;
1603          break;                /* out of the options-handling loop */
1604          }
1605    
1606        longop = TRUE;
1607    
1608        /* Some long options have data that follows after =, for example file=name.
1609        Some options have variations in the long name spelling: specifically, we
1610        allow "regexp" because GNU grep allows it, though I personally go along
1611        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1612        These options are entered in the table as "regex(p)". No option is in both
1613        these categories, fortunately. */
1614    
1615        for (op = optionlist; op->one_char != 0; op++)
1616          {
1617          char *opbra = strchr(op->long_name, '(');
1618          char *equals = strchr(op->long_name, '=');
1619          if (opbra == NULL)     /* Not a (p) case */
1620            {
1621            if (equals == NULL)  /* Not thing=data case */
1622              {
1623              if (strcmp(arg, op->long_name) == 0) break;
1624              }
1625            else                 /* Special case xxx=data */
1626              {
1627              int oplen = equals - op->long_name;
1628              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1629              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1630                {
1631                option_data = arg + arglen;
1632                if (*option_data == '=')
1633                  {
1634                  option_data++;
1635                  longopwasequals = TRUE;
1636                  }
1637                break;
1638                }
1639              }
1640            }
1641          else                   /* Special case xxxx(p) */
1642            {
1643            char buff1[24];
1644            char buff2[24];
1645            int baselen = opbra - op->long_name;
1646            sprintf(buff1, "%.*s", baselen, op->long_name);
1647            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1648              opbra + 1);
1649            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1650              break;
1651            }
1652          }
1653    
1654        if (op->one_char == 0)
1655          {
1656        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1657        exit(usage(2));        exit(usage(2));
1658        }        }
1659      }      }
1660    
1661    /* One-char options */  
1662      /* Jeffrey Friedl's debugging harness uses these additional options which
1663      are not in the right form for putting in the option table because they use
1664      only one hyphen, yet are more than one character long. By putting them
1665      separately here, they will not get displayed as part of the help() output,
1666      but I don't think Jeffrey will care about that. */
1667    
1668    #ifdef JFRIEDL_DEBUG
1669      else if (strcmp(argv[i], "-pre") == 0) {
1670              jfriedl_prefix = argv[++i];
1671              continue;
1672      } else if (strcmp(argv[i], "-post") == 0) {
1673              jfriedl_postfix = argv[++i];
1674              continue;
1675      } else if (strcmp(argv[i], "-XT") == 0) {
1676              sscanf(argv[++i], "%d", &jfriedl_XT);
1677              continue;
1678      } else if (strcmp(argv[i], "-XR") == 0) {
1679              sscanf(argv[++i], "%d", &jfriedl_XR);
1680              continue;
1681      }
1682    #endif
1683    
1684    
1685      /* One-char options; many that have no data may be in a single argument; we
1686      continue till we hit the last one or one that needs data. */
1687    
1688    else    else
1689      {      {
1690      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1691        longop = FALSE;
1692      while (*s != 0)      while (*s != 0)
1693        {        {
1694        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1695            { if (*s == op->one_char) break; }
1696          if (op->one_char == 0)
1697          {          {
1698          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1699          if (pattern_filename[0] == 0)            *s, argv[i]);
1700            {          exit(usage(2));
1701            if (i >= argc - 1)          }
1702              {        if (op->type != OP_NODATA || s[1] == 0)
1703              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1704              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1705          break;          break;
1706          }          }
1707        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1708        }        }
1709      }      }
   }  
1710    
1711  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1712  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is NO_DATA, it means that there is no data, and the option might set
1713      something in the PCRE options. */
1714    
1715  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1716    {      {
1717    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1718    return 2;      continue;
1719    }      }
1720    
1721  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1722      either has a value or defaults to something. It cannot have data in a
1723      separate item. At the moment, the only such options are "colo(u)r" and
1724      Jeffrey Friedl's special -S debugging option. */
1725    
1726  if (pattern_filename != NULL)    if (*option_data == 0 &&
1727    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1728      {      {
1729      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1730        strerror(errno));        {
1731      return 2;        case N_COLOUR:
1732          colour_option = (char *)"auto";
1733          break;
1734    #ifdef JFRIEDL_DEBUG
1735          case 'S':
1736          S_arg = 0;
1737          break;
1738    #endif
1739          }
1740        continue;
1741      }      }
1742    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1743      /* Otherwise, find the data string for the option. */
1744    
1745      if (*option_data == 0)
1746      {      {
1747      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
1748      if (pattern_count >= MAX_PATTERN_COUNT)        {
1749          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1750          exit(usage(2));
1751          }
1752        option_data = argv[++i];
1753        }
1754    
1755      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1756      multiple times to create a list of patterns. */
1757    
1758      if (op->type == OP_PATLIST)
1759        {
1760        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1761        {        {
1762        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1763          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1764        return 2;        return 2;
1765        }        }
1766      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
1767      if (s == buffer) continue;      }
1768      *s = 0;  
1769      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
1770        &errptr, NULL);  
1771      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1772        {
1773        *((char **)op->dataptr) = option_data;
1774        }
1775      else
1776        {
1777        char *endptr;
1778        int n = strtoul(option_data, &endptr, 10);
1779        if (*endptr != 0)
1780        {        {
1781        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
1782          pattern_count, errptr, error);          {
1783        return 2;          char *equals = strchr(op->long_name, '=');
1784            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1785              equals - op->long_name;
1786            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1787              option_data, nlen, op->long_name);
1788            }
1789          else
1790            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1791              option_data, op->one_char);
1792          exit(usage(2));
1793        }        }
1794        *((int *)op->dataptr) = n;
1795        }
1796      }
1797    
1798    /* Options have been decoded. If -C was used, its value is used as a default
1799    for -A and -B. */
1800    
1801    if (both_context > 0)
1802      {
1803      if (after_context == 0) after_context = both_context;
1804      if (before_context == 0) before_context = both_context;
1805      }
1806    
1807    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1808    LC_ALL environment variable is set, and if so, use it. */
1809    
1810    if (locale == NULL)
1811      {
1812      locale = getenv("LC_ALL");
1813      locale_from = "LCC_ALL";
1814      }
1815    
1816    if (locale == NULL)
1817      {
1818      locale = getenv("LC_CTYPE");
1819      locale_from = "LC_CTYPE";
1820      }
1821    
1822    /* If a locale has been provided, set it, and generate the tables the PCRE
1823    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1824    
1825    if (locale != NULL)
1826      {
1827      if (setlocale(LC_CTYPE, locale) == NULL)
1828        {
1829        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1830          locale, locale_from);
1831        return 2;
1832        }
1833      pcretables = pcre_maketables();
1834      }
1835    
1836    /* Sort out colouring */
1837    
1838    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1839      {
1840      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1841      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1842      else
1843        {
1844        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1845          colour_option);
1846        return 2;
1847        }
1848      if (do_colour)
1849        {
1850        char *cs = getenv("PCREGREP_COLOUR");
1851        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1852        if (cs != NULL) colour_string = cs;
1853      }      }
   fclose(f);  
1854    }    }
1855    
1856  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
1857    
1858    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1859      {
1860      pcre_options |= PCRE_NEWLINE_CR;
1861      endlinetype = EL_CR;
1862      }
1863    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1864      {
1865      pcre_options |= PCRE_NEWLINE_LF;
1866      endlinetype = EL_LF;
1867      }
1868    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1869      {
1870      pcre_options |= PCRE_NEWLINE_CRLF;
1871      endlinetype = EL_CRLF;
1872      }
1873    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1874      {
1875      pcre_options |= PCRE_NEWLINE_ANY;
1876      endlinetype = EL_ANY;
1877      }
1878    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1879      {
1880      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1881      endlinetype = EL_ANYCRLF;
1882      }
1883  else  else
1884    {    {
1885    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1886    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
1887    if (pattern_list[0] == NULL)    }
1888    
1889    /* Interpret the text values for -d and -D */
1890    
1891    if (dee_option != NULL)
1892      {
1893      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1894      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1895      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1896      else
1897      {      {
1898      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
1899      return 2;      return 2;
1900      }      }
   pattern_count++;  
1901    }    }
1902    
1903  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
1904      {
1905      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1906      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1907      else
1908        {
1909        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1910        return 2;
1911        }
1912      }
1913    
1914    /* Check the values for Jeffrey Friedl's debugging options. */
1915    
1916    #ifdef JFRIEDL_DEBUG
1917    if (S_arg > 9)
1918      {
1919      fprintf(stderr, "pcregrep: bad value for -S option\n");
1920      return 2;
1921      }
1922    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1923      {
1924      if (jfriedl_XT == 0) jfriedl_XT = 1;
1925      if (jfriedl_XR == 0) jfriedl_XR = 1;
1926      }
1927    #endif
1928    
1929    /* Get memory to store the pattern and hints lists. */
1930    
1931    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1932    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1933    
1934    if (pattern_list == NULL || hints_list == NULL)
1935      {
1936      fprintf(stderr, "pcregrep: malloc failed\n");
1937      goto EXIT2;
1938      }
1939    
1940    /* If no patterns were provided by -e, and there is no file provided by -f,
1941    the first argument is the one and only pattern, and it must exist. */
1942    
1943    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1944      {
1945      if (i >= argc) return usage(2);
1946      patterns[cmd_pattern_count++] = argv[i++];
1947      }
1948    
1949    /* Compile the patterns that were provided on the command line, either by
1950    multiple uses of -e or as a single unkeyed pattern. */
1951    
1952    for (j = 0; j < cmd_pattern_count; j++)
1953      {
1954      if (!compile_pattern(patterns[j], pcre_options, NULL,
1955           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1956        goto EXIT2;
1957      }
1958    
1959    /* Compile the regular expressions that are provided in a file. */
1960    
1961    if (pattern_filename != NULL)
1962      {
1963      int linenumber = 0;
1964      FILE *f;
1965      char *filename;
1966      char buffer[MBUFTHIRD];
1967    
1968      if (strcmp(pattern_filename, "-") == 0)
1969        {
1970        f = stdin;
1971        filename = stdin_name;
1972        }
1973      else
1974        {
1975        f = fopen(pattern_filename, "r");
1976        if (f == NULL)
1977          {
1978          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1979            strerror(errno));
1980          goto EXIT2;
1981          }
1982        filename = pattern_filename;
1983        }
1984    
1985      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1986        {
1987        char *s = buffer + (int)strlen(buffer);
1988        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1989        *s = 0;
1990        linenumber++;
1991        if (buffer[0] == 0) continue;   /* Skip blank lines */
1992        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1993          goto EXIT2;
1994        }
1995    
1996      if (f != stdin) fclose(f);
1997      }
1998    
1999    /* Study the regular expressions, as we will be running them many times */
2000    
2001  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2002    {    {
# Line 646  for (j = 0; j < pattern_count; j++) Line 2006  for (j = 0; j < pattern_count; j++)
2006      char s[16];      char s[16];
2007      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2008      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2009      return 2;      goto EXIT2;
2010        }
2011      hint_count++;
2012      }
2013    
2014    /* If there are include or exclude patterns, compile them. */
2015    
2016    if (exclude_pattern != NULL)
2017      {
2018      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2019        pcretables);
2020      if (exclude_compiled == NULL)
2021        {
2022        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2023          errptr, error);
2024        goto EXIT2;
2025        }
2026      }
2027    
2028    if (include_pattern != NULL)
2029      {
2030      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2031        pcretables);
2032      if (include_compiled == NULL)
2033        {
2034        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2035          errptr, error);
2036        goto EXIT2;
2037      }      }
2038    }    }
2039    
2040  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2041    
2042  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2043      {
2044      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2045      goto EXIT;
2046      }
2047    
2048  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2049  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2050  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2051    otherwise forced. */
2052    
2053  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2054    
2055  for (; i < argc; i++)  for (; i < argc; i++)
2056    {    {
2057    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2058    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2059      if (frc > 1) rc = frc;
2060        else if (frc == 0 && rc == 1) rc = 0;
2061    }    }
2062    
2063    EXIT:
2064    if (pattern_list != NULL)
2065      {
2066      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2067      free(pattern_list);
2068      }
2069    if (hints_list != NULL)
2070      {
2071      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2072      free(hints_list);
2073      }
2074  return rc;  return rc;
2075    
2076    EXIT2:
2077    rc = 2;
2078    goto EXIT;
2079  }  }
2080    
2081  /* End */  /* End of pcregrep */

Legend:
Removed from v.75  
changed lines
  Added in v.150

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12