/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 279 by ph10, Tue Dec 4 20:01:43 2007 UTC
# Line 4  Line 4 
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories. */  directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    
54    #ifdef HAVE_UNISTD_H
55    #include <unistd.h>
56    #endif
57    
58  #include "pcre.h"  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
# Line 19  directories. */ Line 62  directories. */
62    
63  typedef int BOOL;  typedef int BOOL;
64    
 #define VERSION "3.0 14-Jan-2003"  
65  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
66    
67    #if BUFSIZ > 8192
68    #define MBUFTHIRD BUFSIZ
69    #else
70    #define MBUFTHIRD 8192
71    #endif
72    
73    /* Values for the "filenames" variable, which specifies options for file name
74    output. The order is important; it is assumed that a file name is wanted for
75    all values greater than FN_DEFAULT. */
76    
77    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
78    
79    /* Actions for the -d and -D options */
80    
81    enum { dee_READ, dee_SKIP, dee_RECURSE };
82    enum { DEE_READ, DEE_SKIP };
83    
84    /* Actions for special processing options (flag bits) */
85    
86    #define PO_WORD_MATCH     0x0001
87    #define PO_LINE_MATCH     0x0002
88    #define PO_FIXED_STRINGS  0x0004
89    
90    /* Line ending types */
91    
92    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
96  /*************************************************  /*************************************************
97  *               Global variables                 *  *               Global variables                 *
98  *************************************************/  *************************************************/
99    
100    /* Jeffrey Friedl has some debugging requirements that are not part of the
101    regular code. */
102    
103    #ifdef JFRIEDL_DEBUG
104    static int S_arg = -1;
105    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107    static const char *jfriedl_prefix = "";
108    static const char *jfriedl_postfix = "";
109    #endif
110    
111    static int  endlinetype;
112    
113    static char *colour_string = (char *)"1;31";
114    static char *colour_option = NULL;
115    static char *dee_option = NULL;
116    static char *DEE_option = NULL;
117    static char *newline = NULL;
118  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
119    static char *stdin_name = (char *)"(standard input)";
120    static char *locale = NULL;
121    
122    static const unsigned char *pcretables = NULL;
123    
124  static int  pattern_count = 0;  static int  pattern_count = 0;
125  static pcre **pattern_list;  static pcre **pattern_list = NULL;
126  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
127    
128    static char *include_pattern = NULL;
129    static char *exclude_pattern = NULL;
130    
131    static pcre *include_compiled = NULL;
132    static pcre *exclude_compiled = NULL;
133    
134    static int after_context = 0;
135    static int before_context = 0;
136    static int both_context = 0;
137    static int dee_action = dee_READ;
138    static int DEE_action = DEE_READ;
139    static int error_count = 0;
140    static int filenames = FN_DEFAULT;
141    static int process_options = 0;
142    
143  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
144  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
145  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
146  static BOOL invert = FALSE;  static BOOL invert = FALSE;
147    static BOOL multiline = FALSE;
148  static BOOL number = FALSE;  static BOOL number = FALSE;
149  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
150    static BOOL quiet = FALSE;
151  static BOOL silent = FALSE;  static BOOL silent = FALSE;
152  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
153    
154  /* Structure for options and list of them */  /* Structure for options and list of them */
155    
156    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
157           OP_PATLIST };
158    
159  typedef struct option_item {  typedef struct option_item {
160      int type;
161    int one_char;    int one_char;
162    char *long_name;    void *dataptr;
163    char *help_text;    const char *long_name;
164      const char *help_text;
165  } option_item;  } option_item;
166    
167    /* Options without a single-letter equivalent get a negative value. This can be
168    used to identify them. */
169    
170    #define N_COLOUR    (-1)
171    #define N_EXCLUDE   (-2)
172    #define N_HELP      (-3)
173    #define N_INCLUDE   (-4)
174    #define N_LABEL     (-5)
175    #define N_LOCALE    (-6)
176    #define N_NULL      (-7)
177    
178  static option_item optionlist[] = {  static option_item optionlist[] = {
179    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
180    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
181    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
182    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
183    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
184    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
185    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
186    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
187    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
188    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
189    { 'v', "invert-match", "select non-matching lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
190    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
191    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
192    { 0,    NULL,           NULL }    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
193      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
194      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
195      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
196      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
197      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
204      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
205      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
206      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
207    #ifdef JFRIEDL_DEBUG
208      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
209    #endif
210      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
211      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
212      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
213      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
214      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
215      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
216      { OP_NODATA,    0,        NULL,               NULL,            NULL }
217  };  };
218    
219    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221    that the combination of -w and -x has the same effect as -x on its own, so we
222    can treat them as the same. */
223    
224    static const char *prefix[] = {
225      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
226    
227    static const char *suffix[] = {
228      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230    /* UTF-8 tables - used only when the newline setting is "any" or "anycrlf". */
231    
232    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
234    const char utf8_table4[] = {
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
239    
240    
241    
242  /*************************************************  /*************************************************
243  *       Functions for directory scanning         *  *            OS-specific functions               *
244  *************************************************/  *************************************************/
245    
246  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
247  although at present the only ones are for Unix, Win32, and for "no directory  although at present the only ones are for Unix, Win32, and for "no support". */
 recursion support". */  
248    
249    
250  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
251    
252  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253  #include <sys/types.h>  #include <sys/types.h>
254  #include <sys/stat.h>  #include <sys/stat.h>
255  #include <dirent.h>  #include <dirent.h>
256    
257  typedef DIR directory_type;  typedef DIR directory_type;
258    
259  int  static int
260  isdirectory(char *filename)  isdirectory(char *filename)
261  {  {
262  struct stat statbuf;  struct stat statbuf;
# Line 94  if (stat(filename, &statbuf) < 0) Line 265  if (stat(filename, &statbuf) < 0)
265  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
266  }  }
267    
268  directory_type *  static directory_type *
269  opendirectory(char *filename)  opendirectory(char *filename)
270  {  {
271  return opendir(filename);  return opendir(filename);
272  }  }
273    
274  char *  static char *
275  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
276  {  {
277  for (;;)  for (;;)
# Line 110  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  void  static void
288  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
289  {  {
290  closedir(dir);  closedir(dir);
291  }  }
292    
293    
294    /************* Test for regular file in Unix **********/
295    
296    static int
297    isregfile(char *filename)
298    {
299    struct stat statbuf;
300    if (stat(filename, &statbuf) < 0)
301      return 1;        /* In the expectation that opening as a file will fail */
302    return (statbuf.st_mode & S_IFMT) == S_IFREG;
303    }
304    
305    
306    /************* Test stdout for being a terminal in Unix **********/
307    
308    static BOOL
309    is_stdout_tty(void)
310    {
311    return isatty(fileno(stdout));
312    }
313    
314    
315  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
316    
317  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
318  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319    when it did not exist. */
320    
321    
322  #elif HAVE_WIN32API  #elif HAVE_WINDOWS_H
323    
324  #ifndef STRICT  #ifndef STRICT
325  # define STRICT  # define STRICT
# Line 134  Lionel Fourquaux. */ Line 327  Lionel Fourquaux. */
327  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
328  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
329  #endif  #endif
330    #ifndef INVALID_FILE_ATTRIBUTES
331    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
332    #endif
333    
334  #include <windows.h>  #include <windows.h>
335    
336  typedef struct directory_type  typedef struct directory_type
# Line 213  free(dir); Line 410  free(dir);
410  }  }
411    
412    
413    /************* Test for regular file in Win32 **********/
414    
415    /* I don't know how to do this, or if it can be done; assume all paths are
416    regular if they are not directories. */
417    
418    int isregfile(char *filename)
419    {
420    return !isdirectory(filename)
421    }
422    
423    
424    /************* Test stdout for being a terminal in Win32 **********/
425    
426    /* I don't know how to do this; assume never */
427    
428    static BOOL
429    is_stdout_tty(void)
430    {
431    FALSE;
432    }
433    
434    
435  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
436    
437  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
# Line 221  free(dir); Line 440  free(dir);
440    
441  typedef void directory_type;  typedef void directory_type;
442    
443  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
444  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
445  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
446  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
447    
448    
449    /************* Test for regular file when we can't do it **********/
450    
451    /* Assume all files are regular. */
452    
453    int isregfile(char *filename) { return 1; }
454    
455    
456    /************* Test stdout for being a terminal when we can't do it **********/
457    
458    static BOOL
459    is_stdout_tty(void)
460    {
461    return FALSE;
462    }
463    
464    
465  #endif  #endif
466    
467    
468    
469  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
470  /*************************************************  /*************************************************
471  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
472  *************************************************/  *************************************************/
# Line 253  return sys_errlist[n]; Line 489  return sys_errlist[n];
489    
490    
491  /*************************************************  /*************************************************
492  *              Grep an individual file           *  *             Find end of line                   *
493  *************************************************/  *************************************************/
494    
495  static int  /* The length of the endline sequence that is found is set via lenptr. This may
496  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
 {  
 int rc = 1;  
 int linenumber = 0;  
 int count = 0;  
 int offsets[99];  
 char buffer[BUFSIZ];  
497    
498  while (fgets(buffer, sizeof(buffer), in) != NULL)  Arguments:
499      p         current position in line
500      endptr    end of available data
501      lenptr    where to put the length of the eol sequence
502    
503    Returns:    pointer just past the line, including its line-ending sequence
504    */
505    
506    static char *
507    end_of_line(char *p, char *endptr, int *lenptr)
508    {
509    switch(endlinetype)
510    {    {
511    BOOL match = FALSE;    default:      /* Just in case */
512    int i;    case EL_LF:
513    int length = (int)strlen(buffer);    while (p < endptr && *p != '\n') p++;
514    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (p < endptr)
515    linenumber++;      {
516        *lenptr = 1;
517        return p + 1;
518        }
519      *lenptr = 0;
520      return endptr;
521    
522    for (i = 0; !match && i < pattern_count; i++)    case EL_CR:
523      while (p < endptr && *p != '\r') p++;
524      if (p < endptr)
525      {      {
526      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      *lenptr = 1;
527        offsets, 99) >= 0;      return p + 1;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
528      }      }
529      *lenptr = 0;
530      return endptr;
531    
532    if (match != invert)    case EL_CRLF:
533      for (;;)
534      {      {
535      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
536        if (++p >= endptr)
537          {
538          *lenptr = 0;
539          return endptr;
540          }
541        if (*p == '\n')
542          {
543          *lenptr = 2;
544          return p + 1;
545          }
546        }
547      break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555      else if (filenames_only)      if (utf8 && c >= 0xc0)
556        {        {
557        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
558        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566        }        }
567    
568      else if (silent) return 0;      p += 1 + extra;
569    
570      else      switch (c)
571        {        {
572        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
573        if (number) fprintf(stdout, "%d:", linenumber);        *lenptr = 1;
574        fprintf(stdout, "%s\n", buffer);        return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587        }        }
588        }   /* End of loop for ANYCRLF case */
589    
590      rc = 0;    *lenptr = 0;  /* Must have hit the end */
591      }    return endptr;
   }  
592    
593  if (count_only)    case EL_ANY:
594    {    while (p < endptr)
595    if (name != NULL) fprintf(stdout, "%s:", name);      {
596    fprintf(stdout, "%d\n", count);      int extra = 0;
597    }      register int c = *((unsigned char *)p);
598    
599  return rc;      if (utf8 && c >= 0xc0)
600  }        {
601          int gcii, gcss;
602          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
603          gcss = 6*extra;
604          c = (c & utf8_table3[extra]) << gcss;
605          for (gcii = 1; gcii <= extra; gcii++)
606            {
607            gcss -= 6;
608            c |= (p[gcii] & 0x3f) << gcss;
609            }
610          }
611    
612        p += 1 + extra;
613    
614        switch (c)
615          {
616          case 0x0a:    /* LF */
617          case 0x0b:    /* VT */
618          case 0x0c:    /* FF */
619          *lenptr = 1;
620          return p;
621    
622          case 0x0d:    /* CR */
623          if (p < endptr && *p == 0x0a)
624            {
625            *lenptr = 2;
626            p++;
627            }
628          else *lenptr = 1;
629          return p;
630    
631          case 0x85:    /* NEL */
632          *lenptr = utf8? 2 : 1;
633          return p;
634    
635          case 0x2028:  /* LS */
636          case 0x2029:  /* PS */
637          *lenptr = 3;
638          return p;
639    
640          default:
641          break;
642          }
643        }   /* End of loop for ANY case */
644    
645      *lenptr = 0;  /* Must have hit the end */
646      return endptr;
647      }     /* End of overall switch */
648    }
649    
650    
651    
652  /*************************************************  /*************************************************
653  *     Grep a file or recurse into a directory    *  *         Find start of previous line            *
654  *************************************************/  *************************************************/
655    
656  static int  /* This is called when looking back for before lines to print.
 grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  
   BOOL only_one_at_top)  
 {  
 int rc = 1;  
 int sep;  
 FILE *in;  
657    
658  /* If the file is a directory and we are recursing, scan each file within it.  Arguments:
659  The scanning code is localized so it can be made system-specific. */    p         start of the subsequent line
660      startptr  start of available data
661    
662  if ((sep = isdirectory(filename)) != 0 && recurse)  Returns:    pointer to the start of the previous line
663    {  */
   char buffer[1024];  
   char *nextfile;  
   directory_type *dir = opendirectory(filename);  
664    
665    if (dir == NULL)  static char *
666      {  previous_line(char *p, char *startptr)
667      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  {
668        strerror(errno));  switch(endlinetype)
669      return 2;    {
670      }    default:      /* Just in case */
671      case EL_LF:
672      p--;
673      while (p > startptr && p[-1] != '\n') p--;
674      return p;
675    
676      case EL_CR:
677      p--;
678      while (p > startptr && p[-1] != '\n') p--;
679      return p;
680    
681    while ((nextfile = readdirectory(dir)) != NULL)    case EL_CRLF:
682      for (;;)
683      {      {
684      int frc;      p -= 2;
685      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      while (p > startptr && p[-1] != '\n') p--;
686      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      if (p <= startptr + 1 || p[-2] == '\r') return p;
     if (frc == 0 && rc == 1) rc = 0;  
687      }      }
688      return p;   /* But control should never get here */
689    
690    closedirectory(dir);    case EL_ANY:
691    return rc;    case EL_ANYCRLF:
692    }    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693      if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
695  /* If the file is not a directory, or we are not recursing, scan it. If this is    while (p > startptr)
696  the first and only argument at top level, we don't show the file name (unless      {
697  we are only showing the file name). Otherwise, control is via the      register int c;
698  show_filenames variable. */      char *pp = p - 1;
699    
700  in = fopen(filename, "r");      if (utf8)
701  if (in == NULL)        {
702    {        int extra = 0;
703    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));        while ((*pp & 0xc0) == 0x80) pp--;
704    return 2;        c = *((unsigned char *)pp);
705    }        if (c >= 0xc0)
706            {
707            int gcii, gcss;
708            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
709            gcss = 6*extra;
710            c = (c & utf8_table3[extra]) << gcss;
711            for (gcii = 1; gcii <= extra; gcii++)
712              {
713              gcss -= 6;
714              c |= (pp[gcii] & 0x3f) << gcss;
715              }
716            }
717          }
718        else c = *((unsigned char *)pp);
719    
720  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?      if (endlinetype == EL_ANYCRLF) switch (c)
721    filename : NULL);        {
722  fclose(in);        case 0x0a:    /* LF */
723  return rc;        case 0x0d:    /* CR */
724  }        return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731          {
732          case 0x0a:    /* LF */
733          case 0x0b:    /* VT */
734          case 0x0c:    /* FF */
735          case 0x0d:    /* CR */
736          case 0x85:    /* NEL */
737          case 0x2028:  /* LS */
738          case 0x2029:  /* PS */
739          return p;
740    
741          default:
742          break;
743          }
744    
745  /*************************************************      p = pp;  /* Back one character */
746  *                Usage function                  *      }        /* End of loop for ANY case */
 *************************************************/  
747    
748  static int    return startptr;  /* Hit start of data */
749  usage(int rc)    }     /* End of overall switch */
 {  
 fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  
 fprintf(stderr, "Type `pcregrep --help' for more information.\n");  
 return rc;  
750  }  }
751    
752    
753    
754    
755    
756  /*************************************************  /*************************************************
757  *                Help function                   *  *       Print the previous "after" lines         *
758  *************************************************/  *************************************************/
759    
760  static void  /* This is called if we are about to lose said lines because of buffer filling,
761  help(void)  and at the end of the file. The data in the line is written using fwrite() so
762  {  that a binary zero does not terminate it.
763  option_item *op;  
764    Arguments:
765  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");    lastmatchnumber   the number of the last matching line, plus one
766  printf("Search for PATTERN in each FILE or standard input.\n");    lastmatchrestart  where we restarted after the last match
767  printf("PATTERN must be present if -f is not used.\n");    endptr            end of available data
768  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");    printname         filename for printing
769    
770  printf("Options:\n");  Returns:            nothing
771    */
772    
773  for (op = optionlist; op->one_char != 0; op++)  static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774      char *endptr, char *printname)
775    {
776    if (after_context > 0 && lastmatchnumber > 0)
777    {    {
778    int n;    int count = 0;
779    char s[4];    while (lastmatchrestart < endptr && count++ < after_context)
780    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");      {
781    printf("  %s --%s%n", s, op->long_name, &n);      int ellength;
782    n = 30 - n;      char *pp = lastmatchrestart;
783    if (n < 1) n = 1;      if (printname != NULL) fprintf(stdout, "%s-", printname);
784    printf("%.*s%s\n", n, "                    ", op->help_text);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785        pp = end_of_line(pp, endptr, &ellength);
786        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787        lastmatchrestart = pp;
788        }
789      hyphenpending = TRUE;
790    }    }
   
 printf("\n  -f<filename>  or  --file=<filename>\n");  
 printf("    Read patterns from <filename> instead of using a command line option.\n");  
 printf("    Trailing white space is removed; blanks lines are ignored.\n");  
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
   
 printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  
 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  
791  }  }
792    
793    
794    
   
795  /*************************************************  /*************************************************
796  *                Handle an option                *  *            Grep an individual file             *
797  *************************************************/  *************************************************/
798    
799    /* This is called from grep_or_recurse() below. It uses a buffer that is three
800    times the value of MBUFTHIRD. The matching point is never allowed to stray into
801    the top third of the buffer, thus keeping more of the file available for
802    context printing or for multiline scanning. For large files, the pointer will
803    be in the middle third most of the time, so the bottom third is available for
804    "before" context printing.
805    
806    Arguments:
807      in           the fopened FILE stream
808      printname    the file name if it is to be printed for each match
809                   or NULL if the file name is not to be printed
810                   it cannot be NULL if filenames[_nomatch]_only is set
811    
812    Returns:       0 if there was at least one match
813                   1 otherwise (no matches)
814    */
815    
816  static int  static int
817  handle_option(int letter, int options)  pcregrep(FILE *in, char *printname)
818  {  {
819  switch(letter)  int rc = 1;
820    {  int linenumber = 1;
821    case -1:  help(); exit(0);  int lastmatchnumber = 0;
822    case 'c': count_only = TRUE; break;  int count = 0;
823    case 'h': filenames = FALSE; break;  int offsets[99];
824    case 'i': options |= PCRE_CASELESS; break;  char *lastmatchrestart = NULL;
825    case 'l': filenames_only = TRUE;  char buffer[3*MBUFTHIRD];
826    case 'n': number = TRUE; break;  char *ptr = buffer;
827    case 'r': recurse = TRUE; break;  char *endptr;
828    case 's': silent = TRUE; break;  size_t bufflength;
829    case 'u': options |= PCRE_UTF8; break;  BOOL endhyphenpending = FALSE;
830    case 'v': invert = TRUE; break;  
831    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;  /* Do the first read into the start of the buffer and set up the pointer to
832    end of what we have. */
833    case 'V':  
834    fprintf(stderr, "pcregrep version %s using ", VERSION);  bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835    fprintf(stderr, "PCRE version %s\n", pcre_version());  endptr = buffer + bufflength;
836    exit(0);  
837    break;  /* Loop while the current pointer is not at the end of the file. For large
838    files, endptr will be at the end of the buffer when we are in the middle of the
839    file, but ptr will never get there, because as soon as it gets over 2/3 of the
840    way, the buffer is shifted left and re-filled. */
841    
842    default:  while (ptr < endptr)
843    fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);    {
844    exit(usage(2));    int i, endlinelength;
845    }    int mrc = 0;
846      BOOL match = FALSE;
847      char *matchptr = ptr;
848      char *t = ptr;
849      size_t length, linelength;
850    
851      /* At this point, ptr is at the start of a line. We need to find the length
852      of the subject string to pass to pcre_exec(). In multiline mode, it is the
853      length remainder of the data in the buffer. Otherwise, it is the length of
854      the next line. After matching, we always advance by the length of the next
855      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
856      that any match is constrained to be in the first line. */
857    
858      t = end_of_line(t, endptr, &endlinelength);
859      linelength = t - ptr - endlinelength;
860      length = multiline? (size_t)(endptr - ptr) : linelength;
861    
862  return options;    /* Extra processing for Jeffrey Friedl's debugging. */
 }  
863    
864    #ifdef JFRIEDL_DEBUG
865      if (jfriedl_XT || jfriedl_XR)
866      {
867          #include <sys/time.h>
868          #include <time.h>
869          struct timeval start_time, end_time;
870          struct timezone dummy;
871    
872          if (jfriedl_XT)
873          {
874              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
875              const char *orig = ptr;
876              ptr = malloc(newlen + 1);
877              if (!ptr) {
878                      printf("out of memory");
879                      exit(2);
880              }
881              endptr = ptr;
882              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
883              for (i = 0; i < jfriedl_XT; i++) {
884                      strncpy(endptr, orig,  length);
885                      endptr += length;
886              }
887              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
888              length = newlen;
889          }
890    
891          if (gettimeofday(&start_time, &dummy) != 0)
892                  perror("bad gettimeofday");
893    
 /*************************************************  
 *                Main program                    *  
 *************************************************/  
894    
895  int        for (i = 0; i < jfriedl_XR; i++)
896  main(int argc, char **argv)            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
 {  
 int i, j;  
 int rc = 1;  
 int options = 0;  
 int errptr;  
 const char *error;  
 BOOL only_one_at_top;  
897    
898  /* Process the options */        if (gettimeofday(&end_time, &dummy) != 0)
899                  perror("bad gettimeofday");
900    
901  for (i = 1; i < argc; i++)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
902    {                        -
903    if (argv[i][0] != '-') break;                        (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
904    
905    /* Missing options */        printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
906          return 0;
907      }
908    #endif
909    
910    if (argv[i][1] == 0) exit(usage(2));    /* We come back here after a match when the -o option (only_matching) is set,
911      in order to find any further matches in the same line. */
912    
913      ONLY_MATCHING_RESTART:
914    
915    /* Long name options */    /* Run through all the patterns until one matches. Note that we don't include
916      the final newline in the subject string. */
917    
918    if (argv[i][1] == '-')    for (i = 0; i < pattern_count; i++)
919      {      {
920      option_item *op;      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
921          offsets, 99);
922      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (mrc >= 0) { match = TRUE; break; }
923        if (mrc != PCRE_ERROR_NOMATCH)
924        {        {
925        pattern_filename = argv[i] + 7;        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
926        continue;        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
927        }        fprintf(stderr, "this line:\n");
928          fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
929      for (op = optionlist; op->one_char != 0; op++)        fprintf(stderr, "\n");
930          if (error_count == 0 &&
931              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
932            {
933            fprintf(stderr, "pcregrep: error %d means that a resource limit "
934              "was exceeded\n", mrc);
935            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
936            }
937          if (error_count++ > 20)
938            {
939            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
940            exit(2);
941            }
942          match = invert;    /* No more matching; don't show the line again */
943          break;
944          }
945        }
946    
947      /* If it's a match or a not-match (as required), do what's wanted. */
948    
949      if (match != invert)
950        {
951        BOOL hyphenprinted = FALSE;
952    
953        /* We've failed if we want a file that doesn't have any matches. */
954    
955        if (filenames == FN_NOMATCH_ONLY) return 1;
956    
957        /* Just count if just counting is wanted. */
958    
959        if (count_only) count++;
960    
961        /* If all we want is a file name, there is no need to scan any more lines
962        in the file. */
963    
964        else if (filenames == FN_ONLY)
965          {
966          fprintf(stdout, "%s\n", printname);
967          return 0;
968          }
969    
970        /* Likewise, if all we want is a yes/no answer. */
971    
972        else if (quiet) return 0;
973    
974        /* The --only-matching option prints just the substring that matched, and
975        does not print any context. Afterwards, adjust the start and length, and
976        then jump back to look for further matches in the same line. If we are in
977        invert mode, however, nothing is printed - this could be useful still
978        because the return code is set. */
979    
980        else if (only_matching)
981          {
982          if (!invert)
983            {
984            if (printname != NULL) fprintf(stdout, "%s:", printname);
985            if (number) fprintf(stdout, "%d:", linenumber);
986            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
987            fprintf(stdout, "\n");
988            matchptr += offsets[1];
989            length -= offsets[1];
990            match = FALSE;
991            goto ONLY_MATCHING_RESTART;
992            }
993          }
994    
995        /* This is the default case when none of the above options is set. We print
996        the matching lines(s), possibly preceded and/or followed by other lines of
997        context. */
998    
999        else
1000          {
1001          /* See if there is a requirement to print some "after" lines from a
1002          previous match. We never print any overlaps. */
1003    
1004          if (after_context > 0 && lastmatchnumber > 0)
1005            {
1006            int ellength;
1007            int linecount = 0;
1008            char *p = lastmatchrestart;
1009    
1010            while (p < ptr && linecount < after_context)
1011              {
1012              p = end_of_line(p, ptr, &ellength);
1013              linecount++;
1014              }
1015    
1016            /* It is important to advance lastmatchrestart during this printing so
1017            that it interacts correctly with any "before" printing below. Print
1018            each line's data using fwrite() in case there are binary zeroes. */
1019    
1020            while (lastmatchrestart < p)
1021              {
1022              char *pp = lastmatchrestart;
1023              if (printname != NULL) fprintf(stdout, "%s-", printname);
1024              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1025              pp = end_of_line(pp, endptr, &ellength);
1026              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1027              lastmatchrestart = pp;
1028              }
1029            if (lastmatchrestart != ptr) hyphenpending = TRUE;
1030            }
1031    
1032          /* If there were non-contiguous lines printed above, insert hyphens. */
1033    
1034          if (hyphenpending)
1035            {
1036            fprintf(stdout, "--\n");
1037            hyphenpending = FALSE;
1038            hyphenprinted = TRUE;
1039            }
1040    
1041          /* See if there is a requirement to print some "before" lines for this
1042          match. Again, don't print overlaps. */
1043    
1044          if (before_context > 0)
1045            {
1046            int linecount = 0;
1047            char *p = ptr;
1048    
1049            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1050                   linecount < before_context)
1051              {
1052              linecount++;
1053              p = previous_line(p, buffer);
1054              }
1055    
1056            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1057              fprintf(stdout, "--\n");
1058    
1059            while (p < ptr)
1060              {
1061              int ellength;
1062              char *pp = p;
1063              if (printname != NULL) fprintf(stdout, "%s-", printname);
1064              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1065              pp = end_of_line(pp, endptr, &ellength);
1066              fwrite(p, 1, pp - p, stdout);
1067              p = pp;
1068              }
1069            }
1070    
1071          /* Now print the matching line(s); ensure we set hyphenpending at the end
1072          of the file if any context lines are being output. */
1073    
1074          if (after_context > 0 || before_context > 0)
1075            endhyphenpending = TRUE;
1076    
1077          if (printname != NULL) fprintf(stdout, "%s:", printname);
1078          if (number) fprintf(stdout, "%d:", linenumber);
1079    
1080          /* In multiline mode, we want to print to the end of the line in which
1081          the end of the matched string is found, so we adjust linelength and the
1082          line number appropriately, but only when there actually was a match
1083          (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1084          the match will always be before the first newline sequence. */
1085    
1086          if (multiline)
1087            {
1088            int ellength;
1089            char *endmatch = ptr;
1090            if (!invert)
1091              {
1092              endmatch += offsets[1];
1093              t = ptr;
1094              while (t < endmatch)
1095                {
1096                t = end_of_line(t, endptr, &ellength);
1097                if (t <= endmatch) linenumber++; else break;
1098                }
1099              }
1100            endmatch = end_of_line(endmatch, endptr, &ellength);
1101            linelength = endmatch - ptr - ellength;
1102            }
1103    
1104          /*** NOTE: Use only fwrite() to output the data line, so that binary
1105          zeroes are treated as just another data character. */
1106    
1107          /* This extra option, for Jeffrey Friedl's debugging requirements,
1108          replaces the matched string, or a specific captured string if it exists,
1109          with X. When this happens, colouring is ignored. */
1110    
1111    #ifdef JFRIEDL_DEBUG
1112          if (S_arg >= 0 && S_arg < mrc)
1113            {
1114            int first = S_arg * 2;
1115            int last  = first + 1;
1116            fwrite(ptr, 1, offsets[first], stdout);
1117            fprintf(stdout, "X");
1118            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1119            }
1120          else
1121    #endif
1122    
1123          /* We have to split the line(s) up if colouring. */
1124    
1125          if (do_colour)
1126            {
1127            fwrite(ptr, 1, offsets[0], stdout);
1128            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1129            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1130            fprintf(stdout, "%c[00m", 0x1b);
1131            fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1132              stdout);
1133            }
1134          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1135          }
1136    
1137        /* End of doing what has to be done for a match */
1138    
1139        rc = 0;    /* Had some success */
1140    
1141        /* Remember where the last match happened for after_context. We remember
1142        where we are about to restart, and that line's number. */
1143    
1144        lastmatchrestart = ptr + linelength + endlinelength;
1145        lastmatchnumber = linenumber + 1;
1146        }
1147    
1148      /* For a match in multiline inverted mode (which of course did not cause
1149      anything to be printed), we have to move on to the end of the match before
1150      proceeding. */
1151    
1152      if (multiline && invert && match)
1153        {
1154        int ellength;
1155        char *endmatch = ptr + offsets[1];
1156        t = ptr;
1157        while (t < endmatch)
1158          {
1159          t = end_of_line(t, endptr, &ellength);
1160          if (t <= endmatch) linenumber++; else break;
1161          }
1162        endmatch = end_of_line(endmatch, endptr, &ellength);
1163        linelength = endmatch - ptr - ellength;
1164        }
1165    
1166      /* Advance to after the newline and increment the line number. */
1167    
1168      ptr += linelength + endlinelength;
1169      linenumber++;
1170    
1171      /* If we haven't yet reached the end of the file (the buffer is full), and
1172      the current point is in the top 1/3 of the buffer, slide the buffer down by
1173      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1174      about to be lost, print them. */
1175    
1176      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1177        {
1178        if (after_context > 0 &&
1179            lastmatchnumber > 0 &&
1180            lastmatchrestart < buffer + MBUFTHIRD)
1181          {
1182          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1183          lastmatchnumber = 0;
1184          }
1185    
1186        /* Now do the shuffle */
1187    
1188        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1189        ptr -= MBUFTHIRD;
1190        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1191        endptr = buffer + bufflength;
1192    
1193        /* Adjust any last match point */
1194    
1195        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1196        }
1197      }     /* Loop through the whole file */
1198    
1199    /* End of file; print final "after" lines if wanted; do_after_lines sets
1200    hyphenpending if it prints something. */
1201    
1202    if (!only_matching && !count_only)
1203      {
1204      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1205      hyphenpending |= endhyphenpending;
1206      }
1207    
1208    /* Print the file name if we are looking for those without matches and there
1209    were none. If we found a match, we won't have got this far. */
1210    
1211    if (filenames == FN_NOMATCH_ONLY)
1212      {
1213      fprintf(stdout, "%s\n", printname);
1214      return 0;
1215      }
1216    
1217    /* Print the match count if wanted */
1218    
1219    if (count_only)
1220      {
1221      if (printname != NULL) fprintf(stdout, "%s:", printname);
1222      fprintf(stdout, "%d\n", count);
1223      }
1224    
1225    return rc;
1226    }
1227    
1228    
1229    
1230    /*************************************************
1231    *     Grep a file or recurse into a directory    *
1232    *************************************************/
1233    
1234    /* Given a path name, if it's a directory, scan all the files if we are
1235    recursing; if it's a file, grep it.
1236    
1237    Arguments:
1238      pathname          the path to investigate
1239      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1240      only_one_at_top   TRUE if the path is the only one at toplevel
1241    
1242    Returns:   0 if there was at least one match
1243               1 if there were no matches
1244               2 there was some kind of error
1245    
1246    However, file opening failures are suppressed if "silent" is set.
1247    */
1248    
1249    static int
1250    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1251    {
1252    int rc = 1;
1253    int sep;
1254    FILE *in;
1255    
1256    /* If the file name is "-" we scan stdin */
1257    
1258    if (strcmp(pathname, "-") == 0)
1259      {
1260      return pcregrep(stdin,
1261        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1262          stdin_name : NULL);
1263      }
1264    
1265    
1266    /* If the file is a directory, skip if skipping or if we are recursing, scan
1267    each file within it, subject to any include or exclude patterns that were set.
1268    The scanning code is localized so it can be made system-specific. */
1269    
1270    if ((sep = isdirectory(pathname)) != 0)
1271      {
1272      if (dee_action == dee_SKIP) return 1;
1273      if (dee_action == dee_RECURSE)
1274        {
1275        char buffer[1024];
1276        char *nextfile;
1277        directory_type *dir = opendirectory(pathname);
1278    
1279        if (dir == NULL)
1280          {
1281          if (!silent)
1282            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1283              strerror(errno));
1284          return 2;
1285          }
1286    
1287        while ((nextfile = readdirectory(dir)) != NULL)
1288          {
1289          int frc, blen;
1290          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1291          blen = strlen(buffer);
1292    
1293          if (exclude_compiled != NULL &&
1294              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1295            continue;
1296    
1297          if (include_compiled != NULL &&
1298              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1299            continue;
1300    
1301          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1302          if (frc > 1) rc = frc;
1303           else if (frc == 0 && rc == 1) rc = 0;
1304          }
1305    
1306        closedirectory(dir);
1307        return rc;
1308        }
1309      }
1310    
1311    /* If the file is not a directory and not a regular file, skip it if that's
1312    been requested. */
1313    
1314    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1315    
1316    /* Control reaches here if we have a regular file, or if we have a directory
1317    and recursion or skipping was not requested, or if we have anything else and
1318    skipping was not requested. The scan proceeds. If this is the first and only
1319    argument at top level, we don't show the file name, unless we are only showing
1320    the file name, or the filename was forced (-H). */
1321    
1322    in = fopen(pathname, "r");
1323    if (in == NULL)
1324      {
1325      if (!silent)
1326        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1327          strerror(errno));
1328      return 2;
1329      }
1330    
1331    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1332      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1333    
1334    fclose(in);
1335    return rc;
1336    }
1337    
1338    
1339    
1340    
1341    /*************************************************
1342    *                Usage function                  *
1343    *************************************************/
1344    
1345    static int
1346    usage(int rc)
1347    {
1348    option_item *op;
1349    fprintf(stderr, "Usage: pcregrep [-");
1350    for (op = optionlist; op->one_char != 0; op++)
1351      {
1352      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1353      }
1354    fprintf(stderr, "] [long options] [pattern] [files]\n");
1355    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1356    return rc;
1357    }
1358    
1359    
1360    
1361    
1362    /*************************************************
1363    *                Help function                   *
1364    *************************************************/
1365    
1366    static void
1367    help(void)
1368    {
1369    option_item *op;
1370    
1371    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1372    printf("Search for PATTERN in each FILE or standard input.\n");
1373    printf("PATTERN must be present if neither -e nor -f is used.\n");
1374    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1375    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1376    
1377    printf("Options:\n");
1378    
1379    for (op = optionlist; op->one_char != 0; op++)
1380      {
1381      int n;
1382      char s[4];
1383      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1384      printf("  %s --%s%n", s, op->long_name, &n);
1385      n = 30 - n;
1386      if (n < 1) n = 1;
1387      printf("%.*s%s\n", n, "                    ", op->help_text);
1388      }
1389    
1390    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1391    printf("trailing white space is removed and blank lines are ignored.\n");
1392    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1393    
1394    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1395    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1396    }
1397    
1398    
1399    
1400    
1401    /*************************************************
1402    *    Handle a single-letter, no data option      *
1403    *************************************************/
1404    
1405    static int
1406    handle_option(int letter, int options)
1407    {
1408    switch(letter)
1409      {
1410      case N_HELP: help(); exit(0);
1411      case 'c': count_only = TRUE; break;
1412      case 'F': process_options |= PO_FIXED_STRINGS; break;
1413      case 'H': filenames = FN_FORCE; break;
1414      case 'h': filenames = FN_NONE; break;
1415      case 'i': options |= PCRE_CASELESS; break;
1416      case 'l': filenames = FN_ONLY; break;
1417      case 'L': filenames = FN_NOMATCH_ONLY; break;
1418      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1419      case 'n': number = TRUE; break;
1420      case 'o': only_matching = TRUE; break;
1421      case 'q': quiet = TRUE; break;
1422      case 'r': dee_action = dee_RECURSE; break;
1423      case 's': silent = TRUE; break;
1424      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1425      case 'v': invert = TRUE; break;
1426      case 'w': process_options |= PO_WORD_MATCH; break;
1427      case 'x': process_options |= PO_LINE_MATCH; break;
1428    
1429      case 'V':
1430      fprintf(stderr, "pcregrep version %s\n", pcre_version());
1431      exit(0);
1432      break;
1433    
1434      default:
1435      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1436      exit(usage(2));
1437      }
1438    
1439    return options;
1440    }
1441    
1442    
1443    
1444    
1445    /*************************************************
1446    *          Construct printed ordinal             *
1447    *************************************************/
1448    
/* This turns a number into "1st", "3rd", etc. Numbers whose last two digits
are 11, 12, or 13 take "th" ("11th", "112th"), as does any number that does not
end in 1, 2, or 3. The text is built in a static buffer, which is overwritten
by the next call; the buffer is sized from sizeof(int) so that any value of n
fits.

Argument:   n    the number
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char buffer[sizeof(int) * 3 + 4];   /* Sign, digits, suffix, zero */
const char *ending = "th";
char *p = buffer;

p += sprintf(p, "%d", n);

/* The teens are irregular: 11th, 12th, 13th, not 11st, 12nd, 13rd */

if (n % 100 < 11 || n % 100 > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1467    
1468    
1469    
1470    /*************************************************
1471    *          Compile a single pattern              *
1472    *************************************************/
1473    
1474    /* When the -F option has been used, this is called for each substring.
1475    Otherwise it's called for each supplied pattern.
1476    
1477    Arguments:
1478      pattern        the pattern string
1479      options        the PCRE options
1480      filename       the file name, or NULL for a command-line pattern
1481      count          0 if this is the only command line pattern, or
1482                     number of the command line pattern, or
1483                     linenumber for a pattern from a file
1484    
1485    Returns:         TRUE on success, FALSE after an error
1486    */
1487    
1488    static BOOL
1489    compile_single_pattern(char *pattern, int options, char *filename, int count)
1490    {
1491    char buffer[MBUFTHIRD + 16];
1492    const char *error;
1493    int errptr;
1494    
1495    if (pattern_count >= MAX_PATTERN_COUNT)
1496      {
1497      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1498        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1499      return FALSE;
1500      }
1501    
1502    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1503      suffix[process_options]);
1504    pattern_list[pattern_count] =
1505      pcre_compile(buffer, options, &error, &errptr, pcretables);
1506    if (pattern_list[pattern_count] != NULL)
1507      {
1508      pattern_count++;
1509      return TRUE;
1510      }
1511    
1512    /* Handle compile errors */
1513    
1514    errptr -= (int)strlen(prefix[process_options]);
1515    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1516    
1517    if (filename == NULL)
1518      {
1519      if (count == 0)
1520        fprintf(stderr, "pcregrep: Error in command-line regex "
1521          "at offset %d: %s\n", errptr, error);
1522      else
1523        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1524          "at offset %d: %s\n", ordin(count), errptr, error);
1525      }
1526    else
1527      {
1528      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1529        "at offset %d: %s\n", count, filename, errptr, error);
1530      }
1531    
1532    return FALSE;
1533    }
1534    
1535    
1536    
1537    /*************************************************
1538    *           Compile one supplied pattern         *
1539    *************************************************/
1540    
1541    /* When the -F option has been used, each string may be a list of strings,
1542    separated by line breaks. They will be matched literally.
1543    
1544    Arguments:
1545      pattern        the pattern string
1546      options        the PCRE options
1547      filename       the file name, or NULL for a command-line pattern
1548      count          0 if this is the only command line pattern, or
1549                     number of the command line pattern, or
1550                     linenumber for a pattern from a file
1551    
1552    Returns:         TRUE on success, FALSE after an error
1553    */
1554    
1555    static BOOL
1556    compile_pattern(char *pattern, int options, char *filename, int count)
1557    {
1558    if ((process_options & PO_FIXED_STRINGS) != 0)
1559      {
1560      char *eop = pattern + strlen(pattern);
1561      char buffer[MBUFTHIRD];
1562      for(;;)
1563        {
1564        int ellength;
1565        char *p = end_of_line(pattern, eop, &ellength);
1566        if (ellength == 0)
1567          return compile_single_pattern(pattern, options, filename, count);
1568        sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1569        pattern = p;
1570        if (!compile_single_pattern(buffer, options, filename, count))
1571          return FALSE;
1572        }
1573      }
1574    else return compile_single_pattern(pattern, options, filename, count);
1575    }
1576    
1577    
1578    
1579    /*************************************************
1580    *                Main program                    *
1581    *************************************************/
1582    
1583    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1584    
1585    int
1586    main(int argc, char **argv)
1587    {
1588    int i, j;
1589    int rc = 1;
1590    int pcre_options = 0;
1591    int cmd_pattern_count = 0;
1592    int hint_count = 0;
1593    int errptr;
1594    BOOL only_one_at_top;
1595    char *patterns[MAX_PATTERN_COUNT];
1596    const char *locale_from = "--locale";
1597    const char *error;
1598    
1599    /* Set the default line ending value from the default in the PCRE library;
1600    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1601    */
1602    
1603    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1604    switch(i)
1605      {
1606      default:                 newline = (char *)"lf"; break;
1607      case '\r':               newline = (char *)"cr"; break;
1608      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1609      case -1:                 newline = (char *)"any"; break;
1610      case -2:                 newline = (char *)"anycrlf"; break;
1611      }
1612    
1613    /* Process the options */
1614    
1615    for (i = 1; i < argc; i++)
1616      {
1617      option_item *op = NULL;
1618      char *option_data = (char *)"";    /* default to keep compiler happy */
1619      BOOL longop;
1620      BOOL longopwasequals = FALSE;
1621    
1622      if (argv[i][0] != '-') break;
1623    
1624      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1625      but only if we have previously had -e or -f to define the patterns. */
1626    
1627      if (argv[i][1] == 0)
1628        {
1629        if (pattern_filename != NULL || pattern_count > 0) break;
1630          else exit(usage(2));
1631        }
1632    
1633      /* Handle a long name option, or -- to terminate the options */
1634    
1635      if (argv[i][1] == '-')
1636        {
1637        char *arg = argv[i] + 2;
1638        char *argequals = strchr(arg, '=');
1639    
1640        if (*arg == 0)    /* -- terminates options */
1641          {
1642          i++;
1643          break;                /* out of the options-handling loop */
1644          }
1645    
1646        longop = TRUE;
1647    
1648        /* Some long options have data that follows after =, for example file=name.
1649        Some options have variations in the long name spelling: specifically, we
1650        allow "regexp" because GNU grep allows it, though I personally go along
1651        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1652        These options are entered in the table as "regex(p)". No option is in both
1653        these categories, fortunately. */
1654    
1655        for (op = optionlist; op->one_char != 0; op++)
1656        {        {
1657        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1658          char *equals = strchr(op->long_name, '=');
1659          if (opbra == NULL)     /* Not a (p) case */
1660          {          {
1661          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1662          break;            {
1663              if (strcmp(arg, op->long_name) == 0) break;
1664              }
1665            else                 /* Special case xxx=data */
1666              {
1667              int oplen = equals - op->long_name;
1668              int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1669              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1670                {
1671                option_data = arg + arglen;
1672                if (*option_data == '=')
1673                  {
1674                  option_data++;
1675                  longopwasequals = TRUE;
1676                  }
1677                break;
1678                }
1679              }
1680            }
1681          else                   /* Special case xxxx(p) */
1682            {
1683            char buff1[24];
1684            char buff2[24];
1685            int baselen = opbra - op->long_name;
1686            sprintf(buff1, "%.*s", baselen, op->long_name);
1687            sprintf(buff2, "%s%.*s", buff1,
1688              (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1689            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1690              break;
1691          }          }
1692        }        }
1693    
1694      if (op->one_char == 0)      if (op->one_char == 0)
1695        {        {
1696        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 519  for (i = 1; i < argc; i++) Line 1698  for (i = 1; i < argc; i++)
1698        }        }
1699      }      }
1700    
1701    /* One-char options */  
1702      /* Jeffrey Friedl's debugging harness uses these additional options which
1703      are not in the right form for putting in the option table because they use
1704      only one hyphen, yet are more than one character long. By putting them
1705      separately here, they will not get displayed as part of the help() output,
1706      but I don't think Jeffrey will care about that. */
1707    
1708    #ifdef JFRIEDL_DEBUG
1709      else if (strcmp(argv[i], "-pre") == 0) {
1710              jfriedl_prefix = argv[++i];
1711              continue;
1712      } else if (strcmp(argv[i], "-post") == 0) {
1713              jfriedl_postfix = argv[++i];
1714              continue;
1715      } else if (strcmp(argv[i], "-XT") == 0) {
1716              sscanf(argv[++i], "%d", &jfriedl_XT);
1717              continue;
1718      } else if (strcmp(argv[i], "-XR") == 0) {
1719              sscanf(argv[++i], "%d", &jfriedl_XR);
1720              continue;
1721      }
1722    #endif
1723    
1724    
1725      /* One-char options; many that have no data may be in a single argument; we
1726      continue till we hit the last one or one that needs data. */
1727    
1728    else    else
1729      {      {
1730      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1731        longop = FALSE;
1732      while (*s != 0)      while (*s != 0)
1733        {        {
1734        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1735            { if (*s == op->one_char) break; }
1736          if (op->one_char == 0)
1737          {          {
1738          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1739          if (pattern_filename[0] == 0)            *s, argv[i]);
1740            {          exit(usage(2));
1741            if (i >= argc - 1)          }
1742              {        if (op->type != OP_NODATA || s[1] == 0)
1743              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1744              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1745          break;          break;
1746          }          }
1747        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1748        }        }
1749      }      }
   }  
1750    
1751  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1752  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is OP_NODATA, it means that there is no data, and the option might set
1753      something in the PCRE options. */
1754    
1755  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1756    {      {
1757    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1758    return 2;      continue;
1759    }      }
1760    
1761  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1762      either has a value or defaults to something. It cannot have data in a
1763      separate item. At the moment, the only such options are "colo(u)r" and
1764      Jeffrey Friedl's special -S debugging option. */
1765    
1766  if (pattern_filename != NULL)    if (*option_data == 0 &&
1767    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1768      {      {
1769      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1770        strerror(errno));        {
1771      return 2;        case N_COLOUR:
1772          colour_option = (char *)"auto";
1773          break;
1774    #ifdef JFRIEDL_DEBUG
1775          case 'S':
1776          S_arg = 0;
1777          break;
1778    #endif
1779          }
1780        continue;
1781      }      }
1782    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1783      /* Otherwise, find the data string for the option. */
1784    
1785      if (*option_data == 0)
1786      {      {
1787      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
     if (pattern_count >= MAX_PATTERN_COUNT)  
1788        {        {
1789        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1790          exit(usage(2));
1791          }
1792        option_data = argv[++i];
1793        }
1794    
1795      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1796      multiple times to create a list of patterns. */
1797    
1798      if (op->type == OP_PATLIST)
1799        {
1800        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1801          {
1802          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1803          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1804        return 2;        return 2;
1805        }        }
1806      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
1807      if (s == buffer) continue;      }
1808      *s = 0;  
1809      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
1810        &errptr, NULL);  
1811      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1812        {
1813        *((char **)op->dataptr) = option_data;
1814        }
1815      else
1816        {
1817        char *endptr;
1818        int n = strtoul(option_data, &endptr, 10);
1819        if (*endptr != 0)
1820        {        {
1821        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
1822          pattern_count, errptr, error);          {
1823        return 2;          char *equals = strchr(op->long_name, '=');
1824            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1825              equals - op->long_name;
1826            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1827              option_data, nlen, op->long_name);
1828            }
1829          else
1830            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1831              option_data, op->one_char);
1832          exit(usage(2));
1833        }        }
1834        *((int *)op->dataptr) = n;
1835        }
1836      }
1837    
1838    /* Options have been decoded. If -C was used, its value is used as a default
1839    for -A and -B. */
1840    
1841    if (both_context > 0)
1842      {
1843      if (after_context == 0) after_context = both_context;
1844      if (before_context == 0) before_context = both_context;
1845      }
1846    
1847    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1848    LC_ALL environment variable is set, and if so, use it. */
1849    
1850    if (locale == NULL)
1851      {
1852      locale = getenv("LC_ALL");
1853      locale_from = "LC_ALL";
1854      }
1855    
1856    if (locale == NULL)
1857      {
1858      locale = getenv("LC_CTYPE");
1859      locale_from = "LC_CTYPE";
1860      }
1861    
1862    /* If a locale has been provided, set it, and generate the tables the PCRE
1863    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1864    
1865    if (locale != NULL)
1866      {
1867      if (setlocale(LC_CTYPE, locale) == NULL)
1868        {
1869        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1870          locale, locale_from);
1871        return 2;
1872        }
1873      pcretables = pcre_maketables();
1874      }
1875    
1876    /* Sort out colouring */
1877    
1878    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1879      {
1880      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1881      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1882      else
1883        {
1884        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1885          colour_option);
1886        return 2;
1887        }
1888      if (do_colour)
1889        {
1890        char *cs = getenv("PCREGREP_COLOUR");
1891        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1892        if (cs != NULL) colour_string = cs;
1893      }      }
   fclose(f);  
1894    }    }
1895    
1896  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
1897    
1898    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1899      {
1900      pcre_options |= PCRE_NEWLINE_CR;
1901      endlinetype = EL_CR;
1902      }
1903    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1904      {
1905      pcre_options |= PCRE_NEWLINE_LF;
1906      endlinetype = EL_LF;
1907      }
1908    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1909      {
1910      pcre_options |= PCRE_NEWLINE_CRLF;
1911      endlinetype = EL_CRLF;
1912      }
1913    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1914      {
1915      pcre_options |= PCRE_NEWLINE_ANY;
1916      endlinetype = EL_ANY;
1917      }
1918    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1919      {
1920      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1921      endlinetype = EL_ANYCRLF;
1922      }
1923  else  else
1924    {    {
1925    if (i >= argc) return usage(2);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1926    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
1927    if (pattern_list[0] == NULL)    }
1928    
1929    /* Interpret the text values for -d and -D */
1930    
1931    if (dee_option != NULL)
1932      {
1933      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1934      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1935      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1936      else
1937      {      {
1938      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
       error);  
1939      return 2;      return 2;
1940      }      }
   pattern_count++;  
1941    }    }
1942    
1943  /* Study the regular expressions, as we will be running them may times */  if (DEE_option != NULL)
1944      {
1945      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1946      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1947      else
1948        {
1949        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1950        return 2;
1951        }
1952      }
1953    
1954    /* Check the values for Jeffrey Friedl's debugging options. */
1955    
1956    #ifdef JFRIEDL_DEBUG
1957    if (S_arg > 9)
1958      {
1959      fprintf(stderr, "pcregrep: bad value for -S option\n");
1960      return 2;
1961      }
1962    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1963      {
1964      if (jfriedl_XT == 0) jfriedl_XT = 1;
1965      if (jfriedl_XR == 0) jfriedl_XR = 1;
1966      }
1967    #endif
1968    
1969    /* Get memory to store the pattern and hints lists. */
1970    
1971    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1972    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1973    
1974    if (pattern_list == NULL || hints_list == NULL)
1975      {
1976      fprintf(stderr, "pcregrep: malloc failed\n");
1977      goto EXIT2;
1978      }
1979    
1980    /* If no patterns were provided by -e, and there is no file provided by -f,
1981    the first argument is the one and only pattern, and it must exist. */
1982    
1983    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1984      {
1985      if (i >= argc) return usage(2);
1986      patterns[cmd_pattern_count++] = argv[i++];
1987      }
1988    
1989    /* Compile the patterns that were provided on the command line, either by
1990    multiple uses of -e or as a single unkeyed pattern. */
1991    
1992    for (j = 0; j < cmd_pattern_count; j++)
1993      {
1994      if (!compile_pattern(patterns[j], pcre_options, NULL,
1995           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1996        goto EXIT2;
1997      }
1998    
1999    /* Compile the regular expressions that are provided in a file. */
2000    
2001    if (pattern_filename != NULL)
2002      {
2003      int linenumber = 0;
2004      FILE *f;
2005      char *filename;
2006      char buffer[MBUFTHIRD];
2007    
2008      if (strcmp(pattern_filename, "-") == 0)
2009        {
2010        f = stdin;
2011        filename = stdin_name;
2012        }
2013      else
2014        {
2015        f = fopen(pattern_filename, "r");
2016        if (f == NULL)
2017          {
2018          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2019            strerror(errno));
2020          goto EXIT2;
2021          }
2022        filename = pattern_filename;
2023        }
2024    
2025      while (fgets(buffer, MBUFTHIRD, f) != NULL)
2026        {
2027        char *s = buffer + (int)strlen(buffer);
2028        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2029        *s = 0;
2030        linenumber++;
2031        if (buffer[0] == 0) continue;   /* Skip blank lines */
2032        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2033          goto EXIT2;
2034        }
2035    
2036      if (f != stdin) fclose(f);
2037      }
2038    
2039    /* Study the regular expressions, as we will be running them many times */
2040    
2041  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
2042    {    {
# Line 615  for (j = 0; j < pattern_count; j++) Line 2046  for (j = 0; j < pattern_count; j++)
2046      char s[16];      char s[16];
2047      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2048      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2049      return 2;      goto EXIT2;
2050        }
2051      hint_count++;
2052      }
2053    
2054    /* If there are include or exclude patterns, compile them. */
2055    
2056    if (exclude_pattern != NULL)
2057      {
2058      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2059        pcretables);
2060      if (exclude_compiled == NULL)
2061        {
2062        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2063          errptr, error);
2064        goto EXIT2;
2065        }
2066      }
2067    
2068    if (include_pattern != NULL)
2069      {
2070      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2071        pcretables);
2072      if (include_compiled == NULL)
2073        {
2074        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2075          errptr, error);
2076        goto EXIT2;
2077      }      }
2078    }    }
2079    
2080  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
2081    
2082  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
2083      {
2084      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2085      goto EXIT;
2086      }
2087    
2088  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
2089  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
2090  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
2091    otherwise forced. */
2092    
2093  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
2094    
2095  for (; i < argc; i++)  for (; i < argc; i++)
2096    {    {
2097    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2098    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
2099      if (frc > 1) rc = frc;
2100        else if (frc == 0 && rc == 1) rc = 0;
2101    }    }
2102    
2103    EXIT:
2104    if (pattern_list != NULL)
2105      {
2106      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2107      free(pattern_list);
2108      }
2109    if (hints_list != NULL)
2110      {
2111      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2112      free(hints_list);
2113      }
2114  return rc;  return rc;
2115    
2116    EXIT2:
2117    rc = 2;
2118    goto EXIT;
2119  }  }
2120    
2121  /* End */  /* End of pcregrep */

Legend:
Removed from v.63  
changed lines
  Added in v.279

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12