/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 141 by ph10, Fri Mar 30 15:46:27 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51  #include "pcre.h"  #include <sys/types.h>
52    #include <sys/stat.h>
53    #ifdef HAVE_UNISTD_H
54    #  include <unistd.h>
55    #endif
56    
57    #include <pcre.h>
58    
59  #define FALSE 0  #define FALSE 0
60  #define TRUE 1  #define TRUE 1
61    
62  typedef int BOOL;  typedef int BOOL;
63    
 #define VERSION "2.0 01-Aug-2001"  
64  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
65    
66    #if BUFSIZ > 8192
67    #define MBUFTHIRD BUFSIZ
68    #else
69    #define MBUFTHIRD 8192
70    #endif
71    
72    /* Values for the "filenames" variable, which specifies options for file name
73    output. The order is important; it is assumed that a file name is wanted for
74    all values greater than FN_DEFAULT. */
75    
76    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
77    
78    /* Actions for the -d and -D options */
79    
80    enum { dee_READ, dee_SKIP, dee_RECURSE };
81    enum { DEE_READ, DEE_SKIP };
82    
83    /* Actions for special processing options (flag bits) */
84    
85    #define PO_WORD_MATCH     0x0001
86    #define PO_LINE_MATCH     0x0002
87    #define PO_FIXED_STRINGS  0x0004
88    
89    /* Line ending types */
90    
91    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
92    
93    
94    
95  /*************************************************  /*************************************************
96  *               Global variables                 *  *               Global variables                 *
97  *************************************************/  *************************************************/
98    
99    /* Jeffrey Friedl has some debugging requirements that are not part of the
100    regular code. */
101    
102    #ifdef JFRIEDL_DEBUG
103    static int S_arg = -1;
104    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
105    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
106    static const char *jfriedl_prefix = "";
107    static const char *jfriedl_postfix = "";
108    #endif
109    
110    static int  endlinetype;
111    
112    static char *colour_string = (char *)"1;31";
113    static char *colour_option = NULL;
114    static char *dee_option = NULL;
115    static char *DEE_option = NULL;
116    static char *newline = NULL;
117  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
118    static char *stdin_name = (char *)"(standard input)";
119    static char *locale = NULL;
120    
121    static const unsigned char *pcretables = NULL;
122    
123  static int  pattern_count = 0;  static int  pattern_count = 0;
124  static pcre **pattern_list;  static pcre **pattern_list = NULL;
125  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
126    
127    static char *include_pattern = NULL;
128    static char *exclude_pattern = NULL;
129    
130    static pcre *include_compiled = NULL;
131    static pcre *exclude_compiled = NULL;
132    
133    static int after_context = 0;
134    static int before_context = 0;
135    static int both_context = 0;
136    static int dee_action = dee_READ;
137    static int DEE_action = DEE_READ;
138    static int error_count = 0;
139    static int filenames = FN_DEFAULT;
140    static int process_options = 0;
141    
142  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
143  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
144  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
145  static BOOL invert = FALSE;  static BOOL invert = FALSE;
146    static BOOL multiline = FALSE;
147  static BOOL number = FALSE;  static BOOL number = FALSE;
148  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
149    static BOOL quiet = FALSE;
150  static BOOL silent = FALSE;  static BOOL silent = FALSE;
151  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
152    
153  /* Structure for options and list of them */  /* Structure for options and list of them */
154    
155    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
156           OP_PATLIST };
157    
158  typedef struct option_item {  typedef struct option_item {
159      int type;
160    int one_char;    int one_char;
161    char *long_name;    void *dataptr;
162    char *help_text;    const char *long_name;
163      const char *help_text;
164  } option_item;  } option_item;
165    
166    /* Options without a single-letter equivalent get a negative value. This can be
167    used to identify them. */
168    
169    #define N_COLOUR    (-1)
170    #define N_EXCLUDE   (-2)
171    #define N_HELP      (-3)
172    #define N_INCLUDE   (-4)
173    #define N_LABEL     (-5)
174    #define N_LOCALE    (-6)
175    #define N_NULL      (-7)
176    
177  static option_item optionlist[] = {  static option_item optionlist[] = {
178    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
179    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
180    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
181    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
182    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
183    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
184    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
185    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
186    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
187    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
188    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
189    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
190    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
191      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
192      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
193      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
194      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
195      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
196      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
197      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
198      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
199      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF)" },
200      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
201      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
202      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
203      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
204      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
205      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
206    #ifdef JFRIEDL_DEBUG
207      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
208    #endif
209      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
210      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
211      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
212      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
213      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
214      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
215      { OP_NODATA,    0,        NULL,               NULL,            NULL }
216  };  };
217    
218    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
219    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
220    that the combination of -w and -x has the same effect as -x on its own, so we
221    can treat them as the same. */
222    
223    static const char *prefix[] = {
224      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
225    
226    static const char *suffix[] = {
227      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
228    
229    /* UTF-8 tables - used only when the newline setting is "all". */
230    
231    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
232    
233    const char utf8_table4[] = {
234      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
235      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
237      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
238    
239    
240    
241  /*************************************************  /*************************************************
242  *       Functions for directory scanning         *  *            OS-specific functions               *
243  *************************************************/  *************************************************/
244    
245  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
246  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
247    
248    
249  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
250    
251  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
252  #include <sys/types.h>  #include <sys/types.h>
253  #include <sys/stat.h>  #include <sys/stat.h>
254  #include <dirent.h>  #include <dirent.h>
255    
256  typedef DIR directory_type;  typedef DIR directory_type;
257    
258  int  static int
259  isdirectory(char *filename)  isdirectory(char *filename)
260  {  {
261  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 264  if (stat(filename, &statbuf) < 0)
264  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
265  }  }
266    
267  directory_type *  static directory_type *
268  opendirectory(char *filename)  opendirectory(char *filename)
269  {  {
270  return opendir(filename);  return opendir(filename);
271  }  }
272    
273  char *  static char *
274  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
275  {  {
276  for (;;)  for (;;)
# Line 111  for (;;) Line 283  for (;;)
283  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
284  }  }
285    
286  void  static void
287  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
288  {  {
289  closedir(dir);  closedir(dir);
290  }  }
291    
292    
293  #else  /************* Test for regular file in Unix **********/
294    
295    static int
296    isregfile(char *filename)
297    {
298    struct stat statbuf;
299    if (stat(filename, &statbuf) < 0)
300      return 1;        /* In the expectation that opening as a file will fail */
301    return (statbuf.st_mode & S_IFMT) == S_IFREG;
302    }
303    
304    
305    /************* Test stdout for being a terminal in Unix **********/
306    
307    static BOOL
308    is_stdout_tty(void)
309    {
310    return isatty(fileno(stdout));
311    }
312    
313    
314    /************* Directory scanning in Win32 ***********/
315    
316    /* I (Philip Hazel) have no means of testing this code. It was contributed by
317    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
318    when it did not exist. */
319    
320    
321    #elif HAVE_WINDOWS_H
322    
323    #ifndef STRICT
324    # define STRICT
325    #endif
326    #ifndef WIN32_LEAN_AND_MEAN
327    # define WIN32_LEAN_AND_MEAN
328    #endif
329    #ifndef INVALID_FILE_ATTRIBUTES
330    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
331    #endif
332    
333    #include <windows.h>
334    
335    typedef struct directory_type   /* State of one Win32 directory scan */
336    {
337    HANDLE handle;          /* Search handle obtained from FindFirstFile() */
338    BOOL first;             /* TRUE until the entry fetched by FindFirstFile() has been handed out */
339    WIN32_FIND_DATA data;   /* The most recently fetched directory entry */
340    } directory_type;
341    
342    int
343    isdirectory(char *filename)
344    {
345    DWORD attr = GetFileAttributes(filename);
346    if (attr == INVALID_FILE_ATTRIBUTES)
347      return 0;
348    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
349    }
350    
351    directory_type *
352    opendirectory(char *filename)
353    {
354    size_t len;
355    char *pattern;
356    directory_type *dir;
357    DWORD err;
358    len = strlen(filename);
359    pattern = (char *) malloc(len + 3);
360    dir = (directory_type *) malloc(sizeof(*dir));
361    if ((pattern == NULL) || (dir == NULL))
362      {
363      fprintf(stderr, "pcregrep: malloc failed\n");
364      exit(2);
365      }
366    memcpy(pattern, filename, len);
367    memcpy(&(pattern[len]), "\\*", 3);
368    dir->handle = FindFirstFile(pattern, &(dir->data));
369    if (dir->handle != INVALID_HANDLE_VALUE)
370      {
371      free(pattern);
372      dir->first = TRUE;
373      return dir;
374      }
375    err = GetLastError();
376    free(pattern);
377    free(dir);
378    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
379    return NULL;
380    }
381    
382    char *
383    readdirectory(directory_type *dir)
384    {
385    for (;;)
386      {
387      if (!dir->first)
388        {
389        if (!FindNextFile(dir->handle, &(dir->data)))
390          return NULL;
391        }
392      else
393        {
394        dir->first = FALSE;
395        }
396      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
397        return dir->data.cFileName;
398      }
399    #ifndef _MSC_VER
400    return NULL;   /* Keep compiler happy; never executed */
401    #endif
402    }
403    
404    void
405    closedirectory(directory_type *dir)
406    {
407    FindClose(dir->handle);
408    free(dir);
409    }
410    
411    
412    /************* Test for regular file in Win32 **********/
413    
/* I don't know how to test for a regular file in Win32, or if it can be done;
assume all paths are regular if they are not directories.

Arguments:
  filename   path to examine

Returns:     non-zero if isdirectory() does not report a directory;
             zero if it does
*/

int isregfile(char *filename)
{
return !isdirectory(filename);
}
421    
422    
423    /************* Test stdout for being a terminal in Win32 **********/
424    
425    /* I don't know how to do this; assume never */
426    
427    static BOOL
428    is_stdout_tty(void)
429    {
430    FALSE;
431    }
432    
433    
434  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
435    
436  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
437    
438    #else
439    
440  typedef void directory_type;  typedef void directory_type;
441    
442  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
443  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
444  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
445  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
446    
447    
448    /************* Test for regular when we can't do it **********/
449    
/* With no means of examining the file system, assume all files are regular.

Arguments:
  filename   path to examine (not inspected)

Returns:     always 1
*/

int
isregfile(char *filename)
{
(void)filename;   /* Nothing can be checked on this kind of system */
return 1;
}
453    
454    
455    /************* Test stdout for being a terminal when we can't do it **********/
456    
457    static BOOL
458    is_stdout_tty(void)
459    {
460    return FALSE;
461    }
462    
463    
464  #endif  #endif
465    
466    
467    
468  #if ! HAVE_STRERROR  #ifndef HAVE_STRERROR
469  /*************************************************  /*************************************************
470  *     Provide strerror() for non-ANSI libraries  *  *     Provide strerror() for non-ANSI libraries  *
471  *************************************************/  *************************************************/
# Line 159  return sys_errlist[n]; Line 488  return sys_errlist[n];
488    
489    
490  /*************************************************  /*************************************************
491  *              Grep an individual file           *  *             Find end of line                   *
492    *************************************************/
493    
494    /* The length of the endline sequence that is found is set via lenptr. This may
495    be zero at the very end of the file if there is no line-ending sequence there.
496    
497    Arguments:
498      p         current position in line
499      endptr    end of available data
500      lenptr    where to put the length of the eol sequence
501    
502    Returns:    pointer to the last byte of the line
503    */
504    
505    static char *
506    end_of_line(char *p, char *endptr, int *lenptr)
507    {
508    switch(endlinetype)
509      {
510      default:      /* Just in case */
511      case EL_LF:
512      while (p < endptr && *p != '\n') p++;
513      if (p < endptr)
514        {
515        *lenptr = 1;
516        return p + 1;
517        }
518      *lenptr = 0;
519      return endptr;
520    
521      case EL_CR:
522      while (p < endptr && *p != '\r') p++;
523      if (p < endptr)
524        {
525        *lenptr = 1;
526        return p + 1;
527        }
528      *lenptr = 0;
529      return endptr;
530    
531      case EL_CRLF:
532      for (;;)
533        {
534        while (p < endptr && *p != '\r') p++;
535        if (++p >= endptr)
536          {
537          *lenptr = 0;
538          return endptr;
539          }
540        if (*p == '\n')
541          {
542          *lenptr = 2;
543          return p + 1;
544          }
545        }
546      break;
547    
548      case EL_ANY:
549      while (p < endptr)
550        {
551        int extra = 0;
552        register int c = *((unsigned char *)p);
553    
554        if (utf8 && c >= 0xc0)
555          {
556          int gcii, gcss;
557          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
558          gcss = 6*extra;
559          c = (c & utf8_table3[extra]) << gcss;
560          for (gcii = 1; gcii <= extra; gcii++)
561            {
562            gcss -= 6;
563            c |= (p[gcii] & 0x3f) << gcss;
564            }
565          }
566    
567        p += 1 + extra;
568    
569        switch (c)
570          {
571          case 0x0a:    /* LF */
572          case 0x0b:    /* VT */
573          case 0x0c:    /* FF */
574          *lenptr = 1;
575          return p;
576    
577          case 0x0d:    /* CR */
578          if (p < endptr && *p == 0x0a)
579            {
580            *lenptr = 2;
581            p++;
582            }
583          else *lenptr = 1;
584          return p;
585    
586          case 0x85:    /* NEL */
587          *lenptr = utf8? 2 : 1;
588          return p;
589    
590          case 0x2028:  /* LS */
591          case 0x2029:  /* PS */
592          *lenptr = 3;
593          return p;
594    
595          default:
596          break;
597          }
598        }   /* End of loop for ANY case */
599    
600      *lenptr = 0;  /* Must have hit the end */
601      return endptr;
602      }     /* End of overall switch */
603    }
604    
605    
606    
607    /*************************************************
608    *         Find start of previous line            *
609  *************************************************/  *************************************************/
610    
611    /* This is called when looking back for before lines to print.
612    
613    Arguments:
614      p         start of the subsequent line
615      startptr  start of available data
616    
617    Returns:    pointer to the start of the previous line
618    */
619    
620    static char *
621    previous_line(char *p, char *startptr)
622    {
623    switch(endlinetype)
624      {
625      default:      /* Just in case */
626      case EL_LF:
627      p--;
628      while (p > startptr && p[-1] != '\n') p--;
629      return p;
630    
631      case EL_CR:
632      p--;
633      while (p > startptr && p[-1] != '\n') p--;
634      return p;
635    
636      case EL_CRLF:
637      for (;;)
638        {
639        p -= 2;
640        while (p > startptr && p[-1] != '\n') p--;
641        if (p <= startptr + 1 || p[-2] == '\r') return p;
642        }
643      return p;   /* But control should never get here */
644    
645      case EL_ANY:
646      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
647      if (utf8) while ((*p & 0xc0) == 0x80) p--;
648    
649      while (p > startptr)
650        {
651        register int c;
652        char *pp = p - 1;
653    
654        if (utf8)
655          {
656          int extra = 0;
657          while ((*pp & 0xc0) == 0x80) pp--;
658          c = *((unsigned char *)pp);
659          if (c >= 0xc0)
660            {
661            int gcii, gcss;
662            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
663            gcss = 6*extra;
664            c = (c & utf8_table3[extra]) << gcss;
665            for (gcii = 1; gcii <= extra; gcii++)
666              {
667              gcss -= 6;
668              c |= (pp[gcii] & 0x3f) << gcss;
669              }
670            }
671          }
672        else c = *((unsigned char *)pp);
673    
674        switch (c)
675          {
676          case 0x0a:    /* LF */
677          case 0x0b:    /* VT */
678          case 0x0c:    /* FF */
679          case 0x0d:    /* CR */
680          case 0x85:    /* NEL */
681          case 0x2028:  /* LS */
682          case 0x2029:  /* PS */
683          return p;
684    
685          default:
686          break;
687          }
688    
689        p = pp;  /* Back one character */
690        }        /* End of loop for ANY case */
691    
692      return startptr;  /* Hit start of data */
693      }     /* End of overall switch */
694    }
695    
696    
697    
698    
699    
700    /*************************************************
701    *       Print the previous "after" lines         *
702    *************************************************/
703    
704    /* This is called if we are about to lose said lines because of buffer filling,
705    and at the end of the file. The data in the line is written using fwrite() so
706    that a binary zero does not terminate it.
707    
708    Arguments:
709      lastmatchnumber   the number of the last matching line, plus one
710      lastmatchrestart  where we restarted after the last match
711      endptr            end of available data
712      printname         filename for printing
713    
714    Returns:            nothing
715    */
716    
717    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
718      char *endptr, char *printname)
719    {
720    if (after_context > 0 && lastmatchnumber > 0)
721      {
722      int count = 0;
723      while (lastmatchrestart < endptr && count++ < after_context)
724        {
725        int ellength;
726        char *pp = lastmatchrestart;
727        if (printname != NULL) fprintf(stdout, "%s-", printname);
728        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
729        pp = end_of_line(pp, endptr, &ellength);
730        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
731        lastmatchrestart = pp;
732        }
733      hyphenpending = TRUE;
734      }
735    }
736    
737    
738    
739    /*************************************************
740    *            Grep an individual file             *
741    *************************************************/
742    
743    /* This is called from grep_or_recurse() below. It uses a buffer that is three
744    times the value of MBUFTHIRD. The matching point is never allowed to stray into
745    the top third of the buffer, thus keeping more of the file available for
746    context printing or for multiline scanning. For large files, the pointer will
747    be in the middle third most of the time, so the bottom third is available for
748    "before" context printing.
749    
750    Arguments:
751      in           the fopened FILE stream
752      printname    the file name if it is to be printed for each match
753                   or NULL if the file name is not to be printed
754                   it cannot be NULL if filenames[_nomatch]_only is set
755    
756    Returns:       0 if there was at least one match
757                   1 otherwise (no matches)
758    */
759    
760  static int  static int
761  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
762  {  {
763  int rc = 1;  int rc = 1;
764  int linenumber = 0;  int linenumber = 1;
765    int lastmatchnumber = 0;
766  int count = 0;  int count = 0;
767  int offsets[99];  int offsets[99];
768  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
769    char buffer[3*MBUFTHIRD];
770    char *ptr = buffer;
771    char *endptr;
772    size_t bufflength;
773    BOOL endhyphenpending = FALSE;
774    
775    /* Do the first read into the start of the buffer and set up the pointer to
776    end of what we have. */
777    
778    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
779    endptr = buffer + bufflength;
780    
781    /* Loop while the current pointer is not at the end of the file. For large
782    files, endptr will be at the end of the buffer when we are in the middle of the
783    file, but ptr will never get there, because as soon as it gets over 2/3 of the
784    way, the buffer is shifted left and re-filled. */
785    
786  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
787    {    {
788      int i, endlinelength;
789      int mrc = 0;
790    BOOL match = FALSE;    BOOL match = FALSE;
791    int i;    char *t = ptr;
792    int length = (int)strlen(buffer);    size_t length, linelength;
793    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
794    linenumber++;    /* At this point, ptr is at the start of a line. We need to find the length
795      of the subject string to pass to pcre_exec(). In multiline mode, it is the
796      length remainder of the data in the buffer. Otherwise, it is the length of
797      the next line. After matching, we always advance by the length of the next
798      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
799      that any match is constrained to be in the first line. */
800    
801      t = end_of_line(t, endptr, &endlinelength);
802      linelength = t - ptr - endlinelength;
803      length = multiline? endptr - ptr : linelength;
804    
805      /* Extra processing for Jeffrey Friedl's debugging. */
806    
807    #ifdef JFRIEDL_DEBUG
808      if (jfriedl_XT || jfriedl_XR)
809      {
810          #include <sys/time.h>
811          #include <time.h>
812          struct timeval start_time, end_time;
813          struct timezone dummy;
814    
815          if (jfriedl_XT)
816          {
817              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
818              const char *orig = ptr;
819              ptr = malloc(newlen + 1);
820              if (!ptr) {
821                      printf("out of memory");
822                      exit(2);
823              }
824              endptr = ptr;
825              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
826              for (i = 0; i < jfriedl_XT; i++) {
827                      strncpy(endptr, orig,  length);
828                      endptr += length;
829              }
830              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
831              length = newlen;
832          }
833    
834          if (gettimeofday(&start_time, &dummy) != 0)
835                  perror("bad gettimeofday");
836    
837    
838          for (i = 0; i < jfriedl_XR; i++)
839              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
840    
841          if (gettimeofday(&end_time, &dummy) != 0)
842                  perror("bad gettimeofday");
843    
844          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
845                          -
846                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
847    
848          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
849          return 0;
850      }
851    #endif
852    
853    
854    for (i = 0; !match && i < pattern_count; i++)    /* Run through all the patterns until one matches. Note that we don't include
855      the final newline in the subject string. */
856    
857      for (i = 0; i < pattern_count; i++)
858      {      {
859      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
860        offsets, 99) >= 0;        offsets, 99);
861      if (match && whole_lines && offsets[1] != length) match = FALSE;      if (mrc >= 0) { match = TRUE; break; }
862        if (mrc != PCRE_ERROR_NOMATCH)
863          {
864          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
865          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
866          fprintf(stderr, "this line:\n");
867          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
868          fprintf(stderr, "\n");
869          if (error_count == 0 &&
870              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
871            {
872            fprintf(stderr, "pcregrep: error %d means that a resource limit "
873              "was exceeded\n", mrc);
874            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
875            }
876          if (error_count++ > 20)
877            {
878            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
879            exit(2);
880            }
881          match = invert;    /* No more matching; don't show the line again */
882          break;
883          }
884      }      }
885    
886      /* If it's a match or a not-match (as required), do what's wanted. */
887    
888    if (match != invert)    if (match != invert)
889      {      {
890        BOOL hyphenprinted = FALSE;
891    
892        /* We've failed if we want a file that doesn't have any matches. */
893    
894        if (filenames == FN_NOMATCH_ONLY) return 1;
895    
896        /* Just count if just counting is wanted. */
897    
898      if (count_only) count++;      if (count_only) count++;
899    
900      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
901        in the file. */
902    
903        else if (filenames == FN_ONLY)
904        {        {
905        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
906        return 0;        return 0;
907        }        }
908    
909      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
910    
911        else if (quiet) return 0;
912    
913        /* The --only-matching option prints just the substring that matched, and
914        does not print any context. */
915    
916        else if (only_matching)
917          {
918          if (printname != NULL) fprintf(stdout, "%s:", printname);
919          if (number) fprintf(stdout, "%d:", linenumber);
920          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
921          fprintf(stdout, "\n");
922          }
923    
924        /* This is the default case when none of the above options is set. We print
925        the matching line(s), possibly preceded and/or followed by other lines of
926        context. */
927    
928      else      else
929        {        {
930        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
931          previous match. We never print any overlaps. */
932    
933          if (after_context > 0 && lastmatchnumber > 0)
934            {
935            int ellength;
936            int linecount = 0;
937            char *p = lastmatchrestart;
938    
939            while (p < ptr && linecount < after_context)
940              {
941              p = end_of_line(p, ptr, &ellength);
942              linecount++;
943              }
944    
945            /* It is important to advance lastmatchrestart during this printing so
946            that it interacts correctly with any "before" printing below. Print
947            each line's data using fwrite() in case there are binary zeroes. */
948    
949            while (lastmatchrestart < p)
950              {
951              char *pp = lastmatchrestart;
952              if (printname != NULL) fprintf(stdout, "%s-", printname);
953              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
954              pp = end_of_line(pp, endptr, &ellength);
955              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
956              lastmatchrestart = pp;
957              }
958            if (lastmatchrestart != ptr) hyphenpending = TRUE;
959            }
960    
961          /* If there were non-contiguous lines printed above, insert hyphens. */
962    
963          if (hyphenpending)
964            {
965            fprintf(stdout, "--\n");
966            hyphenpending = FALSE;
967            hyphenprinted = TRUE;
968            }
969    
970          /* See if there is a requirement to print some "before" lines for this
971          match. Again, don't print overlaps. */
972    
973          if (before_context > 0)
974            {
975            int linecount = 0;
976            char *p = ptr;
977    
978            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
979                   linecount < before_context)
980              {
981              linecount++;
982              p = previous_line(p, buffer);
983              }
984    
985            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
986              fprintf(stdout, "--\n");
987    
988            while (p < ptr)
989              {
990              int ellength;
991              char *pp = p;
992              if (printname != NULL) fprintf(stdout, "%s-", printname);
993              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
994              pp = end_of_line(pp, endptr, &ellength);
995              fwrite(p, 1, pp - p, stdout);
996              p = pp;
997              }
998            }
999    
1000          /* Now print the matching line(s); ensure we set hyphenpending at the end
1001          of the file if any context lines are being output. */
1002    
1003          if (after_context > 0 || before_context > 0)
1004            endhyphenpending = TRUE;
1005    
1006          if (printname != NULL) fprintf(stdout, "%s:", printname);
1007        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1008        fprintf(stdout, "%s\n", buffer);  
1009          /* In multiline mode, we want to print to the end of the line in which
1010          the end of the matched string is found, so we adjust linelength and the
1011          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1012          start of the match will always be before the first newline sequence. */
1013    
1014          if (multiline)
1015            {
1016            int ellength;
1017            char *endmatch = ptr + offsets[1];
1018            t = ptr;
1019            while (t < endmatch)
1020              {
1021              t = end_of_line(t, endptr, &ellength);
1022              if (t <= endmatch) linenumber++; else break;
1023              }
1024            endmatch = end_of_line(endmatch, endptr, &ellength);
1025            linelength = endmatch - ptr - ellength;
1026            }
1027    
1028          /*** NOTE: Use only fwrite() to output the data line, so that binary
1029          zeroes are treated as just another data character. */
1030    
1031          /* This extra option, for Jeffrey Friedl's debugging requirements,
1032          replaces the matched string, or a specific captured string if it exists,
1033          with X. When this happens, colouring is ignored. */
1034    
1035    #ifdef JFRIEDL_DEBUG
1036          if (S_arg >= 0 && S_arg < mrc)
1037            {
1038            int first = S_arg * 2;
1039            int last  = first + 1;
1040            fwrite(ptr, 1, offsets[first], stdout);
1041            fprintf(stdout, "X");
1042            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1043            }
1044          else
1045    #endif
1046    
1047          /* We have to split the line(s) up if colouring. */
1048    
1049          if (do_colour)
1050            {
1051            fwrite(ptr, 1, offsets[0], stdout);
1052            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1053            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1054            fprintf(stdout, "%c[00m", 0x1b);
1055            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1056            }
1057          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1058        }        }
1059    
1060      rc = 0;      /* End of doing what has to be done for a match */
1061    
1062        rc = 0;    /* Had some success */
1063    
1064        /* Remember where the last match happened for after_context. We remember
1065        where we are about to restart, and that line's number. */
1066    
1067        lastmatchrestart = ptr + linelength + endlinelength;
1068        lastmatchnumber = linenumber + 1;
1069      }      }
1070    
1071      /* Advance to after the newline and increment the line number. */
1072    
1073      ptr += linelength + endlinelength;
1074      linenumber++;
1075    
1076      /* If we haven't yet reached the end of the file (the buffer is full), and
1077      the current point is in the top 1/3 of the buffer, slide the buffer down by
1078      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1079      about to be lost, print them. */
1080    
1081      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1082        {
1083        if (after_context > 0 &&
1084            lastmatchnumber > 0 &&
1085            lastmatchrestart < buffer + MBUFTHIRD)
1086          {
1087          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1088          lastmatchnumber = 0;
1089          }
1090    
1091        /* Now do the shuffle */
1092    
1093        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1094        ptr -= MBUFTHIRD;
1095        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1096        endptr = buffer + bufflength;
1097    
1098        /* Adjust any last match point */
1099    
1100        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1101        }
1102      }     /* Loop through the whole file */
1103    
1104    /* End of file; print final "after" lines if wanted; do_after_lines sets
1105    hyphenpending if it prints something. */
1106    
1107    if (!only_matching && !count_only)
1108      {
1109      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1110      hyphenpending |= endhyphenpending;
1111      }
1112    
1113    /* Print the file name if we are looking for those without matches and there
1114    were none. If we found a match, we won't have got this far. */
1115    
1116    if (filenames == FN_NOMATCH_ONLY)
1117      {
1118      fprintf(stdout, "%s\n", printname);
1119      return 0;
1120    }    }
1121    
1122    /* Print the match count if wanted */
1123    
1124  if (count_only)  if (count_only)
1125    {    {
1126    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
1127    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
1128    }    }
1129    
# Line 220  return rc; Line 1132  return rc;
1132    
1133    
1134    
   
1135  /*************************************************  /*************************************************
1136  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
1137  *************************************************/  *************************************************/
1138    
1139    /* Given a path name, if it's a directory, scan all the files if we are
1140    recursing; if it's a file, grep it.
1141    
1142    Arguments:
1143      pathname          the path to investigate
1144      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1145      only_one_at_top   TRUE if the path is the only one at toplevel
1146    
1147    Returns:   0 if there was at least one match
1148               1 if there were no matches
1149               2 there was some kind of error
1150    
1151    However, file opening failures are suppressed if "silent" is set.
1152    */
1153    
1154  static int  static int
1155  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1156  {  {
1157  int rc = 1;  int rc = 1;
1158  int sep;  int sep;
1159  FILE *in;  FILE *in;
1160    
1161  /* If the file is a directory and we are recursing, scan each file within it.  /* If the file name is "-" we scan stdin */
1162    
1163    if (strcmp(pathname, "-") == 0)
1164      {
1165      return pcregrep(stdin,
1166        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1167          stdin_name : NULL);
1168      }
1169    
1170    
1171    /* If the file is a directory, skip if skipping or if we are recursing, scan
1172    each file within it, subject to any include or exclude patterns that were set.
1173  The scanning code is localized so it can be made system-specific. */  The scanning code is localized so it can be made system-specific. */
1174    
1175  if ((sep = isdirectory(filename)) != 0 && recurse)  if ((sep = isdirectory(pathname)) != 0)
1176    {    {
1177    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1178    char *nextfile;    if (dee_action == dee_RECURSE)
1179    directory_type *dir = opendirectory(filename);      {
1180        char buffer[1024];
1181        char *nextfile;
1182        directory_type *dir = opendirectory(pathname);
1183    
1184        if (dir == NULL)
1185          {
1186          if (!silent)
1187            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1188              strerror(errno));
1189          return 2;
1190          }
1191    
1192        while ((nextfile = readdirectory(dir)) != NULL)
1193          {
1194          int frc, blen;
1195          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1196          blen = strlen(buffer);
1197    
1198          if (exclude_compiled != NULL &&
1199              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1200            continue;
1201    
1202          if (include_compiled != NULL &&
1203              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1204            continue;
1205    
1206          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1207          if (frc > 1) rc = frc;
1208           else if (frc == 0 && rc == 1) rc = 0;
1209          }
1210    
1211    if (dir == NULL)      closedirectory(dir);
1212      {      return rc;
     fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,  
       strerror(errno));  
     return 2;  
1213      }      }
1214      }
1215    
1216    while ((nextfile = readdirectory(dir)) != NULL)  /* If the file is not a directory and not a regular file, skip it if that's
1217      {  been requested. */
     int frc;  
     sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);  
     frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);  
     if (frc == 0 && rc == 1) rc = 0;  
     }  
1218    
1219    closedirectory(dir);  else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
   return rc;  
   }  
1220    
1221  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* Control reaches here if we have a regular file, or if we have a directory
1222  the first and only argument at top level, we don't show the file name.  and recursion or skipping was not requested, or if we have anything else and
1223  Otherwise, control is via the show_filenames variable. */  skipping was not requested. The scan proceeds. If this is the first and only
1224    argument at top level, we don't show the file name, unless we are only showing
1225    the file name, or the filename was forced (-H). */
1226    
1227  in = fopen(filename, "r");  in = fopen(pathname, "r");
1228  if (in == NULL)  if (in == NULL)
1229    {    {
1230    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
1231        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1232          strerror(errno));
1233    return 2;    return 2;
1234    }    }
1235    
1236  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1237      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1238    
1239  fclose(in);  fclose(in);
1240  return rc;  return rc;
1241  }  }
# Line 287  return rc; Line 1250  return rc;
1250  static int  static int
1251  usage(int rc)  usage(int rc)
1252  {  {
1253  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  option_item *op;
1254    fprintf(stderr, "Usage: pcregrep [-");
1255    for (op = optionlist; op->one_char != 0; op++)
1256      {
1257      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1258      }
1259    fprintf(stderr, "] [long options] [pattern] [files]\n");
1260  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1261  return rc;  return rc;
1262  }  }
# Line 304  help(void) Line 1273  help(void)
1273  {  {
1274  option_item *op;  option_item *op;
1275    
1276  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1277  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1278    printf("PATTERN must be present if neither -e nor -f is used.\n");
1279    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1280  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1281    
1282  printf("Options:\n");  printf("Options:\n");
# Line 321  for (op = optionlist; op->one_char != 0; Line 1292  for (op = optionlist; op->one_char != 0;
1292    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1293    }    }
1294    
1295  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1296  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1297  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1298    
1299  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1300  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1301  }  }
1302    
# Line 334  printf("Exit status is 0 if any matches, Line 1304  printf("Exit status is 0 if any matches,
1304    
1305    
1306  /*************************************************  /*************************************************
1307  *                Handle an option                *  *    Handle a single-letter, no data option      *
1308  *************************************************/  *************************************************/
1309    
1310  static int  static int
# Line 342  handle_option(int letter, int options) Line 1312  handle_option(int letter, int options)
1312  {  {
1313  switch(letter)  switch(letter)
1314    {    {
1315    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1316    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1317    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1318      case 'H': filenames = FN_FORCE; break;
1319      case 'h': filenames = FN_NONE; break;
1320    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1321    case 'l': filenames_only = TRUE;    case 'l': filenames = FN_ONLY; break;
1322      case 'L': filenames = FN_NOMATCH_ONLY; break;
1323      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1324    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1325    case 'r': recurse = TRUE; break;    case 'o': only_matching = TRUE; break;
1326      case 'q': quiet = TRUE; break;
1327      case 'r': dee_action = dee_RECURSE; break;
1328    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1329      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1330    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1331    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1332      case 'x': process_options |= PO_LINE_MATCH; break;
1333    
1334    case 'V':    case 'V':
1335    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1336    exit(0);    exit(0);
1337    break;    break;
1338    
# Line 371  return options; Line 1348  return options;
1348    
1349    
1350  /*************************************************  /*************************************************
1351    *          Construct printed ordinal             *
1352    *************************************************/
1353    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc.

Argument:   n   the number to convert

Returns:    a pointer to a static buffer holding the text; the buffer is
            overwritten by the next call, so use or copy the result first
*/

static char *
ordin(int n)
{
static char buffer[3*sizeof(int) + 4];  /* room for sign, digits, suffix, NUL */
char *p = buffer;
int last_two = n % 100;

sprintf(p, "%d", n);
while (*p != 0) p++;

/* Numbers ending in 11, 12 or 13 take "th" even though their last digit is 1,
2 or 3 ("11th", "112th", not "11st", "112nd"). */

if (last_two >= 11 && last_two <= 13) strcpy(p, "th");
else switch (n%10)
  {
  case 1: strcpy(p, "st"); break;
  case 2: strcpy(p, "nd"); break;
  case 3: strcpy(p, "rd"); break;
  default: strcpy(p, "th"); break;
  }
return buffer;
}
1371    }
1372    
1373    
1374    
1375    /*************************************************
1376    *          Compile a single pattern              *
1377    *************************************************/
1378    
1379    /* When the -F option has been used, this is called for each substring.
1380    Otherwise it's called for each supplied pattern.
1381    
1382    Arguments:
1383      pattern        the pattern string
1384      options        the PCRE options
1385      filename       the file name, or NULL for a command-line pattern
1386      count          0 if this is the only command line pattern, or
1387                     number of the command line pattern, or
1388                     linenumber for a pattern from a file
1389    
1390    Returns:         TRUE on success, FALSE after an error
1391    */
1392    
1393    static BOOL
1394    compile_single_pattern(char *pattern, int options, char *filename, int count)
1395    {
1396    char buffer[MBUFTHIRD + 16];
1397    const char *error;
1398    int errptr;
1399    
1400    if (pattern_count >= MAX_PATTERN_COUNT)
1401      {
1402      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1403        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1404      return FALSE;
1405      }
1406    
1407    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1408      suffix[process_options]);
1409    pattern_list[pattern_count] =
1410      pcre_compile(buffer, options, &error, &errptr, pcretables);
1411    if (pattern_list[pattern_count] != NULL)
1412      {
1413      pattern_count++;
1414      return TRUE;
1415      }
1416    
1417    /* Handle compile errors */
1418    
1419    errptr -= (int)strlen(prefix[process_options]);
1420    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1421    
1422    if (filename == NULL)
1423      {
1424      if (count == 0)
1425        fprintf(stderr, "pcregrep: Error in command-line regex "
1426          "at offset %d: %s\n", errptr, error);
1427      else
1428        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1429          "at offset %d: %s\n", ordin(count), errptr, error);
1430      }
1431    else
1432      {
1433      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1434        "at offset %d: %s\n", count, filename, errptr, error);
1435      }
1436    
1437    return FALSE;
1438    }
1439    
1440    
1441    
1442    /*************************************************
1443    *           Compile one supplied pattern         *
1444    *************************************************/
1445    
1446    /* When the -F option has been used, each string may be a list of strings,
1447    separated by line breaks. They will be matched literally.
1448    
1449    Arguments:
1450      pattern        the pattern string
1451      options        the PCRE options
1452      filename       the file name, or NULL for a command-line pattern
1453      count          0 if this is the only command line pattern, or
1454                     number of the command line pattern, or
1455                     linenumber for a pattern from a file
1456    
1457    Returns:         TRUE on success, FALSE after an error
1458    */
1459    
1460    static BOOL
1461    compile_pattern(char *pattern, int options, char *filename, int count)
1462    {
1463    if ((process_options & PO_FIXED_STRINGS) != 0)
1464      {
1465      char *eop = pattern + strlen(pattern);
1466      char buffer[MBUFTHIRD];
1467      for(;;)
1468        {
1469        int ellength;
1470        char *p = end_of_line(pattern, eop, &ellength);
1471        if (ellength == 0)
1472          return compile_single_pattern(pattern, options, filename, count);
1473        sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1474        pattern = p;
1475        if (!compile_single_pattern(buffer, options, filename, count))
1476          return FALSE;
1477        }
1478      }
1479    else return compile_single_pattern(pattern, options, filename, count);
1480    }
1481    
1482    
1483    
1484    /*************************************************
1485  *                Main program                    *  *                Main program                    *
1486  *************************************************/  *************************************************/
1487    
1488    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1489    
1490  int  int
1491  main(int argc, char **argv)  main(int argc, char **argv)
1492  {  {
1493  int i, j;  int i, j;
1494  int rc = 1;  int rc = 1;
1495  int options = 0;  int pcre_options = 0;
1496    int cmd_pattern_count = 0;
1497    int hint_count = 0;
1498  int errptr;  int errptr;
 const char *error;  
1499  BOOL only_one_at_top;  BOOL only_one_at_top;
1500    char *patterns[MAX_PATTERN_COUNT];
1501    const char *locale_from = "--locale";
1502    const char *error;
1503    
1504    /* Set the default line ending value from the default in the PCRE library;
1505    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1506    */
1507    
1508    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1509    switch(i)
1510      {
1511      default:                 newline = (char *)"lf"; break;
1512      case '\r':               newline = (char *)"cr"; break;
1513      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1514      case -1:                 newline = (char *)"any"; break;
1515      }
1516    
1517  /* Process the options */  /* Process the options */
1518    
1519  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1520    {    {
1521      option_item *op = NULL;
1522      char *option_data = (char *)"";    /* default to keep compiler happy */
1523      BOOL longop;
1524      BOOL longopwasequals = FALSE;
1525    
1526    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1527    
1528    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1529      but only if we have previously had -e or -f to define the patterns. */
1530    
1531      if (argv[i][1] == 0)
1532        {
1533        if (pattern_filename != NULL || pattern_count > 0) break;
1534          else exit(usage(2));
1535        }
1536    
1537      /* Handle a long name option, or -- to terminate the options */
1538    
1539    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1540      {      {
1541      option_item *op;      char *arg = argv[i] + 2;
1542        char *argequals = strchr(arg, '=');
1543    
1544      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1545        {        {
1546        pattern_filename = argv[i] + 7;        i++;
1547        continue;        break;                /* out of the options-handling loop */
1548        }        }
1549    
1550        longop = TRUE;
1551    
1552        /* Some long options have data that follows after =, for example file=name.
1553        Some options have variations in the long name spelling: specifically, we
1554        allow "regexp" because GNU grep allows it, though I personally go along
1555        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1556        These options are entered in the table as "regex(p)". No option is in both
1557        these categories, fortunately. */
1558    
1559      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1560        {        {
1561        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1562          char *equals = strchr(op->long_name, '=');
1563          if (opbra == NULL)     /* Not a (p) case */
1564          {          {
1565          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1566          break;            {
1567              if (strcmp(arg, op->long_name) == 0) break;
1568              }
1569            else                 /* Special case xxx=data */
1570              {
1571              int oplen = equals - op->long_name;
1572              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1573              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1574                {
1575                option_data = arg + arglen;
1576                if (*option_data == '=')
1577                  {
1578                  option_data++;
1579                  longopwasequals = TRUE;
1580                  }
1581                break;
1582                }
1583              }
1584            }
1585          else                   /* Special case xxxx(p) */
1586            {
1587            char buff1[24];
1588            char buff2[24];
1589            int baselen = opbra - op->long_name;
1590            sprintf(buff1, "%.*s", baselen, op->long_name);
1591            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1592              opbra + 1);
1593            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1594              break;
1595          }          }
1596        }        }
1597    
1598      if (op->one_char == 0)      if (op->one_char == 0)
1599        {        {
1600        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1602  for (i = 1; i < argc; i++)
1602        }        }
1603      }      }
1604    
1605    /* One-char options */  
1606      /* Jeffrey Friedl's debugging harness uses these additional options which
1607      are not in the right form for putting in the option table because they use
1608      only one hyphen, yet are more than one character long. By putting them
1609      separately here, they will not get displayed as part of the help() output,
1610      but I don't think Jeffrey will care about that. */
1611    
1612    #ifdef JFRIEDL_DEBUG
1613      else if (strcmp(argv[i], "-pre") == 0) {
1614              jfriedl_prefix = argv[++i];
1615              continue;
1616      } else if (strcmp(argv[i], "-post") == 0) {
1617              jfriedl_postfix = argv[++i];
1618              continue;
1619      } else if (strcmp(argv[i], "-XT") == 0) {
1620              sscanf(argv[++i], "%d", &jfriedl_XT);
1621              continue;
1622      } else if (strcmp(argv[i], "-XR") == 0) {
1623              sscanf(argv[++i], "%d", &jfriedl_XR);
1624              continue;
1625      }
1626    #endif
1627    
1628    
1629      /* One-char options; many that have no data may be in a single argument; we
1630      continue till we hit the last one or one that needs data. */
1631    
1632    else    else
1633      {      {
1634      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1635        longop = FALSE;
1636      while (*s != 0)      while (*s != 0)
1637        {        {
1638        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1639            { if (*s == op->one_char) break; }
1640          if (op->one_char == 0)
1641          {          {
1642          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1643          if (pattern_filename[0] == 0)            *s, argv[i]);
1644            {          exit(usage(2));
1645            if (i >= argc - 1)          }
1646              {        if (op->type != OP_NODATA || s[1] == 0)
1647              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1648              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1649          break;          break;
1650          }          }
1651        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1652        }        }
1653      }      }
   }  
1654    
1655  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1656  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is OP_NODATA, it means that there is no data, and the option might set
1657      something in the PCRE options. */
1658    
1659  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1660    {      {
1661    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1662    return 2;      continue;
1663    }      }
1664    
1665  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1666      either has a value or defaults to something. It cannot have data in a
1667      separate item. At the moment, the only such options are "colo(u)r" and
1668      Jeffrey Friedl's special -S debugging option. */
1669    
1670  if (pattern_filename != NULL)    if (*option_data == 0 &&
1671    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1672      {      {
1673      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1674        strerror(errno));        {
1675      return 2;        case N_COLOUR:
1676          colour_option = (char *)"auto";
1677          break;
1678    #ifdef JFRIEDL_DEBUG
1679          case 'S':
1680          S_arg = 0;
1681          break;
1682    #endif
1683          }
1684        continue;
1685      }      }
1686    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1687      /* Otherwise, find the data string for the option. */
1688    
1689      if (*option_data == 0)
1690      {      {
1691      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
1692      if (pattern_count >= MAX_PATTERN_COUNT)        {
1693          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1694          exit(usage(2));
1695          }
1696        option_data = argv[++i];
1697        }
1698    
1699      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1700      multiple times to create a list of patterns. */
1701    
1702      if (op->type == OP_PATLIST)
1703        {
1704        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1705        {        {
1706        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1707          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1708        return 2;        return 2;
1709        }        }
1710      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
1711      if (s == buffer) continue;      }
1712      *s = 0;  
1713      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
1714        &errptr, NULL);  
1715      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1716        {
1717        *((char **)op->dataptr) = option_data;
1718        }
1719      else
1720        {
1721        char *endptr;
1722        int n = strtoul(option_data, &endptr, 10);
1723        if (*endptr != 0)
1724        {        {
1725        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
1726          pattern_count, errptr, error);          {
1727        return 2;          char *equals = strchr(op->long_name, '=');
1728            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1729              equals - op->long_name;
1730            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1731              option_data, nlen, op->long_name);
1732            }
1733          else
1734            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1735              option_data, op->one_char);
1736          exit(usage(2));
1737        }        }
1738        *((int *)op->dataptr) = n;
1739        }
1740      }
1741    
1742    /* Options have been decoded. If -C was used, its value is used as a default
1743    for -A and -B. */
1744    
1745    if (both_context > 0)
1746      {
1747      if (after_context == 0) after_context = both_context;
1748      if (before_context == 0) before_context = both_context;
1749      }
1750    
1751    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1752    LC_ALL environment variable is set, and if so, use it. */
1753    
1754    if (locale == NULL)
1755      {
1756      locale = getenv("LC_ALL");
1757      locale_from = "LCC_ALL";
1758      }
1759    
1760    if (locale == NULL)
1761      {
1762      locale = getenv("LC_CTYPE");
1763      locale_from = "LC_CTYPE";
1764      }
1765    
1766    /* If a locale has been provided, set it, and generate the tables the PCRE
1767    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1768    
1769    if (locale != NULL)
1770      {
1771      if (setlocale(LC_CTYPE, locale) == NULL)
1772        {
1773        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1774          locale, locale_from);
1775        return 2;
1776        }
1777      pcretables = pcre_maketables();
1778      }
1779    
1780    /* Sort out colouring */
1781    
1782    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1783      {
1784      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1785      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1786      else
1787        {
1788        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1789          colour_option);
1790        return 2;
1791        }
1792      if (do_colour)
1793        {
1794        char *cs = getenv("PCREGREP_COLOUR");
1795        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1796        if (cs != NULL) colour_string = cs;
1797      }      }
   fclose(f);  
1798    }    }
1799    
1800  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
1801    
1802    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1803      {
1804      pcre_options |= PCRE_NEWLINE_CR;
1805      endlinetype = EL_CR;
1806      }
1807    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1808      {
1809      pcre_options |= PCRE_NEWLINE_LF;
1810      endlinetype = EL_LF;
1811      }
1812    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1813      {
1814      pcre_options |= PCRE_NEWLINE_CRLF;
1815      endlinetype = EL_CRLF;
1816      }
1817    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1818      {
1819      pcre_options |= PCRE_NEWLINE_ANY;
1820      endlinetype = EL_ANY;
1821      }
1822  else  else
1823    {    {
1824    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1825    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
1826    if (pattern_list[0] == NULL)    }
1827    
1828    /* Interpret the text values for -d and -D */
1829    
1830    if (dee_option != NULL)
1831      {
1832      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1833      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1834      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1835      else
1836        {
1837        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1838        return 2;
1839        }
1840      }
1841    
1842    if (DEE_option != NULL)
1843      {
1844      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1845      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1846      else
1847      {      {
1848      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
1849      return 2;      return 2;
1850      }      }
   pattern_count++;  
1851    }    }
1852    
1853  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
1854    
1855    #ifdef JFRIEDL_DEBUG
1856    if (S_arg > 9)
1857      {
1858      fprintf(stderr, "pcregrep: bad value for -S option\n");
1859      return 2;
1860      }
1861    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1862      {
1863      if (jfriedl_XT == 0) jfriedl_XT = 1;
1864      if (jfriedl_XR == 0) jfriedl_XR = 1;
1865      }
1866    #endif
1867    
1868    /* Get memory to store the pattern and hints lists. */
1869    
1870    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1871    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1872    
1873    if (pattern_list == NULL || hints_list == NULL)
1874      {
1875      fprintf(stderr, "pcregrep: malloc failed\n");
1876      goto EXIT2;
1877      }
1878    
1879    /* If no patterns were provided by -e, and there is no file provided by -f,
1880    the first argument is the one and only pattern, and it must exist. */
1881    
1882    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1883      {
1884      if (i >= argc) return usage(2);
1885      patterns[cmd_pattern_count++] = argv[i++];
1886      }
1887    
1888    /* Compile the patterns that were provided on the command line, either by
1889    multiple uses of -e or as a single unkeyed pattern. */
1890    
1891    for (j = 0; j < cmd_pattern_count; j++)
1892      {
1893      if (!compile_pattern(patterns[j], pcre_options, NULL,
1894           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1895        goto EXIT2;
1896      }
1897    
1898    /* Compile the regular expressions that are provided in a file. */
1899    
1900    if (pattern_filename != NULL)
1901      {
1902      int linenumber = 0;
1903      FILE *f;
1904      char *filename;
1905      char buffer[MBUFTHIRD];
1906    
1907      if (strcmp(pattern_filename, "-") == 0)
1908        {
1909        f = stdin;
1910        filename = stdin_name;
1911        }
1912      else
1913        {
1914        f = fopen(pattern_filename, "r");
1915        if (f == NULL)
1916          {
1917          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1918            strerror(errno));
1919          goto EXIT2;
1920          }
1921        filename = pattern_filename;
1922        }
1923    
1924      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1925        {
1926        char *s = buffer + (int)strlen(buffer);
1927        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1928        *s = 0;
1929        linenumber++;
1930        if (buffer[0] == 0) continue;   /* Skip blank lines */
1931        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1932          goto EXIT2;
1933        }
1934    
1935      if (f != stdin) fclose(f);
1936      }
1937    
1938    /* Study the regular expressions, as we will be running them many times */
1939    
1940  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1941    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 1945  for (j = 0; j < pattern_count; j++)
1945      char s[16];      char s[16];
1946      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1947      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1948      return 2;      goto EXIT2;
1949        }
1950      hint_count++;
1951      }
1952    
1953    /* If there are include or exclude patterns, compile them. */
1954    
1955    if (exclude_pattern != NULL)
1956      {
1957      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1958        pcretables);
1959      if (exclude_compiled == NULL)
1960        {
1961        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1962          errptr, error);
1963        goto EXIT2;
1964        }
1965      }
1966    
1967    if (include_pattern != NULL)
1968      {
1969      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1970        pcretables);
1971      if (include_compiled == NULL)
1972        {
1973        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1974          errptr, error);
1975        goto EXIT2;
1976      }      }
1977    }    }
1978    
1979  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
1980    
1981  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
1982      {
1983      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1984      goto EXIT;
1985      }
1986    
1987  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1988  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1989  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
1990    otherwise forced. */
1991    
1992  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
1993    
1994  for (; i < argc; i++)  for (; i < argc; i++)
1995    {    {
1996    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1997    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
1998      if (frc > 1) rc = frc;
1999        else if (frc == 0 && rc == 1) rc = 0;
2000    }    }
2001    
2002    EXIT:
2003    if (pattern_list != NULL)
2004      {
2005      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2006      free(pattern_list);
2007      }
2008    if (hints_list != NULL)
2009      {
2010      for (i = 0; i < hint_count; i++) free(hints_list[i]);
2011      free(hints_list);
2012      }
2013  return rc;  return rc;
2014    
2015    EXIT2:
2016    rc = 2;
2017    goto EXIT;
2018  }  }
2019    
2020  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.141

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12