/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 121 by ph10, Mon Mar 12 12:12:47 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2007 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #ifdef HAVE_CONFIG_H
41    #  include <config.h>
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45    #include <locale.h>
46  #include <stdio.h>  #include <stdio.h>
47  #include <string.h>  #include <string.h>
48  #include <stdlib.h>  #include <stdlib.h>
49  #include <errno.h>  #include <errno.h>
50  #include "config.h"  
51    #include <sys/types.h>
52    #include <sys/stat.h>
53    #include <unistd.h>
54    
55  #include "pcre.h"  #include "pcre.h"
56    
57  #define FALSE 0  #define FALSE 0
# Line 18  its pattern matching. On a Unix system i Line 59  its pattern matching. On a Unix system i
59    
60  typedef int BOOL;  typedef int BOOL;
61    
 #define VERSION "2.0 01-Aug-2001"  
62  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
63    
64    #if BUFSIZ > 8192
65    #define MBUFTHIRD BUFSIZ
66    #else
67    #define MBUFTHIRD 8192
68    #endif
69    
70    /* Values for the "filenames" variable, which specifies options for file name
71    output. The order is important; it is assumed that a file name is wanted for
72    all values greater than FN_DEFAULT. */
73    
74    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
75    
76    /* Actions for the -d and -D options */
77    
78    enum { dee_READ, dee_SKIP, dee_RECURSE };
79    enum { DEE_READ, DEE_SKIP };
80    
81    /* Actions for special processing options (flag bits) */
82    
83    #define PO_WORD_MATCH     0x0001
84    #define PO_LINE_MATCH     0x0002
85    #define PO_FIXED_STRINGS  0x0004
86    
87    /* Line ending types */
88    
89    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
90    
91    
92    
93  /*************************************************  /*************************************************
94  *               Global variables                 *  *               Global variables                 *
95  *************************************************/  *************************************************/
96    
97    /* Jeffrey Friedl has some debugging requirements that are not part of the
98    regular code. */
99    
100    #ifdef JFRIEDL_DEBUG
101    static int S_arg = -1;
102    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
103    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
104    static const char *jfriedl_prefix = "";
105    static const char *jfriedl_postfix = "";
106    #endif
107    
108    static int  endlinetype;
109    
110    static char *colour_string = (char *)"1;31";
111    static char *colour_option = NULL;
112    static char *dee_option = NULL;
113    static char *DEE_option = NULL;
114    static char *newline = NULL;
115  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
116    static char *stdin_name = (char *)"(standard input)";
117    static char *locale = NULL;
118    
119    static const unsigned char *pcretables = NULL;
120    
121  static int  pattern_count = 0;  static int  pattern_count = 0;
122  static pcre **pattern_list;  static pcre **pattern_list = NULL;
123  static pcre_extra **hints_list;  static pcre_extra **hints_list = NULL;
124    
125    static char *include_pattern = NULL;
126    static char *exclude_pattern = NULL;
127    
128    static pcre *include_compiled = NULL;
129    static pcre *exclude_compiled = NULL;
130    
131    static int after_context = 0;
132    static int before_context = 0;
133    static int both_context = 0;
134    static int dee_action = dee_READ;
135    static int DEE_action = DEE_READ;
136    static int error_count = 0;
137    static int filenames = FN_DEFAULT;
138    static int process_options = 0;
139    
140  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
141  static BOOL filenames = TRUE;  static BOOL do_colour = FALSE;
142  static BOOL filenames_only = FALSE;  static BOOL hyphenpending = FALSE;
143  static BOOL invert = FALSE;  static BOOL invert = FALSE;
144    static BOOL multiline = FALSE;
145  static BOOL number = FALSE;  static BOOL number = FALSE;
146  static BOOL recurse = FALSE;  static BOOL only_matching = FALSE;
147    static BOOL quiet = FALSE;
148  static BOOL silent = FALSE;  static BOOL silent = FALSE;
149  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
150    
151  /* Structure for options and list of them */  /* Structure for options and list of them */
152    
153    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
154           OP_PATLIST };
155    
156  typedef struct option_item {  typedef struct option_item {
157      int type;
158    int one_char;    int one_char;
159    char *long_name;    void *dataptr;
160    char *help_text;    const char *long_name;
161      const char *help_text;
162  } option_item;  } option_item;
163    
164    /* Options without a single-letter equivalent get a negative value. This can be
165    used to identify them. */
166    
167    #define N_COLOUR    (-1)
168    #define N_EXCLUDE   (-2)
169    #define N_HELP      (-3)
170    #define N_INCLUDE   (-4)
171    #define N_LABEL     (-5)
172    #define N_LOCALE    (-6)
173    #define N_NULL      (-7)
174    
175  static option_item optionlist[] = {  static option_item optionlist[] = {
176    { -1,  "help",         "display this help and exit" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
177    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
178    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
179    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
180    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
181    { 'n', "line-number",  "print line number with output lines" },    { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
182    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
183    { 's', "no-messages",  "suppress error messages" },    { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
184    { 'V', "version",      "print version information and exit" },    { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
185    { 'v', "invert-match", "select non-matching lines" },    { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
186    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
187    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
188    { 0,    NULL,           NULL }    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
189      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
190      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
191      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
192      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
193      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
194      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
195      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
196      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
197      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
198      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
199      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
200      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
201      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
202      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
203      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
204    #ifdef JFRIEDL_DEBUG
205      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
206    #endif
207      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
208      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
209      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
210      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
211      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
212      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
213      { OP_NODATA,    0,        NULL,               NULL,            NULL }
214  };  };
215    
216    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
217    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
218    that the combination of -w and -x has the same effect as -x on its own, so we
219    can treat them as the same. */
220    
221    static const char *prefix[] = {
222      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
223    
224    static const char *suffix[] = {
225      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
226    
227    /* UTF-8 tables - used only when the newline setting is "any". */
228    
229    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
230    
231    const char utf8_table4[] = {
232      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
233      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
234      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
235      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
236    
237    
238    
239  /*************************************************  /*************************************************
240  *       Functions for directory scanning         *  *            OS-specific functions               *
241  *************************************************/  *************************************************/
242    
243  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
244  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no support". */
 support". */  
245    
246    
247  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
248    
249  #if IS_UNIX  #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
250  #include <sys/types.h>  #include <sys/types.h>
251  #include <sys/stat.h>  #include <sys/stat.h>
252  #include <dirent.h>  #include <dirent.h>
253    
254  typedef DIR directory_type;  typedef DIR directory_type;
255    
256  int  static int
257  isdirectory(char *filename)  isdirectory(char *filename)
258  {  {
259  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 262  if (stat(filename, &statbuf) < 0)
262  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
263  }  }
264    
265  directory_type *  static directory_type *
266  opendirectory(char *filename)  opendirectory(char *filename)
267  {  {
268  return opendir(filename);  return opendir(filename);
269  }  }
270    
271  char *  static char *
272  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
273  {  {
274  for (;;)  for (;;)
# Line 111  for (;;) Line 281  for (;;)
281  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
282  }  }
283    
284  void  static void
285  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
286  {  {
287  closedir(dir);  closedir(dir);
288  }  }
289    
290    
291  #else  /************* Test for regular file in Unix **********/
292    
293    static int
294    isregfile(char *filename)
295    {
296    struct stat statbuf;
297    if (stat(filename, &statbuf) < 0)
298      return 1;        /* In the expectation that opening as a file will fail */
299    return (statbuf.st_mode & S_IFMT) == S_IFREG;
300    }
301    
302    
303    /************* Test stdout for being a terminal in Unix **********/
304    
305    static BOOL
306    is_stdout_tty(void)
307    {
308    return isatty(fileno(stdout));
309    }
310    
311    
312    /************* Directory scanning in Win32 ***********/
313    
314    /* I (Philip Hazel) have no means of testing this code. It was contributed by
315    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
316    when it did not exist. */
317    
318    
319    #elif HAVE_WINDOWS_H
320    
321    #ifndef STRICT
322    # define STRICT
323    #endif
324    #ifndef WIN32_LEAN_AND_MEAN
325    # define WIN32_LEAN_AND_MEAN
326    #endif
327    #ifndef INVALID_FILE_ATTRIBUTES
328    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
329    #endif
330    
331    #include <windows.h>
332    
333    typedef struct directory_type
334    {
335    HANDLE handle;
336    BOOL first;
337    WIN32_FIND_DATA data;
338    } directory_type;
339    
340    int
341    isdirectory(char *filename)
342    {
343    DWORD attr = GetFileAttributes(filename);
344    if (attr == INVALID_FILE_ATTRIBUTES)
345      return 0;
346    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
347    }
348    
349    directory_type *
350    opendirectory(char *filename)
351    {
352    size_t len;
353    char *pattern;
354    directory_type *dir;
355    DWORD err;
356    len = strlen(filename);
357    pattern = (char *) malloc(len + 3);
358    dir = (directory_type *) malloc(sizeof(*dir));
359    if ((pattern == NULL) || (dir == NULL))
360      {
361      fprintf(stderr, "pcregrep: malloc failed\n");
362      exit(2);
363      }
364    memcpy(pattern, filename, len);
365    memcpy(&(pattern[len]), "\\*", 3);
366    dir->handle = FindFirstFile(pattern, &(dir->data));
367    if (dir->handle != INVALID_HANDLE_VALUE)
368      {
369      free(pattern);
370      dir->first = TRUE;
371      return dir;
372      }
373    err = GetLastError();
374    free(pattern);
375    free(dir);
376    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
377    return NULL;
378    }
379    
380    char *
381    readdirectory(directory_type *dir)
382    {
383    for (;;)
384      {
385      if (!dir->first)
386        {
387        if (!FindNextFile(dir->handle, &(dir->data)))
388          return NULL;
389        }
390      else
391        {
392        dir->first = FALSE;
393        }
394      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
395        return dir->data.cFileName;
396      }
397    #ifndef _MSC_VER
398    return NULL;   /* Keep compiler happy; never executed */
399    #endif
400    }
401    
402    void
403    closedirectory(directory_type *dir)
404    {
405    FindClose(dir->handle);
406    free(dir);
407    }
408    
409    
410    /************* Test for regular file in Win32 **********/
411    
412    /* I don't know how to do this, or if it can be done; assume all paths are
413    regular if they are not directories. */
414    
/* Win32 version: a path is reported as a regular file whenever isdirectory()
says it is not a directory.

Argument:   filename   the path to test
Returns:    non-zero if the path is treated as a regular file, zero otherwise
*/

int isregfile(char *filename)
{
return !isdirectory(filename);   /* The terminating ';' was missing */
}
419    
420    
421    /************* Test stdout for being a terminal in Win32 **********/
422    
423    /* I don't know how to do this; assume never */
424    
425    static BOOL
426    is_stdout_tty(void)
427    {
428    FALSE;
429    }
430    
431    
432  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
433    
434  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
435    
436    #else
437    
438  typedef void directory_type;  typedef void directory_type;
439    
440  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return 0; }
441  directory_type * opendirectory(char *filename) {}  directory_type * opendirectory(char *filename) { return (directory_type*)0;}
442  char *readdirectory(directory_type *dir) {}  char *readdirectory(directory_type *dir) { return (char*)0;}
443  void closedirectory(directory_type *dir) {}  void closedirectory(directory_type *dir) {}
444    
445    
446    /************* Test for regular when we can't do it **********/
447    
448    /* Assume all files are regular. */
449    
450    int isregfile(char *filename) { return 1; }
451    
452    
453    /************* Test stdout for being a terminal when we can't do it **********/
454    
455    static BOOL
456    is_stdout_tty(void)
457    {
458    return FALSE;
459    }
460    
461    
462  #endif  #endif
463    
464    
# Line 159  return sys_errlist[n]; Line 486  return sys_errlist[n];
486    
487    
488  /*************************************************  /*************************************************
489  *              Grep an individual file           *  *             Find end of line                   *
490    *************************************************/
491    
492    /* The length of the endline sequence that is found is set via lenptr. This may
493    be zero at the very end of the file if there is no line-ending sequence there.
494    
495    Arguments:
496      p         current position in line
497      endptr    end of available data
498      lenptr    where to put the length of the eol sequence
499    
500    Returns:    pointer to the last byte of the line
501    */
502    
503    static char *
504    end_of_line(char *p, char *endptr, int *lenptr)
505    {
506    switch(endlinetype)
507      {
508      default:      /* Just in case */
509      case EL_LF:
510      while (p < endptr && *p != '\n') p++;
511      if (p < endptr)
512        {
513        *lenptr = 1;
514        return p + 1;
515        }
516      *lenptr = 0;
517      return endptr;
518    
519      case EL_CR:
520      while (p < endptr && *p != '\r') p++;
521      if (p < endptr)
522        {
523        *lenptr = 1;
524        return p + 1;
525        }
526      *lenptr = 0;
527      return endptr;
528    
529      case EL_CRLF:
530      for (;;)
531        {
532        while (p < endptr && *p != '\r') p++;
533        if (++p >= endptr)
534          {
535          *lenptr = 0;
536          return endptr;
537          }
538        if (*p == '\n')
539          {
540          *lenptr = 2;
541          return p + 1;
542          }
543        }
544      break;
545    
546      case EL_ANY:
547      while (p < endptr)
548        {
549        int extra = 0;
550        register int c = *((unsigned char *)p);
551    
552        if (utf8 && c >= 0xc0)
553          {
554          int gcii, gcss;
555          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
556          gcss = 6*extra;
557          c = (c & utf8_table3[extra]) << gcss;
558          for (gcii = 1; gcii <= extra; gcii++)
559            {
560            gcss -= 6;
561            c |= (p[gcii] & 0x3f) << gcss;
562            }
563          }
564    
565        p += 1 + extra;
566    
567        switch (c)
568          {
569          case 0x0a:    /* LF */
570          case 0x0b:    /* VT */
571          case 0x0c:    /* FF */
572          *lenptr = 1;
573          return p;
574    
575          case 0x0d:    /* CR */
576          if (p < endptr && *p == 0x0a)
577            {
578            *lenptr = 2;
579            p++;
580            }
581          else *lenptr = 1;
582          return p;
583    
584          case 0x85:    /* NEL */
585          *lenptr = utf8? 2 : 1;
586          return p;
587    
588          case 0x2028:  /* LS */
589          case 0x2029:  /* PS */
590          *lenptr = 3;
591          return p;
592    
593          default:
594          break;
595          }
596        }   /* End of loop for ANY case */
597    
598      *lenptr = 0;  /* Must have hit the end */
599      return endptr;
600      }     /* End of overall switch */
601    }
602    
603    
604    
605    /*************************************************
606    *         Find start of previous line            *
607    *************************************************/
608    
609    /* This is called when looking back for before lines to print.
610    
611    Arguments:
612      p         start of the subsequent line
613      startptr  start of available data
614    
615    Returns:    pointer to the start of the previous line
616    */
617    
618    static char *
619    previous_line(char *p, char *startptr)
620    {
621    switch(endlinetype)
622      {
623      default:      /* Just in case */
624      case EL_LF:
625      p--;
626      while (p > startptr && p[-1] != '\n') p--;
627      return p;
628    
629      case EL_CR:
630      p--;
631      while (p > startptr && p[-1] != '\n') p--;
632      return p;
633    
634      case EL_CRLF:
635      for (;;)
636        {
637        p -= 2;
638        while (p > startptr && p[-1] != '\n') p--;
639        if (p <= startptr + 1 || p[-2] == '\r') return p;
640        }
641      return p;   /* But control should never get here */
642    
643      case EL_ANY:
644      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
645      if (utf8) while ((*p & 0xc0) == 0x80) p--;
646    
647      while (p > startptr)
648        {
649        register int c;
650        char *pp = p - 1;
651    
652        if (utf8)
653          {
654          int extra = 0;
655          while ((*pp & 0xc0) == 0x80) pp--;
656          c = *((unsigned char *)pp);
657          if (c >= 0xc0)
658            {
659            int gcii, gcss;
660            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
661            gcss = 6*extra;
662            c = (c & utf8_table3[extra]) << gcss;
663            for (gcii = 1; gcii <= extra; gcii++)
664              {
665              gcss -= 6;
666              c |= (pp[gcii] & 0x3f) << gcss;
667              }
668            }
669          }
670        else c = *((unsigned char *)pp);
671    
672        switch (c)
673          {
674          case 0x0a:    /* LF */
675          case 0x0b:    /* VT */
676          case 0x0c:    /* FF */
677          case 0x0d:    /* CR */
678          case 0x85:    /* NEL */
679          case 0x2028:  /* LS */
680          case 0x2029:  /* PS */
681          return p;
682    
683          default:
684          break;
685          }
686    
687        p = pp;  /* Back one character */
688        }        /* End of loop for ANY case */
689    
690      return startptr;  /* Hit start of data */
691      }     /* End of overall switch */
692    }
693    
694    
695    
696    
697    
698    /*************************************************
699    *       Print the previous "after" lines         *
700  *************************************************/  *************************************************/
701    
702    /* This is called if we are about to lose said lines because of buffer filling,
703    and at the end of the file. The data in the line is written using fwrite() so
704    that a binary zero does not terminate it.
705    
706    Arguments:
707      lastmatchnumber   the number of the last matching line, plus one
708      lastmatchrestart  where we restarted after the last match
709      endptr            end of available data
710      printname         filename for printing
711    
712    Returns:            nothing
713    */
714    
715    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
716      char *endptr, char *printname)
717    {
718    if (after_context > 0 && lastmatchnumber > 0)
719      {
720      int count = 0;
721      while (lastmatchrestart < endptr && count++ < after_context)
722        {
723        int ellength;
724        char *pp = lastmatchrestart;
725        if (printname != NULL) fprintf(stdout, "%s-", printname);
726        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
727        pp = end_of_line(pp, endptr, &ellength);
728        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
729        lastmatchrestart = pp;
730        }
731      hyphenpending = TRUE;
732      }
733    }
734    
735    
736    
737    /*************************************************
738    *            Grep an individual file             *
739    *************************************************/
740    
741    /* This is called from grep_or_recurse() below. It uses a buffer that is three
742    times the value of MBUFTHIRD. The matching point is never allowed to stray into
743    the top third of the buffer, thus keeping more of the file available for
744    context printing or for multiline scanning. For large files, the pointer will
745    be in the middle third most of the time, so the bottom third is available for
746    "before" context printing.
747    
748    Arguments:
749      in           the fopened FILE stream
750      printname    the file name if it is to be printed for each match
751                   or NULL if the file name is not to be printed
752                   it cannot be NULL if filenames[_nomatch]_only is set
753    
754    Returns:       0 if there was at least one match
755                   1 otherwise (no matches)
756    */
757    
758  static int  static int
759  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
760  {  {
761  int rc = 1;  int rc = 1;
762  int linenumber = 0;  int linenumber = 1;
763    int lastmatchnumber = 0;
764  int count = 0;  int count = 0;
765  int offsets[99];  int offsets[99];
766  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
767    char buffer[3*MBUFTHIRD];
768    char *ptr = buffer;
769    char *endptr;
770    size_t bufflength;
771    BOOL endhyphenpending = FALSE;
772    
773    /* Do the first read into the start of the buffer and set up the pointer to
774    the end of what we have. */
775    
776    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
777    endptr = buffer + bufflength;
778    
779    /* Loop while the current pointer is not at the end of the file. For large
780    files, endptr will be at the end of the buffer when we are in the middle of the
781    file, but ptr will never get there, because as soon as it gets over 2/3 of the
782    way, the buffer is shifted left and re-filled. */
783    
784  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
785    {    {
786      int i, endlinelength;
787      int mrc = 0;
788    BOOL match = FALSE;    BOOL match = FALSE;
789    int i;    char *t = ptr;
790    int length = (int)strlen(buffer);    size_t length, linelength;
791    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;  
792    linenumber++;    /* At this point, ptr is at the start of a line. We need to find the length
793      of the subject string to pass to pcre_exec(). In multiline mode, it is the
794      length of the remainder of the data in the buffer. Otherwise, it is the length of
795      the next line. After matching, we always advance by the length of the next
796      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
797      that any match is constrained to be in the first line. */
798    
799      t = end_of_line(t, endptr, &endlinelength);
800      linelength = t - ptr - endlinelength;
801      length = multiline? endptr - ptr : linelength;
802    
803      /* Extra processing for Jeffrey Friedl's debugging. */
804    
805    #ifdef JFRIEDL_DEBUG
806      if (jfriedl_XT || jfriedl_XR)
807      {
808          #include <sys/time.h>
809          #include <time.h>
810          struct timeval start_time, end_time;
811          struct timezone dummy;
812    
813          if (jfriedl_XT)
814          {
815              unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
816              const char *orig = ptr;
817              ptr = malloc(newlen + 1);
818              if (!ptr) {
819                      printf("out of memory");
820                      exit(2);
821              }
822              endptr = ptr;
823              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
824              for (i = 0; i < jfriedl_XT; i++) {
825                      strncpy(endptr, orig,  length);
826                      endptr += length;
827              }
828              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
829              length = newlen;
830          }
831    
832          if (gettimeofday(&start_time, &dummy) != 0)
833                  perror("bad gettimeofday");
834    
835    
836          for (i = 0; i < jfriedl_XR; i++)
837              match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
838    
839          if (gettimeofday(&end_time, &dummy) != 0)
840                  perror("bad gettimeofday");
841    
842    for (i = 0; !match && i < pattern_count; i++)        double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
843                          -
844                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
845    
846          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
847          return 0;
848      }
849    #endif
850    
851    
852      /* Run through all the patterns until one matches. Note that we don't include
853      the final newline in the subject string. */
854    
855      for (i = 0; i < pattern_count; i++)
856      {      {
857      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
858        offsets, 99) >= 0;        offsets, 99);
859      if (match && whole_lines && offsets[1] != length) match = FALSE;      if (mrc >= 0) { match = TRUE; break; }
860        if (mrc != PCRE_ERROR_NOMATCH)
861          {
862          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
863          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
864          fprintf(stderr, "this line:\n");
865          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
866          fprintf(stderr, "\n");
867          if (error_count == 0 &&
868              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
869            {
870            fprintf(stderr, "pcregrep: error %d means that a resource limit "
871              "was exceeded\n", mrc);
872            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
873            }
874          if (error_count++ > 20)
875            {
876            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
877            exit(2);
878            }
879          match = invert;    /* No more matching; don't show the line again */
880          break;
881          }
882      }      }
883    
884      /* If it's a match or a not-match (as required), do what's wanted. */
885    
886    if (match != invert)    if (match != invert)
887      {      {
888        BOOL hyphenprinted = FALSE;
889    
890        /* We've failed if we want a file that doesn't have any matches. */
891    
892        if (filenames == FN_NOMATCH_ONLY) return 1;
893    
894        /* Just count if just counting is wanted. */
895    
896      if (count_only) count++;      if (count_only) count++;
897    
898      else if (filenames_only)      /* If all we want is a file name, there is no need to scan any more lines
899        in the file. */
900    
901        else if (filenames == FN_ONLY)
902        {        {
903        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
904        return 0;        return 0;
905        }        }
906    
907      else if (silent) return 0;      /* Likewise, if all we want is a yes/no answer. */
908    
909        else if (quiet) return 0;
910    
911        /* The --only-matching option prints just the substring that matched, and
912        does not print any context. */
913    
914        else if (only_matching)
915          {
916          if (printname != NULL) fprintf(stdout, "%s:", printname);
917          if (number) fprintf(stdout, "%d:", linenumber);
918          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
919          fprintf(stdout, "\n");
920          }
921    
922        /* This is the default case when none of the above options is set. We print
923        the matching line(s), possibly preceded and/or followed by other lines of
924        context. */
925    
926      else      else
927        {        {
928        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
929          previous match. We never print any overlaps. */
930    
931          if (after_context > 0 && lastmatchnumber > 0)
932            {
933            int ellength;
934            int linecount = 0;
935            char *p = lastmatchrestart;
936    
937            while (p < ptr && linecount < after_context)
938              {
939              p = end_of_line(p, ptr, &ellength);
940              linecount++;
941              }
942    
943            /* It is important to advance lastmatchrestart during this printing so
944            that it interacts correctly with any "before" printing below. Print
945            each line's data using fwrite() in case there are binary zeroes. */
946    
947            while (lastmatchrestart < p)
948              {
949              char *pp = lastmatchrestart;
950              if (printname != NULL) fprintf(stdout, "%s-", printname);
951              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
952              pp = end_of_line(pp, endptr, &ellength);
953              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
954              lastmatchrestart = pp;
955              }
956            if (lastmatchrestart != ptr) hyphenpending = TRUE;
957            }
958    
959          /* If there were non-contiguous lines printed above, insert hyphens. */
960    
961          if (hyphenpending)
962            {
963            fprintf(stdout, "--\n");
964            hyphenpending = FALSE;
965            hyphenprinted = TRUE;
966            }
967    
968          /* See if there is a requirement to print some "before" lines for this
969          match. Again, don't print overlaps. */
970    
971          if (before_context > 0)
972            {
973            int linecount = 0;
974            char *p = ptr;
975    
976            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
977                   linecount < before_context)
978              {
979              linecount++;
980              p = previous_line(p, buffer);
981              }
982    
983            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
984              fprintf(stdout, "--\n");
985    
986            while (p < ptr)
987              {
988              int ellength;
989              char *pp = p;
990              if (printname != NULL) fprintf(stdout, "%s-", printname);
991              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
992              pp = end_of_line(pp, endptr, &ellength);
993              fwrite(p, 1, pp - p, stdout);
994              p = pp;
995              }
996            }
997    
998          /* Now print the matching line(s); ensure we set hyphenpending at the end
999          of the file if any context lines are being output. */
1000    
1001          if (after_context > 0 || before_context > 0)
1002            endhyphenpending = TRUE;
1003    
1004          if (printname != NULL) fprintf(stdout, "%s:", printname);
1005        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
1006        fprintf(stdout, "%s\n", buffer);  
1007          /* In multiline mode, we want to print to the end of the line in which
1008          the end of the matched string is found, so we adjust linelength and the
1009          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1010          start of the match will always be before the first newline sequence. */
1011    
1012          if (multiline)
1013            {
1014            int ellength;
1015            char *endmatch = ptr + offsets[1];
1016            t = ptr;
1017            while (t < endmatch)
1018              {
1019              t = end_of_line(t, endptr, &ellength);
1020              if (t <= endmatch) linenumber++; else break;
1021              }
1022            endmatch = end_of_line(endmatch, endptr, &ellength);
1023            linelength = endmatch - ptr - ellength;
1024            }
1025    
1026          /*** NOTE: Use only fwrite() to output the data line, so that binary
1027          zeroes are treated as just another data character. */
1028    
1029          /* This extra option, for Jeffrey Friedl's debugging requirements,
1030          replaces the matched string, or a specific captured string if it exists,
1031          with X. When this happens, colouring is ignored. */
1032    
1033    #ifdef JFRIEDL_DEBUG
1034          if (S_arg >= 0 && S_arg < mrc)
1035            {
1036            int first = S_arg * 2;
1037            int last  = first + 1;
1038            fwrite(ptr, 1, offsets[first], stdout);
1039            fprintf(stdout, "X");
1040            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1041            }
1042          else
1043    #endif
1044    
1045          /* We have to split the line(s) up if colouring. */
1046    
1047          if (do_colour)
1048            {
1049            fwrite(ptr, 1, offsets[0], stdout);
1050            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1051            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1052            fprintf(stdout, "%c[00m", 0x1b);
1053            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1054            }
1055          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1056        }        }
1057    
1058      rc = 0;      /* End of doing what has to be done for a match */
1059    
1060        rc = 0;    /* Had some success */
1061    
1062        /* Remember where the last match happened for after_context. We remember
1063        where we are about to restart, and that line's number. */
1064    
1065        lastmatchrestart = ptr + linelength + endlinelength;
1066        lastmatchnumber = linenumber + 1;
1067      }      }
1068    
1069      /* Advance to after the newline and increment the line number. */
1070    
1071      ptr += linelength + endlinelength;
1072      linenumber++;
1073    
1074      /* If we haven't yet reached the end of the file (the buffer is full), and
1075      the current point is in the top 1/3 of the buffer, slide the buffer down by
1076      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1077      about to be lost, print them. */
1078    
1079      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1080        {
1081        if (after_context > 0 &&
1082            lastmatchnumber > 0 &&
1083            lastmatchrestart < buffer + MBUFTHIRD)
1084          {
1085          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1086          lastmatchnumber = 0;
1087          }
1088    
1089        /* Now do the shuffle */
1090    
1091        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1092        ptr -= MBUFTHIRD;
1093        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1094        endptr = buffer + bufflength;
1095    
1096        /* Adjust any last match point */
1097    
1098        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1099        }
1100      }     /* Loop through the whole file */
1101    
1102    /* End of file; print final "after" lines if wanted; do_after_lines sets
1103    hyphenpending if it prints something. */
1104    
1105    if (!only_matching && !count_only)
1106      {
1107      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1108      hyphenpending |= endhyphenpending;
1109      }
1110    
1111    /* Print the file name if we are looking for those without matches and there
1112    were none. If we found a match, we won't have got this far. */
1113    
1114    if (filenames == FN_NOMATCH_ONLY)
1115      {
1116      fprintf(stdout, "%s\n", printname);
1117      return 0;
1118    }    }
1119    
1120    /* Print the match count if wanted */
1121    
1122  if (count_only)  if (count_only)
1123    {    {
1124    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
1125    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
1126    }    }
1127    
# Line 220  return rc; Line 1130  return rc;
1130    
1131    
1132    
   
1133  /*************************************************  /*************************************************
1134  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
1135  *************************************************/  *************************************************/
1136    
1137    /* Given a path name, if it's a directory, scan all the files if we are
1138    recursing; if it's a file, grep it.
1139    
1140    Arguments:
1141      pathname          the path to investigate
1142      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1143      only_one_at_top   TRUE if the path is the only one at toplevel
1144    
1145    Returns:   0 if there was at least one match
1146               1 if there were no matches
1147               2 there was some kind of error
1148    
1149    However, file opening failures are suppressed if "silent" is set.
1150    */
1151    
1152  static int  static int
1153  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
   BOOL only_one_at_top)  
1154  {  {
1155  int rc = 1;  int rc = 1;
1156  int sep;  int sep;
1157  FILE *in;  FILE *in;
1158    
1159  /* If the file is a directory and we are recursing, scan each file within it.  /* If the file name is "-" we scan stdin */
1160    
1161    if (strcmp(pathname, "-") == 0)
1162      {
1163      return pcregrep(stdin,
1164        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1165          stdin_name : NULL);
1166      }
1167    
1168    
1169    /* If the file is a directory, skip if skipping or if we are recursing, scan
1170    each file within it, subject to any include or exclude patterns that were set.
1171  The scanning code is localized so it can be made system-specific. */  The scanning code is localized so it can be made system-specific. */
1172    
1173  if ((sep = isdirectory(filename)) != 0 && recurse)  if ((sep = isdirectory(pathname)) != 0)
1174    {    {
1175    char buffer[1024];    if (dee_action == dee_SKIP) return 1;
1176    char *nextfile;    if (dee_action == dee_RECURSE)
   directory_type *dir = opendirectory(filename);  
   
   if (dir == NULL)  
1177      {      {
1178      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      char buffer[1024];
1179        strerror(errno));      char *nextfile;
1180      return 2;      directory_type *dir = opendirectory(pathname);
     }  
1181    
1182    while ((nextfile = readdirectory(dir)) != NULL)      if (dir == NULL)
1183      {        {
1184      int frc;        if (!silent)
1185      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);          fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1186      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);            strerror(errno));
1187      if (frc == 0 && rc == 1) rc = 0;        return 2;
1188      }        }
1189    
1190        while ((nextfile = readdirectory(dir)) != NULL)
1191          {
1192          int frc, blen;
1193          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1194          blen = strlen(buffer);
1195    
1196          if (exclude_compiled != NULL &&
1197              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1198            continue;
1199    
1200          if (include_compiled != NULL &&
1201              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1202            continue;
1203    
1204          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1205          if (frc > 1) rc = frc;
1206           else if (frc == 0 && rc == 1) rc = 0;
1207          }
1208    
1209    closedirectory(dir);      closedirectory(dir);
1210    return rc;      return rc;
1211        }
1212    }    }
1213    
1214  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory and not a regular file, skip it if that's
1215  the first and only argument at top level, we don't show the file name.  been requested. */
1216  Otherwise, control is via the show_filenames variable. */  
1217    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1218    
1219    /* Control reaches here if we have a regular file, or if we have a directory
1220    and recursion or skipping was not requested, or if we have anything else and
1221    skipping was not requested. The scan proceeds. If this is the first and only
1222    argument at top level, we don't show the file name, unless we are only showing
1223    the file name, or the filename was forced (-H). */
1224    
1225  in = fopen(filename, "r");  in = fopen(pathname, "r");
1226  if (in == NULL)  if (in == NULL)
1227    {    {
1228    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
1229        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1230          strerror(errno));
1231    return 2;    return 2;
1232    }    }
1233    
1234  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  rc = pcregrep(in, (filenames > FN_DEFAULT ||
1235      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1236    
1237  fclose(in);  fclose(in);
1238  return rc;  return rc;
1239  }  }
# Line 287  return rc; Line 1248  return rc;
1248  static int  static int
1249  usage(int rc)  usage(int rc)
1250  {  {
1251  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  option_item *op;
1252    fprintf(stderr, "Usage: pcregrep [-");
1253    for (op = optionlist; op->one_char != 0; op++)
1254      {
1255      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1256      }
1257    fprintf(stderr, "] [long options] [pattern] [files]\n");
1258  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1259  return rc;  return rc;
1260  }  }
# Line 304  help(void) Line 1271  help(void)
1271  {  {
1272  option_item *op;  option_item *op;
1273    
1274  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1275  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1276    printf("PATTERN must be present if neither -e nor -f is used.\n");
1277    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1278  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1279    
1280  printf("Options:\n");  printf("Options:\n");
# Line 321  for (op = optionlist; op->one_char != 0; Line 1290  for (op = optionlist; op->one_char != 0;
1290    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1291    }    }
1292    
1293  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1294  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
1295  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
1296    
1297  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1298  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1299  }  }
1300    
# Line 334  printf("Exit status is 0 if any matches, Line 1302  printf("Exit status is 0 if any matches,
1302    
1303    
1304  /*************************************************  /*************************************************
1305  *                Handle an option                *  *    Handle a single-letter, no data option      *
1306  *************************************************/  *************************************************/
1307    
1308  static int  static int
# Line 342  handle_option(int letter, int options) Line 1310  handle_option(int letter, int options)
1310  {  {
1311  switch(letter)  switch(letter)
1312    {    {
1313    case -1:  help(); exit(0);    case N_HELP: help(); exit(0);
1314    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1315    case 'h': filenames = FALSE; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1316      case 'H': filenames = FN_FORCE; break;
1317      case 'h': filenames = FN_NONE; break;
1318    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
1319    case 'l': filenames_only = TRUE;    case 'l': filenames = FN_ONLY; break;
1320      case 'L': filenames = FN_NOMATCH_ONLY; break;
1321      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1322    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
1323    case 'r': recurse = TRUE; break;    case 'o': only_matching = TRUE; break;
1324      case 'q': quiet = TRUE; break;
1325      case 'r': dee_action = dee_RECURSE; break;
1326    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1327      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1328    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1329    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1330      case 'x': process_options |= PO_LINE_MATCH; break;
1331    
1332    case 'V':    case 'V':
1333    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s\n", pcre_version());
   fprintf(stderr, "PCRE version %s\n", pcre_version());  
1334    exit(0);    exit(0);
1335    break;    break;
1336    
# Line 371  return options; Line 1346  return options;
1346    
1347    
1348  /*************************************************  /*************************************************
1349    *          Construct printed ordinal             *
1350    *************************************************/
1351    
/* This turns a number into its English ordinal string: "1st", "2nd", "3rd",
"4th", and so on. Numbers whose last two digits are 11, 12, or 13 take "th"
("11th", "112th"), not the suffix that their final digit alone would select.

Argument:
  n          the number to convert

Returns:     pointer to a static buffer holding the ordinal text; the buffer
             is overwritten by the next call, so the caller must use or copy
             the result before calling again
*/

static char *
ordin(int n)
{
static char buffer[32];      /* Ample for the digits of any int, sign, suffix */
const char *ending = "th";
int last_two = n % 100;
char *p = buffer + sprintf(buffer, "%d", n);

/* The "teens" are irregular; everything else goes by the final digit. For a
negative number the remainders are negative, so no case matches and the suffix
stays as "th". */

if (last_two < 11 || last_two > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(p, ending);
return buffer;
}
1370    
1371    
1372    
1373    /*************************************************
1374    *          Compile a single pattern              *
1375    *************************************************/
1376    
1377    /* When the -F option has been used, this is called for each substring.
1378    Otherwise it's called for each supplied pattern.
1379    
1380    Arguments:
1381      pattern        the pattern string
1382      options        the PCRE options
1383      filename       the file name, or NULL for a command-line pattern
1384      count          0 if this is the only command line pattern, or
1385                     number of the command line pattern, or
1386                     linenumber for a pattern from a file
1387    
1388    Returns:         TRUE on success, FALSE after an error
1389    */
1390    
1391    static BOOL
1392    compile_single_pattern(char *pattern, int options, char *filename, int count)
1393    {
1394    char buffer[MBUFTHIRD + 16];
1395    const char *error;
1396    int errptr;
1397    
1398    if (pattern_count >= MAX_PATTERN_COUNT)
1399      {
1400      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1401        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1402      return FALSE;
1403      }
1404    
1405    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1406      suffix[process_options]);
1407    pattern_list[pattern_count] =
1408      pcre_compile(buffer, options, &error, &errptr, pcretables);
1409    if (pattern_list[pattern_count++] != NULL) return TRUE;
1410    
1411    /* Handle compile errors */
1412    
1413    errptr -= (int)strlen(prefix[process_options]);
1414    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1415    
1416    if (filename == NULL)
1417      {
1418      if (count == 0)
1419        fprintf(stderr, "pcregrep: Error in command-line regex "
1420          "at offset %d: %s\n", errptr, error);
1421      else
1422        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1423          "at offset %d: %s\n", ordin(count), errptr, error);
1424      }
1425    else
1426      {
1427      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1428        "at offset %d: %s\n", count, filename, errptr, error);
1429      }
1430    
1431    return FALSE;
1432    }
1433    
1434    
1435    
1436    /*************************************************
1437    *           Compile one supplied pattern         *
1438    *************************************************/
1439    
1440    /* When the -F option has been used, each string may be a list of strings,
1441    separated by line breaks. They will be matched literally.
1442    
1443    Arguments:
1444      pattern        the pattern string
1445      options        the PCRE options
1446      filename       the file name, or NULL for a command-line pattern
1447      count          0 if this is the only command line pattern, or
1448                     number of the command line pattern, or
1449                     linenumber for a pattern from a file
1450    
1451    Returns:         TRUE on success, FALSE after an error
1452    */
1453    
1454    static BOOL
1455    compile_pattern(char *pattern, int options, char *filename, int count)
1456    {
1457    if ((process_options & PO_FIXED_STRINGS) != 0)
1458      {
1459      char *eop = pattern + strlen(pattern);
1460      char buffer[MBUFTHIRD];
1461      for(;;)
1462        {
1463        int ellength;
1464        char *p = end_of_line(pattern, eop, &ellength);
1465        if (ellength == 0)
1466          return compile_single_pattern(pattern, options, filename, count);
1467        sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1468        pattern = p;
1469        if (!compile_single_pattern(buffer, options, filename, count))
1470          return FALSE;
1471        }
1472      }
1473    else return compile_single_pattern(pattern, options, filename, count);
1474    }
1475    
1476    
1477    
1478    /*************************************************
1479  *                Main program                    *  *                Main program                    *
1480  *************************************************/  *************************************************/
1481    
1482    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1483    
1484  int  int
1485  main(int argc, char **argv)  main(int argc, char **argv)
1486  {  {
1487  int i, j;  int i, j;
1488  int rc = 1;  int rc = 1;
1489  int options = 0;  int pcre_options = 0;
1490    int cmd_pattern_count = 0;
1491  int errptr;  int errptr;
 const char *error;  
1492  BOOL only_one_at_top;  BOOL only_one_at_top;
1493    char *patterns[MAX_PATTERN_COUNT];
1494    const char *locale_from = "--locale";
1495    const char *error;
1496    
1497    /* Set the default line ending value from the default in the PCRE library;
1498    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1499    */
1500    
1501    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1502    switch(i)
1503      {
1504      default:                 newline = (char *)"lf"; break;
1505      case '\r':               newline = (char *)"cr"; break;
1506      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1507      case -1:                 newline = (char *)"any"; break;
1508      }
1509    
1510  /* Process the options */  /* Process the options */
1511    
1512  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
1513    {    {
1514      option_item *op = NULL;
1515      char *option_data = (char *)"";    /* default to keep compiler happy */
1516      BOOL longop;
1517      BOOL longopwasequals = FALSE;
1518    
1519    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
1520    
1521    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
1522      but only if we have previously had -e or -f to define the patterns. */
1523    
1524      if (argv[i][1] == 0)
1525        {
1526        if (pattern_filename != NULL || pattern_count > 0) break;
1527          else exit(usage(2));
1528        }
1529    
1530      /* Handle a long name option, or -- to terminate the options */
1531    
1532    if (argv[i][1] == '-')    if (argv[i][1] == '-')
1533      {      {
1534      option_item *op;      char *arg = argv[i] + 2;
1535        char *argequals = strchr(arg, '=');
1536    
1537      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
1538        {        {
1539        pattern_filename = argv[i] + 7;        i++;
1540        continue;        break;                /* out of the options-handling loop */
1541        }        }
1542    
1543        longop = TRUE;
1544    
1545        /* Some long options have data that follows after =, for example file=name.
1546        Some options have variations in the long name spelling: specifically, we
1547        allow "regexp" because GNU grep allows it, though I personally go along
1548        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1549        These options are entered in the table as "regex(p)". No option is in both
1550        these categories, fortunately. */
1551    
1552      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
1553        {        {
1554        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
1555          char *equals = strchr(op->long_name, '=');
1556          if (opbra == NULL)     /* Not a (p) case */
1557          {          {
1558          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
1559          break;            {
1560              if (strcmp(arg, op->long_name) == 0) break;
1561              }
1562            else                 /* Special case xxx=data */
1563              {
1564              int oplen = equals - op->long_name;
1565              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1566              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1567                {
1568                option_data = arg + arglen;
1569                if (*option_data == '=')
1570                  {
1571                  option_data++;
1572                  longopwasequals = TRUE;
1573                  }
1574                break;
1575                }
1576              }
1577            }
1578          else                   /* Special case xxxx(p) */
1579            {
1580            char buff1[24];
1581            char buff2[24];
1582            int baselen = opbra - op->long_name;
1583            sprintf(buff1, "%.*s", baselen, op->long_name);
1584            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1585              opbra + 1);
1586            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1587              break;
1588          }          }
1589        }        }
1590    
1591      if (op->one_char == 0)      if (op->one_char == 0)
1592        {        {
1593        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 1595  for (i = 1; i < argc; i++)
1595        }        }
1596      }      }
1597    
1598    /* One-char options */  
1599      /* Jeffrey Friedl's debugging harness uses these additional options which
1600      are not in the right form for putting in the option table because they use
1601      only one hyphen, yet are more than one character long. By putting them
1602      separately here, they will not get displayed as part of the help() output,
1603      but I don't think Jeffrey will care about that. */
1604    
1605    #ifdef JFRIEDL_DEBUG
1606      else if (strcmp(argv[i], "-pre") == 0) {
1607              jfriedl_prefix = argv[++i];
1608              continue;
1609      } else if (strcmp(argv[i], "-post") == 0) {
1610              jfriedl_postfix = argv[++i];
1611              continue;
1612      } else if (strcmp(argv[i], "-XT") == 0) {
1613              sscanf(argv[++i], "%d", &jfriedl_XT);
1614              continue;
1615      } else if (strcmp(argv[i], "-XR") == 0) {
1616              sscanf(argv[++i], "%d", &jfriedl_XR);
1617              continue;
1618      }
1619    #endif
1620    
1621    
1622      /* One-char options; many that have no data may be in a single argument; we
1623      continue till we hit the last one or one that needs data. */
1624    
1625    else    else
1626      {      {
1627      char *s = argv[i] + 1;      char *s = argv[i] + 1;
1628        longop = FALSE;
1629      while (*s != 0)      while (*s != 0)
1630        {        {
1631        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
1632            { if (*s == op->one_char) break; }
1633          if (op->one_char == 0)
1634          {          {
1635          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1636          if (pattern_filename[0] == 0)            *s, argv[i]);
1637            {          exit(usage(2));
1638            if (i >= argc - 1)          }
1639              {        if (op->type != OP_NODATA || s[1] == 0)
1640              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
1641              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
1642          break;          break;
1643          }          }
1644        else options = handle_option(*s++, options);        pcre_options = handle_option(*s++, pcre_options);
1645        }        }
1646      }      }
   }  
1647    
1648  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));    /* At this point we should have op pointing to a matched option. If the type
1649  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));    is OP_NODATA, it means that there is no data, and the option might set
1650      something in the PCRE options. */
1651    
1652  if (pattern_list == NULL || hints_list == NULL)    if (op->type == OP_NODATA)
1653    {      {
1654    fprintf(stderr, "pcregrep: malloc failed\n");      pcre_options = handle_option(op->one_char, pcre_options);
1655    return 2;      continue;
1656    }      }
1657    
1658  /* Compile the regular expression(s). */    /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1659      either has a value or defaults to something. It cannot have data in a
1660      separate item. At the moment, the only such options are "colo(u)r" and
1661      Jeffrey Friedl's special -S debugging option. */
1662    
1663  if (pattern_filename != NULL)    if (*option_data == 0 &&
1664    {        (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
   FILE *f = fopen(pattern_filename, "r");  
   char buffer[BUFSIZ];  
   if (f == NULL)  
1665      {      {
1666      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      switch (op->one_char)
1667        strerror(errno));        {
1668      return 2;        case N_COLOUR:
1669          colour_option = (char *)"auto";
1670          break;
1671    #ifdef JFRIEDL_DEBUG
1672          case 'S':
1673          S_arg = 0;
1674          break;
1675    #endif
1676          }
1677        continue;
1678      }      }
1679    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1680      /* Otherwise, find the data string for the option. */
1681    
1682      if (*option_data == 0)
1683      {      {
1684      char *s = buffer + (int)strlen(buffer);      if (i >= argc - 1 || longopwasequals)
     if (pattern_count >= MAX_PATTERN_COUNT)  
1685        {        {
1686        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1687          exit(usage(2));
1688          }
1689        option_data = argv[++i];
1690        }
1691    
1692      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1693      multiple times to create a list of patterns. */
1694    
1695      if (op->type == OP_PATLIST)
1696        {
1697        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1698          {
1699          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1700          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1701        return 2;        return 2;
1702        }        }
1703      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      patterns[cmd_pattern_count++] = option_data;
1704      if (s == buffer) continue;      }
1705      *s = 0;  
1706      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,    /* Otherwise, deal with single string or numeric data values. */
1707        &errptr, NULL);  
1708      if (pattern_list[pattern_count++] == NULL)    else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1709        {
1710        *((char **)op->dataptr) = option_data;
1711        }
1712      else
1713        {
1714        char *endptr;
1715        int n = strtoul(option_data, &endptr, 10);
1716        if (*endptr != 0)
1717        {        {
1718        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        if (longop)
1719          pattern_count, errptr, error);          {
1720        return 2;          char *equals = strchr(op->long_name, '=');
1721            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1722              equals - op->long_name;
1723            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1724              option_data, nlen, op->long_name);
1725            }
1726          else
1727            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1728              option_data, op->one_char);
1729          exit(usage(2));
1730        }        }
1731        *((int *)op->dataptr) = n;
1732        }
1733      }
1734    
1735    /* Options have been decoded. If -C was used, its value is used as a default
1736    for -A and -B. */
1737    
1738    if (both_context > 0)
1739      {
1740      if (after_context == 0) after_context = both_context;
1741      if (before_context == 0) before_context = both_context;
1742      }
1743    
1744    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1745    LC_ALL environment variable is set, and if so, use it. */
1746    
1747    if (locale == NULL)
1748      {
1749      locale = getenv("LC_ALL");
1750      locale_from = "LCC_ALL";
1751      }
1752    
1753    if (locale == NULL)
1754      {
1755      locale = getenv("LC_CTYPE");
1756      locale_from = "LC_CTYPE";
1757      }
1758    
1759    /* If a locale has been provided, set it, and generate the tables the PCRE
1760    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1761    
1762    if (locale != NULL)
1763      {
1764      if (setlocale(LC_CTYPE, locale) == NULL)
1765        {
1766        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1767          locale, locale_from);
1768        return 2;
1769        }
1770      pcretables = pcre_maketables();
1771      }
1772    
1773    /* Sort out colouring */
1774    
1775    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1776      {
1777      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1778      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1779      else
1780        {
1781        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1782          colour_option);
1783        return 2;
1784        }
1785      if (do_colour)
1786        {
1787        char *cs = getenv("PCREGREP_COLOUR");
1788        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1789        if (cs != NULL) colour_string = cs;
1790      }      }
   fclose(f);  
1791    }    }
1792    
1793  /* If no file name, a single regex must be given inline */  /* Interpret the newline type; the default settings are Unix-like. */
1794    
1795    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1796      {
1797      pcre_options |= PCRE_NEWLINE_CR;
1798      endlinetype = EL_CR;
1799      }
1800    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1801      {
1802      pcre_options |= PCRE_NEWLINE_LF;
1803      endlinetype = EL_LF;
1804      }
1805    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1806      {
1807      pcre_options |= PCRE_NEWLINE_CRLF;
1808      endlinetype = EL_CRLF;
1809      }
1810    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1811      {
1812      pcre_options |= PCRE_NEWLINE_ANY;
1813      endlinetype = EL_ANY;
1814      }
1815  else  else
1816    {    {
1817    if (i >= argc) return usage(0);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1818    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    return 2;
1819    if (pattern_list[0] == NULL)    }
1820    
1821    /* Interpret the text values for -d and -D */
1822    
1823    if (dee_option != NULL)
1824      {
1825      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1826      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1827      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1828      else
1829        {
1830        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1831        return 2;
1832        }
1833      }
1834    
1835    if (DEE_option != NULL)
1836      {
1837      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1838      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1839      else
1840      {      {
1841      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
       error);  
1842      return 2;      return 2;
1843      }      }
   pattern_count++;  
1844    }    }
1845    
1846  /* Study the regular expressions, as we will be running them may times */  /* Check the values for Jeffrey Friedl's debugging options. */
1847    
1848    #ifdef JFRIEDL_DEBUG
1849    if (S_arg > 9)
1850      {
1851      fprintf(stderr, "pcregrep: bad value for -S option\n");
1852      return 2;
1853      }
1854    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1855      {
1856      if (jfriedl_XT == 0) jfriedl_XT = 1;
1857      if (jfriedl_XR == 0) jfriedl_XR = 1;
1858      }
1859    #endif
1860    
1861    /* Get memory to store the pattern and hints lists. */
1862    
1863    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1864    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1865    
1866    if (pattern_list == NULL || hints_list == NULL)
1867      {
1868      fprintf(stderr, "pcregrep: malloc failed\n");
1869      goto EXIT2;
1870      }
1871    
1872    /* If no patterns were provided by -e, and there is no file provided by -f,
1873    the first argument is the one and only pattern, and it must exist. */
1874    
1875    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1876      {
1877      if (i >= argc) return usage(2);
1878      patterns[cmd_pattern_count++] = argv[i++];
1879      }
1880    
1881    /* Compile the patterns that were provided on the command line, either by
1882    multiple uses of -e or as a single unkeyed pattern. */
1883    
1884    for (j = 0; j < cmd_pattern_count; j++)
1885      {
1886      if (!compile_pattern(patterns[j], pcre_options, NULL,
1887           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1888        goto EXIT2;
1889      }
1890    
1891    /* Compile the regular expressions that are provided in a file. */
1892    
1893    if (pattern_filename != NULL)
1894      {
1895      int linenumber = 0;
1896      FILE *f;
1897      char *filename;
1898      char buffer[MBUFTHIRD];
1899    
1900      if (strcmp(pattern_filename, "-") == 0)
1901        {
1902        f = stdin;
1903        filename = stdin_name;
1904        }
1905      else
1906        {
1907        f = fopen(pattern_filename, "r");
1908        if (f == NULL)
1909          {
1910          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1911            strerror(errno));
1912          goto EXIT2;
1913          }
1914        filename = pattern_filename;
1915        }
1916    
1917      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1918        {
1919        char *s = buffer + (int)strlen(buffer);
1920        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1921        *s = 0;
1922        linenumber++;
1923        if (buffer[0] == 0) continue;   /* Skip blank lines */
1924        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1925          goto EXIT2;
1926        }
1927    
1928      if (f != stdin) fclose(f);
1929      }
1930    
1931    /* Study the regular expressions, as we will be running them many times */
1932    
1933  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1934    {    {
# Line 513  for (j = 0; j < pattern_count; j++) Line 1938  for (j = 0; j < pattern_count; j++)
1938      char s[16];      char s[16];
1939      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);      if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1940      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1941      return 2;      goto EXIT2;
1942        }
1943      }
1944    
1945    /* If there are include or exclude patterns, compile them. */
1946    
1947    if (exclude_pattern != NULL)
1948      {
1949      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1950        pcretables);
1951      if (exclude_compiled == NULL)
1952        {
1953        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1954          errptr, error);
1955        goto EXIT2;
1956        }
1957      }
1958    
1959    if (include_pattern != NULL)
1960      {
1961      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1962        pcretables);
1963      if (include_compiled == NULL)
1964        {
1965        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1966          errptr, error);
1967        goto EXIT2;
1968      }      }
1969    }    }
1970    
1971  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit. */
1972    
1973  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc)
1974      {
1975      rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1976      goto EXIT;
1977      }
1978    
1979  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1980  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1981  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames are not
1982    otherwise forced. */
1983    
1984  only_one_at_top = (i == argc - 1);  only_one_at_top = i == argc - 1;   /* Catch initial value of i */
 if (filenames_only) filenames = TRUE;  
1985    
1986  for (; i < argc; i++)  for (; i < argc; i++)
1987    {    {
1988    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1989    if (frc == 0 && rc == 1) rc = 0;      only_one_at_top);
1990      if (frc > 1) rc = frc;
1991        else if (frc == 0 && rc == 1) rc = 0;
1992    }    }
1993    
1994    EXIT:
1995    if (pattern_list != NULL)
1996      {
1997      for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
1998      free(pattern_list);
1999      }
2000    if (hints_list != NULL)
2001      {
2002      for (i = 0; i < pattern_count; i++) free(hints_list[i]);
2003      free(hints_list);
2004      }
2005  return rc;  return rc;
2006    
2007    EXIT2:
2008    rc = 2;
2009    goto EXIT;
2010  }  }
2011    
2012  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.121

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12