/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2006 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40    #include <ctype.h>
41    #include <locale.h>
42  #include <stdio.h>  #include <stdio.h>
43  #include <string.h>  #include <string.h>
44  #include <stdlib.h>  #include <stdlib.h>
45  #include <errno.h>  #include <errno.h>
46    
47    #include <sys/types.h>
48    #include <sys/stat.h>
49    #include <unistd.h>
50    
51  #include "config.h"  #include "config.h"
52  #include "pcre.h"  #include "pcre.h"
53    
# Line 17  its pattern matching. */ Line 56  its pattern matching. */
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59    #define VERSION "4.4 29-Nov-2006"
60    #define MAX_PATTERN_COUNT 100
61    
62    #if BUFSIZ > 8192
63    #define MBUFTHIRD BUFSIZ
64    #else
65    #define MBUFTHIRD 8192
66    #endif
67    
68    /* Values for the "filenames" variable, which specifies options for file name
69    output. The order is important; it is assumed that a file name is wanted for
70    all values greater than FN_DEFAULT. */
71    
72    enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
73    
74    /* Actions for the -d and -D options */
75    
76    enum { dee_READ, dee_SKIP, dee_RECURSE };
77    enum { DEE_READ, DEE_SKIP };
78    
79    /* Actions for special processing options (flag bits) */
80    
81    #define PO_WORD_MATCH     0x0001
82    #define PO_LINE_MATCH     0x0002
83    #define PO_FIXED_STRINGS  0x0004
84    
85    /* Line ending types */
86    
87    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
88    
89    
90    
91  /*************************************************  /*************************************************
92  *               Global variables                 *  *               Global variables                 *
93  *************************************************/  *************************************************/
94    
95  static pcre *pattern;  /* Jeffrey Friedl has some debugging requirements that are not part of the
96  static pcre_extra *hints;  regular code. */
97    
98    #ifdef JFRIEDL_DEBUG
99    static int S_arg = -1;
100    static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
101    static unsigned int jfriedl_XT = 0; /* replicate text this many times */
102    static const char *jfriedl_prefix = "";
103    static const char *jfriedl_postfix = "";
104    #endif
105    
106    static int  endlinetype;
107    
108    static char *colour_string = (char *)"1;31";
109    static char *colour_option = NULL;
110    static char *dee_option = NULL;
111    static char *DEE_option = NULL;
112    static char *newline = NULL;
113    static char *pattern_filename = NULL;
114    static char *stdin_name = (char *)"(standard input)";
115    static char *locale = NULL;
116    
117    static const unsigned char *pcretables = NULL;
118    
119    static int  pattern_count = 0;
120    static pcre **pattern_list;
121    static pcre_extra **hints_list;
122    
123    static char *include_pattern = NULL;
124    static char *exclude_pattern = NULL;
125    
126    static pcre *include_compiled = NULL;
127    static pcre *exclude_compiled = NULL;
128    
129    static int after_context = 0;
130    static int before_context = 0;
131    static int both_context = 0;
132    static int dee_action = dee_READ;
133    static int DEE_action = DEE_READ;
134    static int error_count = 0;
135    static int filenames = FN_DEFAULT;
136    static int process_options = 0;
137    
138  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
139  static BOOL filenames_only = FALSE;  static BOOL do_colour = FALSE;
140    static BOOL hyphenpending = FALSE;
141  static BOOL invert = FALSE;  static BOOL invert = FALSE;
142    static BOOL multiline = FALSE;
143  static BOOL number = FALSE;  static BOOL number = FALSE;
144    static BOOL only_matching = FALSE;
145    static BOOL quiet = FALSE;
146  static BOOL silent = FALSE;  static BOOL silent = FALSE;
147  static BOOL whole_lines = FALSE;  static BOOL utf8 = FALSE;
148    
149    /* Structure for options and list of them */
150    
151    enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
152           OP_PATLIST };
153    
154    typedef struct option_item {
155      int type;
156      int one_char;
157      void *dataptr;
158      const char *long_name;
159      const char *help_text;
160    } option_item;
161    
162    /* Options without a single-letter equivalent get a negative value. This can be
163    used to identify them. */
164    
165    #define N_COLOUR    (-1)
166    #define N_EXCLUDE   (-2)
167    #define N_HELP      (-3)
168    #define N_INCLUDE   (-4)
169    #define N_LABEL     (-5)
170    #define N_LOCALE    (-6)
171    #define N_NULL      (-7)
172    
173    static option_item optionlist[] = {
174      { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
175      { OP_NODATA,    N_HELP,   NULL,              "help",          "display this help and exit" },
176      { OP_NUMBER,    'A',      &after_context,    "after-context=number", "set number of following context lines" },
177      { OP_NUMBER,    'B',      &before_context,   "before-context=number", "set number of prior context lines" },
178      { OP_OP_STRING, N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
179      { OP_NUMBER,    'C',      &both_context,     "context=number", "set number of context lines, before & after" },
180      { OP_NODATA,    'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
181      { OP_OP_STRING, N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
182      { OP_STRING,    'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
183      { OP_STRING,    'd',      &dee_option,       "directories=action", "how to handle directories" },
184      { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
185      { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
186      { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
187      { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
188      { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
189      { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
190      { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
191      { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
192      { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
193      { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
194      { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
195      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
196      { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
197      { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
198      { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
199      { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
200      { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
201      { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
202    #ifdef JFRIEDL_DEBUG
203      { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
204    #endif
205      { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
206      { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
207      { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
208      { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
209      { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
210      { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
211      { OP_NODATA,    0,        NULL,               NULL,            NULL }
212    };
213    
214    /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
215    options. These set the 1, 2, and 4 bits in process_options, respectively. Note
216    that the combination of -w and -x has the same effect as -x on its own, so we
217    can treat them as the same. */
218    
219    static const char *prefix[] = {
220      "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
221    
222    static const char *suffix[] = {
223      "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
224    
225    /* UTF-8 tables - used only when the newline setting is "all". */
226    
227    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
228    
229    const char utf8_table4[] = {
230      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
232      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
234    
235    
236    
237    /*************************************************
238    *            OS-specific functions               *
239    *************************************************/
240    
241    /* These functions are defined so that they can be made system specific,
242    although at present the only ones are for Unix, Win32, and for "no support". */
243    
244    
245    /************* Directory scanning in Unix ***********/
246    
247    #if IS_UNIX
248    #include <sys/types.h>
249    #include <sys/stat.h>
250    #include <dirent.h>
251    
252    typedef DIR directory_type;
253    
/* Report whether a path names a directory.

Argument:   filename   the path to examine
Returns:    '/' if the path is a directory
            0 otherwise, including when stat() fails - the failure is not
              reported here, in the expectation that opening the path as a
              file will fail
*/

static int
isdirectory(char *filename)
{
struct stat info;
if (stat(filename, &info) >= 0 && S_ISDIR(info.st_mode)) return '/';
return 0;
}
262    
263    static directory_type *
264    opendirectory(char *filename)
265    {
266    return opendir(filename);
267    }
268    
269    static char *
270    readdirectory(directory_type *dir)
271    {
272    for (;;)
273      {
274      struct dirent *dent = readdir(dir);
275      if (dent == NULL) return NULL;
276      if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
277        return dent->d_name;
278      }
279    return NULL;   /* Keep compiler happy; never executed */
280    }
281    
282    static void
283    closedirectory(directory_type *dir)
284    {
285    closedir(dir);
286    }
287    
288    
289    /************* Test for regular file in Unix **********/
290    
/* Report whether a path names a regular file.

Argument:   filename   the path to examine
Returns:    1 if the path is a regular file
            1 also when stat() fails, in the expectation that opening the
              path as a file will fail
            0 for anything else
*/

static int
isregfile(char *filename)
{
struct stat info;
if (stat(filename, &info) < 0) return 1;
return S_ISREG(info.st_mode)? 1 : 0;
}
299    
300    
301    /************* Test stdout for being a terminal in Unix **********/
302    
303    static BOOL
304    is_stdout_tty(void)
305    {
306    return isatty(fileno(stdout));
307    }
308    
309    
310    /************* Directory scanning in Win32 ***********/
311    
312    /* I (Philip Hazel) have no means of testing this code. It was contributed by
313    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
314    when it did not exist. */
315    
316    
317    #elif HAVE_WIN32API
318    
319    #ifndef STRICT
320    # define STRICT
321    #endif
322    #ifndef WIN32_LEAN_AND_MEAN
323    # define WIN32_LEAN_AND_MEAN
324    #endif
325    #ifndef INVALID_FILE_ATTRIBUTES
326    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
327    #endif
328    
329    #include <windows.h>
330    
331    typedef struct directory_type
332    {
333    HANDLE handle;
334    BOOL first;
335    WIN32_FIND_DATA data;
336    } directory_type;
337    
338    int
339    isdirectory(char *filename)
340    {
341    DWORD attr = GetFileAttributes(filename);
342    if (attr == INVALID_FILE_ATTRIBUTES)
343      return 0;
344    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
345    }
346    
347    directory_type *
348    opendirectory(char *filename)
349    {
350    size_t len;
351    char *pattern;
352    directory_type *dir;
353    DWORD err;
354    len = strlen(filename);
355    pattern = (char *) malloc(len + 3);
356    dir = (directory_type *) malloc(sizeof(*dir));
357    if ((pattern == NULL) || (dir == NULL))
358      {
359      fprintf(stderr, "pcregrep: malloc failed\n");
360      exit(2);
361      }
362    memcpy(pattern, filename, len);
363    memcpy(&(pattern[len]), "\\*", 3);
364    dir->handle = FindFirstFile(pattern, &(dir->data));
365    if (dir->handle != INVALID_HANDLE_VALUE)
366      {
367      free(pattern);
368      dir->first = TRUE;
369      return dir;
370      }
371    err = GetLastError();
372    free(pattern);
373    free(dir);
374    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
375    return NULL;
376    }
377    
378    char *
379    readdirectory(directory_type *dir)
380    {
381    for (;;)
382      {
383      if (!dir->first)
384        {
385        if (!FindNextFile(dir->handle, &(dir->data)))
386          return NULL;
387        }
388      else
389        {
390        dir->first = FALSE;
391        }
392      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
393        return dir->data.cFileName;
394      }
395    #ifndef _MSC_VER
396    return NULL;   /* Keep compiler happy; never executed */
397    #endif
398    }
399    
400    void
401    closedirectory(directory_type *dir)
402    {
403    FindClose(dir->handle);
404    free(dir);
405    }
406    
407    
408    /************* Test for regular file in Win32 **********/
409    
410    /* I don't know how to do this, or if it can be done; assume all paths are
411    regular if they are not directories. */
412    
/* Treat every path that is not a directory as a regular file.

Argument:   filename   the path to examine
Returns:    1 if isdirectory() says the path is not a directory, else 0
*/

int isregfile(char *filename)
{
return !isdirectory(filename);   /* The terminating semicolon was missing */
}
417    
418    
419    /************* Test stdout for being a terminal in Win32 **********/
420    
421    /* I don't know how to do this; assume never */
422    
423    static BOOL
424    is_stdout_tty(void)
425    {
426    FALSE;
427    }
428    
429    
430    /************* Directory scanning when we can't do it ***********/
431    
432    /* The type is void, and apart from isdirectory(), the functions do nothing. */
433    
434    #else
435    
typedef void directory_type;

/* Nothing is ever reported as a directory, so the scanning functions should
never be reached. Those with a non-void result nevertheless return a definite
NULL rather than falling off the end of the function. */

int isdirectory(char *filename) { (void)filename; return 0; }
directory_type * opendirectory(char *filename) { (void)filename; return NULL; }
char *readdirectory(directory_type *dir) { (void)dir; return NULL; }
void closedirectory(directory_type *dir) { (void)dir; }
442    
443    
444    /************* Test for regular when we can't do it **********/
445    
446    /* Assume all files are regular. */
447    
/* Every path counts as a regular file; the name itself is never examined. */

int isregfile(char *filename)
{
(void)filename;
return 1;
}
449    
450    
451    /************* Test stdout for being a terminal when we can't do it **********/
452    
453    static BOOL
454    is_stdout_tty(void)
455    {
456    return FALSE;
457    }
458    
459    
460    #endif
461    
462    
463    
# Line 58  return sys_errlist[n]; Line 484  return sys_errlist[n];
484    
485    
486  /*************************************************  /*************************************************
487  *              Grep an individual file           *  *             Find end of line                   *
488  *************************************************/  *************************************************/
489    
490  static int  /* The length of the endline sequence that is found is set via lenptr. This may
491  pcregrep(FILE *in, char *name)  be zero at the very end of the file if there is no line-ending sequence there.
492  {  
493  int rc = 1;  Arguments:
494  int linenumber = 0;    p         current position in line
495  int count = 0;    endptr    end of available data
496  int offsets[99];    lenptr    where to put the length of the eol sequence
497  char buffer[BUFSIZ];  
498    Returns:    pointer to the byte after the end-of-line sequence (the start of the next line), or endptr if no line ending was found
499    */
500    
501  while (fgets(buffer, sizeof(buffer), in) != NULL)  static char *
502    end_of_line(char *p, char *endptr, int *lenptr)
503    {
504    switch(endlinetype)
505    {    {
506    BOOL match;    default:      /* Just in case */
507    int length = (int)strlen(buffer);    case EL_LF:
508    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    while (p < endptr && *p != '\n') p++;
509    linenumber++;    if (p < endptr)
510        {
511        *lenptr = 1;
512        return p + 1;
513        }
514      *lenptr = 0;
515      return endptr;
516    
517    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    case EL_CR:
518    if (match && whole_lines && offsets[1] != length) match = FALSE;    while (p < endptr && *p != '\r') p++;
519      if (p < endptr)
520        {
521        *lenptr = 1;
522        return p + 1;
523        }
524      *lenptr = 0;
525      return endptr;
526    
527    if (match != invert)    case EL_CRLF:
528      for (;;)
529      {      {
530      if (count_only) count++;      while (p < endptr && *p != '\r') p++;
531        if (++p >= endptr)
532          {
533          *lenptr = 0;
534          return endptr;
535          }
536        if (*p == '\n')
537          {
538          *lenptr = 2;
539          return p + 1;
540          }
541        }
542      break;
543    
544      case EL_ANY:
545      while (p < endptr)
546        {
547        int extra = 0;
548        register int c = *((unsigned char *)p);
549    
550      else if (filenames_only)      if (utf8 && c >= 0xc0)
551        {        {
552        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        int gcii, gcss;
553        return 0;        extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
554          gcss = 6*extra;
555          c = (c & utf8_table3[extra]) << gcss;
556          for (gcii = 1; gcii <= extra; gcii++)
557            {
558            gcss -= 6;
559            c |= (p[gcii] & 0x3f) << gcss;
560            }
561        }        }
562    
563      else if (silent) return 0;      p += 1 + extra;
564    
565      else      switch (c)
566        {        {
567        if (name != NULL) fprintf(stdout, "%s:", name);        case 0x0a:    /* LF */
568        if (number) fprintf(stdout, "%d:", linenumber);        case 0x0b:    /* VT */
569        fprintf(stdout, "%s\n", buffer);        case 0x0c:    /* FF */
570          *lenptr = 1;
571          return p;
572    
573          case 0x0d:    /* CR */
574          if (p < endptr && *p == 0x0a)
575            {
576            *lenptr = 2;
577            p++;
578            }
579          else *lenptr = 1;
580          return p;
581    
582          case 0x85:    /* NEL */
583          *lenptr = utf8? 2 : 1;
584          return p;
585    
586          case 0x2028:  /* LS */
587          case 0x2029:  /* PS */
588          *lenptr = 3;
589          return p;
590    
591          default:
592          break;
593        }        }
594        }   /* End of loop for ANY case */
595    
596      rc = 0;    *lenptr = 0;  /* Must have hit the end */
597      }    return endptr;
598    }    }     /* End of overall switch */
599    }
600    
601  if (count_only)  
602    
603    /*************************************************
604    *         Find start of previous line            *
605    *************************************************/
606    
607    /* This is called when looking back for before lines to print.
608    
609    Arguments:
610      p         start of the subsequent line
611      startptr  start of available data
612    
613    Returns:    pointer to the start of the previous line
614    */
615    
616    static char *
617    previous_line(char *p, char *startptr)
618    {
619    switch(endlinetype)
620    {    {
621    if (name != NULL) fprintf(stdout, "%s:", name);    default:      /* Just in case */
622    fprintf(stdout, "%d\n", count);    case EL_LF:
623    }    p--;
624      while (p > startptr && p[-1] != '\n') p--;
625      return p;
626    
627      case EL_CR:
628      p--;
629      while (p > startptr && p[-1] != '\n') p--;
630      return p;
631    
632  return rc;    case EL_CRLF:
633      for (;;)
634        {
635        p -= 2;
636        while (p > startptr && p[-1] != '\n') p--;
637        if (p <= startptr + 1 || p[-2] == '\r') return p;
638        }
639      return p;   /* But control should never get here */
640    
641      case EL_ANY:
642      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
643      if (utf8) while ((*p & 0xc0) == 0x80) p--;
644    
645      while (p > startptr)
646        {
647        register int c;
648        char *pp = p - 1;
649    
650        if (utf8)
651          {
652          int extra = 0;
653          while ((*pp & 0xc0) == 0x80) pp--;
654          c = *((unsigned char *)pp);
655          if (c >= 0xc0)
656            {
657            int gcii, gcss;
658            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
659            gcss = 6*extra;
660            c = (c & utf8_table3[extra]) << gcss;
661            for (gcii = 1; gcii <= extra; gcii++)
662              {
663              gcss -= 6;
664              c |= (pp[gcii] & 0x3f) << gcss;
665              }
666            }
667          }
668        else c = *((unsigned char *)pp);
669    
670        switch (c)
671          {
672          case 0x0a:    /* LF */
673          case 0x0b:    /* VT */
674          case 0x0c:    /* FF */
675          case 0x0d:    /* CR */
676          case 0x85:    /* NEL */
677          case 0x2028:  /* LS */
678          case 0x2029:  /* PS */
679          return p;
680    
681          default:
682          break;
683          }
684    
685        p = pp;  /* Back one character */
686        }        /* End of loop for ANY case */
687    
688      return startptr;  /* Hit start of data */
689      }     /* End of overall switch */
690  }  }
691    
692    
693    
694    
695    
696  /*************************************************  /*************************************************
697  *                Usage function                  *  *       Print the previous "after" lines         *
698  *************************************************/  *************************************************/
699    
700  static int  /* This is called if we are about to lose said lines because of buffer filling,
701  usage(int rc)  and at the end of the file. The data in the line is written using fwrite() so
702    that a binary zero does not terminate it.
703    
704    Arguments:
705      lastmatchnumber   the number of the last matching line, plus one
706      lastmatchrestart  where we restarted after the last match
707      endptr            end of available data
708      printname         filename for printing
709    
710    Returns:            nothing
711    */
712    
713    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
714      char *endptr, char *printname)
715  {  {
716  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  if (after_context > 0 && lastmatchnumber > 0)
717  return rc;    {
718      int count = 0;
719      while (lastmatchrestart < endptr && count++ < after_context)
720        {
721        int ellength;
722        char *pp = lastmatchrestart;
723        if (printname != NULL) fprintf(stdout, "%s-", printname);
724        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
725        pp = end_of_line(pp, endptr, &ellength);
726        fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
727        lastmatchrestart = pp;
728        }
729      hyphenpending = TRUE;
730      }
731  }  }
732    
733    
734    
   
735  /*************************************************  /*************************************************
736  *                Main program                    *  *            Grep an individual file             *
737  *************************************************/  *************************************************/
738    
739  int  /* This is called from grep_or_recurse() below. It uses a buffer that is three
740  main(int argc, char **argv)  times the value of MBUFTHIRD. The matching point is never allowed to stray into
741    the top third of the buffer, thus keeping more of the file available for
742    context printing or for multiline scanning. For large files, the pointer will
743    be in the middle third most of the time, so the bottom third is available for
744    "before" context printing.
745    
746    Arguments:
747      in           the fopened FILE stream
748      printname    the file name if it is to be printed for each match
749                   or NULL if the file name is not to be printed
750                   it cannot be NULL if filenames[_nomatch]_only is set
751    
752    Returns:       0 if there was at least one match
753                   1 otherwise (no matches)
754    */
755    
756    static int
757    pcregrep(FILE *in, char *printname)
758  {  {
 int i;  
759  int rc = 1;  int rc = 1;
760  int options = 0;  int linenumber = 1;
761  int errptr;  int lastmatchnumber = 0;
762  const char *error;  int count = 0;
763  BOOL filenames = TRUE;  int offsets[99];
764    char *lastmatchrestart = NULL;
765    char buffer[3*MBUFTHIRD];
766    char *ptr = buffer;
767    char *endptr;
768    size_t bufflength;
769    BOOL endhyphenpending = FALSE;
770    
771    /* Do the first read into the start of the buffer and set up the pointer to
772    end of what we have. */
773    
774    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
775    endptr = buffer + bufflength;
776    
777    /* Loop while the current pointer is not at the end of the file. For large
778    files, endptr will be at the end of the buffer when we are in the middle of the
779    file, but ptr will never get there, because as soon as it gets over 2/3 of the
780    way, the buffer is shifted left and re-filled. */
781    
782  /* Process the options */  while (ptr < endptr)
783      {
784      int i, endlinelength;
785      int mrc = 0;
786      BOOL match = FALSE;
787      char *t = ptr;
788      size_t length, linelength;
789    
790      /* At this point, ptr is at the start of a line. We need to find the length
791      of the subject string to pass to pcre_exec(). In multiline mode, it is the
792      length remainder of the data in the buffer. Otherwise, it is the length of
793      the next line. After matching, we always advance by the length of the next
794      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
795      that any match is constrained to be in the first line. */
796    
797      t = end_of_line(t, endptr, &endlinelength);
798      linelength = t - ptr - endlinelength;
799      length = multiline? endptr - ptr : linelength;
800    
801  for (i = 1; i < argc; i++)    /* Extra processing for Jeffrey Friedl's debugging. */
802    
803    #ifdef JFRIEDL_DEBUG
804      if (jfriedl_XT || jfriedl_XR)
805    {    {
806    char *s;        #include <sys/time.h>
807    if (argv[i][0] != '-') break;        #include <time.h>
808    s = argv[i] + 1;        struct timeval start_time, end_time;
809    while (*s != 0)        struct timezone dummy;
810      {  
811      switch (*s++)        if (jfriedl_XT)
812        {        {
813        case 'c': count_only = TRUE; break;            unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
814        case 'h': filenames = FALSE; break;            const char *orig = ptr;
815        case 'i': options |= PCRE_CASELESS; break;            ptr = malloc(newlen + 1);
816        case 'l': filenames_only = TRUE;            if (!ptr) {
817        case 'n': number = TRUE; break;                    printf("out of memory");
818        case 's': silent = TRUE; break;                    exit(2);
819        case 'v': invert = TRUE; break;            }
820        case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;            endptr = ptr;
821              strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
822              for (i = 0; i < jfriedl_XT; i++) {
823                      strncpy(endptr, orig,  length);
824                      endptr += length;
825              }
826              strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
827              length = newlen;
828          }
829    
830        case 'V':        if (gettimeofday(&start_time, &dummy) != 0)
831        fprintf(stderr, "PCRE version %s\n", pcre_version());                perror("bad gettimeofday");
       break;  
832    
833        default:  
834        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);        for (i = 0; i < jfriedl_XR; i++)
835        return usage(2);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
836    
837          if (gettimeofday(&end_time, &dummy) != 0)
838                  perror("bad gettimeofday");
839    
840          double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
841                          -
842                          (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
843    
844          printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
845          return 0;
846      }
847    #endif
848    
849    
850      /* Run through all the patterns until one matches. Note that we don't include
851      the final newline in the subject string. */
852    
853      for (i = 0; i < pattern_count; i++)
854        {
855        mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
856          offsets, 99);
857        if (mrc >= 0) { match = TRUE; break; }
858        if (mrc != PCRE_ERROR_NOMATCH)
859          {
860          fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
861          if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
862          fprintf(stderr, "this line:\n");
863          fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */
864          fprintf(stderr, "\n");
865          if (error_count == 0 &&
866              (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
867            {
868            fprintf(stderr, "pcregrep: error %d means that a resource limit "
869              "was exceeded\n", mrc);
870            fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
871            }
872          if (error_count++ > 20)
873            {
874            fprintf(stderr, "pcregrep: too many errors - abandoned\n");
875            exit(2);
876            }
877          match = invert;    /* No more matching; don't show the line again */
878          break;
879        }        }
880      }      }
   }  
881    
882  /* There must be at least a regexp argument */    /* If it's a match or a not-match (as required), do what's wanted. */
883    
884  if (i >= argc) return usage(0);    if (match != invert)
885        {
886        BOOL hyphenprinted = FALSE;
887    
888  /* Compile the regular expression. */      /* We've failed if we want a file that doesn't have any matches. */
889    
890  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);      if (filenames == FN_NOMATCH_ONLY) return 1;
 if (pattern == NULL)  
   {  
   fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);  
   return 2;  
   }  
891    
892  /* Study the regular expression, as we will be running it may times */      /* Just count if just counting is wanted. */
893    
894  hints = pcre_study(pattern, 0, &error);      if (count_only) count++;
 if (error != NULL)  
   {  
   fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);  
   return 2;  
   }  
895    
896  /* If there are no further arguments, do the business on stdin and exit */      /* If all we want is a file name, there is no need to scan any more lines
897        in the file. */
898    
899        else if (filenames == FN_ONLY)
900          {
901          fprintf(stdout, "%s\n", printname);
902          return 0;
903          }
904    
905  if (i >= argc) return pcregrep(stdin, NULL);      /* Likewise, if all we want is a yes/no answer. */
906    
907  /* Otherwise, work through the remaining arguments as files. If there is only      else if (quiet) return 0;
 one, don't give its name on the output. */  
908    
909  if (i == argc - 1) filenames = FALSE;      /* The --only-matching option prints just the substring that matched, and
910  if (filenames_only) filenames = TRUE;      does not print any context. */
911    
912  for (; i < argc; i++)      else if (only_matching)
913    {        {
914    FILE *in = fopen(argv[i], "r");        if (printname != NULL) fprintf(stdout, "%s:", printname);
915    if (in == NULL)        if (number) fprintf(stdout, "%d:", linenumber);
916      {        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
917      fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));        fprintf(stdout, "\n");
918      rc = 2;        }
919    
920        /* This is the default case when none of the above options is set. We print
921        the matching line(s), possibly preceded and/or followed by other lines of
922        context. */
923    
924        else
925          {
926          /* See if there is a requirement to print some "after" lines from a
927          previous match. We never print any overlaps. */
928    
929          if (after_context > 0 && lastmatchnumber > 0)
930            {
931            int ellength;
932            int linecount = 0;
933            char *p = lastmatchrestart;
934    
935            while (p < ptr && linecount < after_context)
936              {
937              p = end_of_line(p, ptr, &ellength);
938              linecount++;
939              }
940    
941            /* It is important to advance lastmatchrestart during this printing so
942            that it interacts correctly with any "before" printing below. Print
943            each line's data using fwrite() in case there are binary zeroes. */
944    
945            while (lastmatchrestart < p)
946              {
947              char *pp = lastmatchrestart;
948              if (printname != NULL) fprintf(stdout, "%s-", printname);
949              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950              pp = end_of_line(pp, endptr, &ellength);
951              fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952              lastmatchrestart = pp;
953              }
954            if (lastmatchrestart != ptr) hyphenpending = TRUE;
955            }
956    
957          /* If there were non-contiguous lines printed above, insert hyphens. */
958    
959          if (hyphenpending)
960            {
961            fprintf(stdout, "--\n");
962            hyphenpending = FALSE;
963            hyphenprinted = TRUE;
964            }
965    
966          /* See if there is a requirement to print some "before" lines for this
967          match. Again, don't print overlaps. */
968    
969          if (before_context > 0)
970            {
971            int linecount = 0;
972            char *p = ptr;
973    
974            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
975                   linecount < before_context)
976              {
977              linecount++;
978              p = previous_line(p, buffer);
979              }
980    
981            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
982              fprintf(stdout, "--\n");
983    
984            while (p < ptr)
985              {
986              int ellength;
987              char *pp = p;
988              if (printname != NULL) fprintf(stdout, "%s-", printname);
989              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
990              pp = end_of_line(pp, endptr, &ellength);
991              fwrite(p, 1, pp - p, stdout);
992              p = pp;
993              }
994            }
995    
996          /* Now print the matching line(s); ensure we set hyphenpending at the end
997          of the file if any context lines are being output. */
998    
999          if (after_context > 0 || before_context > 0)
1000            endhyphenpending = TRUE;
1001    
1002          if (printname != NULL) fprintf(stdout, "%s:", printname);
1003          if (number) fprintf(stdout, "%d:", linenumber);
1004    
1005          /* In multiline mode, we want to print to the end of the line in which
1006          the end of the matched string is found, so we adjust linelength and the
1007          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1008          start of the match will always be before the first newline sequence. */
1009    
1010          if (multiline)
1011            {
1012            int ellength;
1013            char *endmatch = ptr + offsets[1];
1014            t = ptr;
1015            while (t < endmatch)
1016              {
1017              t = end_of_line(t, endptr, &ellength);
1018              if (t <= endmatch) linenumber++; else break;
1019              }
1020            endmatch = end_of_line(endmatch, endptr, &ellength);
1021            linelength = endmatch - ptr - ellength;
1022            }
1023    
1024          /*** NOTE: Use only fwrite() to output the data line, so that binary
1025          zeroes are treated as just another data character. */
1026    
1027          /* This extra option, for Jeffrey Friedl's debugging requirements,
1028          replaces the matched string, or a specific captured string if it exists,
1029          with X. When this happens, colouring is ignored. */
1030    
1031    #ifdef JFRIEDL_DEBUG
1032          if (S_arg >= 0 && S_arg < mrc)
1033            {
1034            int first = S_arg * 2;
1035            int last  = first + 1;
1036            fwrite(ptr, 1, offsets[first], stdout);
1037            fprintf(stdout, "X");
1038            fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1039            }
1040          else
1041    #endif
1042    
1043          /* We have to split the line(s) up if colouring. */
1044    
1045          if (do_colour)
1046            {
1047            fwrite(ptr, 1, offsets[0], stdout);
1048            fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1049            fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1050            fprintf(stdout, "%c[00m", 0x1b);
1051            fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1052            }
1053          else fwrite(ptr, 1, linelength + endlinelength, stdout);
1054          }
1055    
1056        /* End of doing what has to be done for a match */
1057    
1058        rc = 0;    /* Had some success */
1059    
1060        /* Remember where the last match happened for after_context. We remember
1061        where we are about to restart, and that line's number. */
1062    
1063        lastmatchrestart = ptr + linelength + endlinelength;
1064        lastmatchnumber = linenumber + 1;
1065      }      }
1066    else  
1067      /* Advance to after the newline and increment the line number. */
1068    
1069      ptr += linelength + endlinelength;
1070      linenumber++;
1071    
1072      /* If we haven't yet reached the end of the file (the buffer is full), and
1073      the current point is in the top 1/3 of the buffer, slide the buffer down by
1074      1/3 and refill it. Before we do this, if some unprinted "after" lines are
1075      about to be lost, print them. */
1076    
1077      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1078        {
1079        if (after_context > 0 &&
1080            lastmatchnumber > 0 &&
1081            lastmatchrestart < buffer + MBUFTHIRD)
1082          {
1083          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1084          lastmatchnumber = 0;
1085          }
1086    
1087        /* Now do the shuffle */
1088    
1089        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1090        ptr -= MBUFTHIRD;
1091        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1092        endptr = buffer + bufflength;
1093    
1094        /* Adjust any last match point */
1095    
1096        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1097        }
1098      }     /* Loop through the whole file */
1099    
1100    /* End of file; print final "after" lines if wanted; do_after_lines sets
1101    hyphenpending if it prints something. */
1102    
1103    if (!only_matching && !count_only)
1104      {
1105      do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1106      hyphenpending |= endhyphenpending;
1107      }
1108    
1109    /* Print the file name if we are looking for those without matches and there
1110    were none. If we found a match, we won't have got this far. */
1111    
1112    if (filenames == FN_NOMATCH_ONLY)
1113      {
1114      fprintf(stdout, "%s\n", printname);
1115      return 0;
1116      }
1117    
1118    /* Print the match count if wanted */
1119    
1120    if (count_only)
1121      {
1122      if (printname != NULL) fprintf(stdout, "%s:", printname);
1123      fprintf(stdout, "%d\n", count);
1124      }
1125    
1126    return rc;
1127    }
1128    
1129    
1130    
1131    /*************************************************
1132    *     Grep a file or recurse into a directory    *
1133    *************************************************/
1134    
1135    /* Given a path name, if it's a directory, scan all the files if we are
1136    recursing; if it's a file, grep it.
1137    
1138    Arguments:
1139      pathname          the path to investigate
1140      dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
1141      only_one_at_top   TRUE if the path is the only one at toplevel
1142    
1143    Returns:   0 if there was at least one match
1144               1 if there were no matches
1145               2 there was some kind of error
1146    
1147    However, file opening failures are suppressed if "silent" is set.
1148    */
1149    
1150    static int
1151    grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1152    {
1153    int rc = 1;
1154    int sep;
1155    FILE *in;
1156    
1157    /* If the file name is "-" we scan stdin */
1158    
1159    if (strcmp(pathname, "-") == 0)
1160      {
1161      return pcregrep(stdin,
1162        (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1163          stdin_name : NULL);
1164      }
1165    
1166    
1167    /* If the file is a directory, skip if skipping or if we are recursing, scan
1168    each file within it, subject to any include or exclude patterns that were set.
1169    The scanning code is localized so it can be made system-specific. */
1170    
1171    if ((sep = isdirectory(pathname)) != 0)
1172      {
1173      if (dee_action == dee_SKIP) return 1;
1174      if (dee_action == dee_RECURSE)
1175        {
1176        char buffer[1024];
1177        char *nextfile;
1178        directory_type *dir = opendirectory(pathname);
1179    
1180        if (dir == NULL)
1181          {
1182          if (!silent)
1183            fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1184              strerror(errno));
1185          return 2;
1186          }
1187    
1188        while ((nextfile = readdirectory(dir)) != NULL)
1189          {
1190          int frc, blen;
1191          sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1192          blen = strlen(buffer);
1193    
1194          if (exclude_compiled != NULL &&
1195              pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1196            continue;
1197    
1198          if (include_compiled != NULL &&
1199              pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1200            continue;
1201    
1202          frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1203          if (frc > 1) rc = frc;
1204           else if (frc == 0 && rc == 1) rc = 0;
1205          }
1206    
1207        closedirectory(dir);
1208        return rc;
1209        }
1210      }
1211    
1212    /* If the file is not a directory and not a regular file, skip it if that's
1213    been requested. */
1214    
1215    else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1216    
1217    /* Control reaches here if we have a regular file, or if we have a directory
1218    and recursion or skipping was not requested, or if we have anything else and
1219    skipping was not requested. The scan proceeds. If this is the first and only
1220    argument at top level, we don't show the file name, unless we are only showing
1221    the file name, or the filename was forced (-H). */
1222    
1223    in = fopen(pathname, "r");
1224    if (in == NULL)
1225      {
1226      if (!silent)
1227        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1228          strerror(errno));
1229      return 2;
1230      }
1231    
1232    rc = pcregrep(in, (filenames > FN_DEFAULT ||
1233      (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1234    
1235    fclose(in);
1236    return rc;
1237    }
1238    
1239    
1240    
1241    
1242    /*************************************************
1243    *                Usage function                  *
1244    *************************************************/
1245    
1246    static int
1247    usage(int rc)
1248    {
1249    option_item *op;
1250    fprintf(stderr, "Usage: pcregrep [-");
1251    for (op = optionlist; op->one_char != 0; op++)
1252      {
1253      if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1254      }
1255    fprintf(stderr, "] [long options] [pattern] [files]\n");
1256    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1257    return rc;
1258    }
1259    
1260    
1261    
1262    
1263    /*************************************************
1264    *                Help function                   *
1265    *************************************************/
1266    
1267    static void
1268    help(void)
1269    {
1270    option_item *op;
1271    
1272    printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1273    printf("Search for PATTERN in each FILE or standard input.\n");
1274    printf("PATTERN must be present if neither -e nor -f is used.\n");
1275    printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1276    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1277    
1278    printf("Options:\n");
1279    
1280    for (op = optionlist; op->one_char != 0; op++)
1281      {
1282      int n;
1283      char s[4];
1284      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1285      printf("  %s --%s%n", s, op->long_name, &n);
1286      n = 30 - n;
1287      if (n < 1) n = 1;
1288      printf("%.*s%s\n", n, "                    ", op->help_text);
1289      }
1290    
1291    printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1292    printf("trailing white space is removed and blank lines are ignored.\n");
1293    printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1294    
1295    printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1296    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1297    }
1298    
1299    
1300    
1301    
1302    /*************************************************
1303    *    Handle a single-letter, no data option      *
1304    *************************************************/
1305    
1306    static int
1307    handle_option(int letter, int options)
1308    {
1309    switch(letter)
1310      {
1311      case N_HELP: help(); exit(0);
1312      case 'c': count_only = TRUE; break;
1313      case 'F': process_options |= PO_FIXED_STRINGS; break;
1314      case 'H': filenames = FN_FORCE; break;
1315      case 'h': filenames = FN_NONE; break;
1316      case 'i': options |= PCRE_CASELESS; break;
1317      case 'l': filenames = FN_ONLY; break;
1318      case 'L': filenames = FN_NOMATCH_ONLY; break;
1319      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1320      case 'n': number = TRUE; break;
1321      case 'o': only_matching = TRUE; break;
1322      case 'q': quiet = TRUE; break;
1323      case 'r': dee_action = dee_RECURSE; break;
1324      case 's': silent = TRUE; break;
1325      case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1326      case 'v': invert = TRUE; break;
1327      case 'w': process_options |= PO_WORD_MATCH; break;
1328      case 'x': process_options |= PO_LINE_MATCH; break;
1329    
1330      case 'V':
1331      fprintf(stderr, "pcregrep version %s using ", VERSION);
1332      fprintf(stderr, "PCRE version %s\n", pcre_version());
1333      exit(0);
1334      break;
1335    
1336      default:
1337      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1338      exit(usage(2));
1339      }
1340    
1341    return options;
1342    }
1343    
1344    
1345    
1346    
1347    /*************************************************
1348    *          Construct printed ordinal             *
1349    *************************************************/
1350    
/* This turns a number into "1st", "2nd", "3rd", "4th", etc. Numbers whose
last two digits are 11, 12, or 13 take "th" ("11th", "112th"), as does any
number that is not positive. The result is in a static buffer, which is
overwritten by the next call.

Argument:   the number
Returns:    pointer to the static buffer holding the text
*/

static char *
ordin(int n)
{
/* Big enough for a sign, the digits of a 32-bit int, the suffix, and the
terminating zero. */

static char buffer[16];
const char *suffix = "th";
int last_two = n % 100;

/* The teens are irregular, so only look at the final digit when the last
two digits are not 11, 12, or 13. For a negative n both remainders are
negative or zero, so the default "th" is kept. */

if (last_two < 11 || last_two > 13)
  {
  switch (n%10)
    {
    case 1: suffix = "st"; break;
    case 2: suffix = "nd"; break;
    case 3: suffix = "rd"; break;
    default: break;
    }
  }

sprintf(buffer, "%d%s", n, suffix);
return buffer;
}
1369    
1370    
1371    
1372    /*************************************************
1373    *          Compile a single pattern              *
1374    *************************************************/
1375    
1376    /* When the -F option has been used, this is called for each substring.
1377    Otherwise it's called for each supplied pattern.
1378    
1379    Arguments:
1380      pattern        the pattern string
1381      options        the PCRE options
1382      filename       the file name, or NULL for a command-line pattern
1383      count          0 if this is the only command line pattern, or
1384                     number of the command line pattern, or
1385                     linenumber for a pattern from a file
1386    
1387    Returns:         TRUE on success, FALSE after an error
1388    */
1389    
1390    static BOOL
1391    compile_single_pattern(char *pattern, int options, char *filename, int count)
1392    {
1393    char buffer[MBUFTHIRD + 16];
1394    const char *error;
1395    int errptr;
1396    
1397    if (pattern_count >= MAX_PATTERN_COUNT)
1398      {
1399      fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1400        (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1401      return FALSE;
1402      }
1403    
1404    sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1405      suffix[process_options]);
1406    pattern_list[pattern_count] =
1407      pcre_compile(buffer, options, &error, &errptr, pcretables);
1408    if (pattern_list[pattern_count++] != NULL) return TRUE;
1409    
1410    /* Handle compile errors */
1411    
1412    errptr -= (int)strlen(prefix[process_options]);
1413    if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1414    
1415    if (filename == NULL)
1416      {
1417      if (count == 0)
1418        fprintf(stderr, "pcregrep: Error in command-line regex "
1419          "at offset %d: %s\n", errptr, error);
1420      else
1421        fprintf(stderr, "pcregrep: Error in %s command-line regex "
1422          "at offset %d: %s\n", ordin(count), errptr, error);
1423      }
1424    else
1425      {
1426      fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1427        "at offset %d: %s\n", count, filename, errptr, error);
1428      }
1429    
1430    return FALSE;
1431    }
1432    
1433    
1434    
1435    /*************************************************
1436    *           Compile one supplied pattern         *
1437    *************************************************/
1438    
1439    /* When the -F option has been used, each string may be a list of strings,
1440    separated by line breaks. They will be matched literally.
1441    
1442    Arguments:
1443      pattern        the pattern string
1444      options        the PCRE options
1445      filename       the file name, or NULL for a command-line pattern
1446      count          0 if this is the only command line pattern, or
1447                     number of the command line pattern, or
1448                     linenumber for a pattern from a file
1449    
1450    Returns:         TRUE on success, FALSE after an error
1451    */
1452    
1453    static BOOL
1454    compile_pattern(char *pattern, int options, char *filename, int count)
1455    {
1456    if ((process_options & PO_FIXED_STRINGS) != 0)
1457      {
1458      char *eop = pattern + strlen(pattern);
1459      char buffer[MBUFTHIRD];
1460      for(;;)
1461        {
1462        int ellength;
1463        char *p = end_of_line(pattern, eop, &ellength);
1464        if (ellength == 0)
1465          return compile_single_pattern(pattern, options, filename, count);
1466        sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1467        pattern = p;
1468        if (!compile_single_pattern(buffer, options, filename, count))
1469          return FALSE;
1470        }
1471      }
1472    else return compile_single_pattern(pattern, options, filename, count);
1473    }
1474    
1475    
1476    
1477    /*************************************************
1478    *                Main program                    *
1479    *************************************************/
1480    
1481    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1482    
1483    int
1484    main(int argc, char **argv)
1485    {
1486    int i, j;
1487    int rc = 1;
1488    int pcre_options = 0;
1489    int cmd_pattern_count = 0;
1490    int errptr;
1491    BOOL only_one_at_top;
1492    char *patterns[MAX_PATTERN_COUNT];
1493    const char *locale_from = "--locale";
1494    const char *error;
1495    
1496    /* Set the default line ending value from the default in the PCRE library;
1497    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1498    */
1499    
1500    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1501    switch(i)
1502      {
1503      default:                 newline = (char *)"lf"; break;
1504      case '\r':               newline = (char *)"cr"; break;
1505      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1506      case -1:                 newline = (char *)"any"; break;
1507      }
1508    
1509    /* Process the options */
1510    
1511    for (i = 1; i < argc; i++)
1512      {
1513      option_item *op = NULL;
1514      char *option_data = (char *)"";    /* default to keep compiler happy */
1515      BOOL longop;
1516      BOOL longopwasequals = FALSE;
1517    
1518      if (argv[i][0] != '-') break;
1519    
1520      /* If we hit an argument that is just "-", it may be a reference to STDIN,
1521      but only if we have previously had -e or -f to define the patterns. */
1522    
1523      if (argv[i][1] == 0)
1524        {
1525        if (pattern_filename != NULL || pattern_count > 0) break;
1526          else exit(usage(2));
1527        }
1528    
1529      /* Handle a long name option, or -- to terminate the options */
1530    
1531      if (argv[i][1] == '-')
1532        {
1533        char *arg = argv[i] + 2;
1534        char *argequals = strchr(arg, '=');
1535    
1536        if (*arg == 0)    /* -- terminates options */
1537          {
1538          i++;
1539          break;                /* out of the options-handling loop */
1540          }
1541    
1542        longop = TRUE;
1543    
1544        /* Some long options have data that follows after =, for example file=name.
1545        Some options have variations in the long name spelling: specifically, we
1546        allow "regexp" because GNU grep allows it, though I personally go along
1547        with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1548        These options are entered in the table as "regex(p)". No option is in both
1549        these categories, fortunately. */
1550    
1551        for (op = optionlist; op->one_char != 0; op++)
1552          {
1553          char *opbra = strchr(op->long_name, '(');
1554          char *equals = strchr(op->long_name, '=');
1555          if (opbra == NULL)     /* Not a (p) case */
1556            {
1557            if (equals == NULL)  /* Not thing=data case */
1558              {
1559              if (strcmp(arg, op->long_name) == 0) break;
1560              }
1561            else                 /* Special case xxx=data */
1562              {
1563              int oplen = equals - op->long_name;
1564              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
1565              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1566                {
1567                option_data = arg + arglen;
1568                if (*option_data == '=')
1569                  {
1570                  option_data++;
1571                  longopwasequals = TRUE;
1572                  }
1573                break;
1574                }
1575              }
1576            }
1577          else                   /* Special case xxxx(p) */
1578            {
1579            char buff1[24];
1580            char buff2[24];
1581            int baselen = opbra - op->long_name;
1582            sprintf(buff1, "%.*s", baselen, op->long_name);
1583            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
1584              opbra + 1);
1585            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1586              break;
1587            }
1588          }
1589    
1590        if (op->one_char == 0)
1591          {
1592          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1593          exit(usage(2));
1594          }
1595        }
1596    
1597    
1598      /* Jeffrey Friedl's debugging harness uses these additional options which
1599      are not in the right form for putting in the option table because they use
1600      only one hyphen, yet are more than one character long. By putting them
1601      separately here, they will not get displayed as part of the help() output,
1602      but I don't think Jeffrey will care about that. */
1603    
1604    #ifdef JFRIEDL_DEBUG
1605      else if (strcmp(argv[i], "-pre") == 0) {
1606              jfriedl_prefix = argv[++i];
1607              continue;
1608      } else if (strcmp(argv[i], "-post") == 0) {
1609              jfriedl_postfix = argv[++i];
1610              continue;
1611      } else if (strcmp(argv[i], "-XT") == 0) {
1612              sscanf(argv[++i], "%d", &jfriedl_XT);
1613              continue;
1614      } else if (strcmp(argv[i], "-XR") == 0) {
1615              sscanf(argv[++i], "%d", &jfriedl_XR);
1616              continue;
1617      }
1618    #endif
1619    
1620    
1621      /* One-char options; many that have no data may be in a single argument; we
1622      continue till we hit the last one or one that needs data. */
1623    
1624      else
1625        {
1626        char *s = argv[i] + 1;
1627        longop = FALSE;
1628        while (*s != 0)
1629          {
1630          for (op = optionlist; op->one_char != 0; op++)
1631            { if (*s == op->one_char) break; }
1632          if (op->one_char == 0)
1633            {
1634            fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1635              *s, argv[i]);
1636            exit(usage(2));
1637            }
1638          if (op->type != OP_NODATA || s[1] == 0)
1639            {
1640            option_data = s+1;
1641            break;
1642            }
1643          pcre_options = handle_option(*s++, pcre_options);
1644          }
1645        }
1646    
1647      /* At this point we should have op pointing to a matched option. If the type
1648      is NO_DATA, it means that there is no data, and the option might set
1649      something in the PCRE options. */
1650    
1651      if (op->type == OP_NODATA)
1652        {
1653        pcre_options = handle_option(op->one_char, pcre_options);
1654        continue;
1655        }
1656    
1657      /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1658      either has a value or defaults to something. It cannot have data in a
1659      separate item. At the moment, the only such options are "colo(u)r" and
1660      Jeffrey Friedl's special -S debugging option. */
1661    
1662      if (*option_data == 0 &&
1663          (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1664        {
1665        switch (op->one_char)
1666          {
1667          case N_COLOUR:
1668          colour_option = (char *)"auto";
1669          break;
1670    #ifdef JFRIEDL_DEBUG
1671          case 'S':
1672          S_arg = 0;
1673          break;
1674    #endif
1675          }
1676        continue;
1677        }
1678    
1679      /* Otherwise, find the data string for the option. */
1680    
1681      if (*option_data == 0)
1682        {
1683        if (i >= argc - 1 || longopwasequals)
1684          {
1685          fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1686          exit(usage(2));
1687          }
1688        option_data = argv[++i];
1689        }
1690    
1691      /* If the option type is OP_PATLIST, it's the -e option, which can be called
1692      multiple times to create a list of patterns. */
1693    
1694      if (op->type == OP_PATLIST)
1695        {
1696        if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1697          {
1698          fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1699            MAX_PATTERN_COUNT);
1700          return 2;
1701          }
1702        patterns[cmd_pattern_count++] = option_data;
1703        }
1704    
1705      /* Otherwise, deal with single string or numeric data values. */
1706    
1707      else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1708        {
1709        *((char **)op->dataptr) = option_data;
1710        }
1711      else
1712        {
1713        char *endptr;
1714        int n = strtoul(option_data, &endptr, 10);
1715        if (*endptr != 0)
1716          {
1717          if (longop)
1718            {
1719            char *equals = strchr(op->long_name, '=');
1720            int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1721              equals - op->long_name;
1722            fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1723              option_data, nlen, op->long_name);
1724            }
1725          else
1726            fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1727              option_data, op->one_char);
1728          exit(usage(2));
1729          }
1730        *((int *)op->dataptr) = n;
1731        }
1732      }
1733    
1734    /* Options have been decoded. If -C was used, its value is used as a default
1735    for -A and -B. */
1736    
1737    if (both_context > 0)
1738      {
1739      if (after_context == 0) after_context = both_context;
1740      if (before_context == 0) before_context = both_context;
1741      }
1742    
1743    /* If a locale has not been provided as an option, see if the LC_CTYPE or
1744    LC_ALL environment variable is set, and if so, use it. */
1745    
1746    if (locale == NULL)
1747      {
1748      locale = getenv("LC_ALL");
1749      locale_from = "LCC_ALL";
1750      }
1751    
1752    if (locale == NULL)
1753      {
1754      locale = getenv("LC_CTYPE");
1755      locale_from = "LC_CTYPE";
1756      }
1757    
1758    /* If a locale has been provided, set it, and generate the tables the PCRE
1759    needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1760    
1761    if (locale != NULL)
1762      {
1763      if (setlocale(LC_CTYPE, locale) == NULL)
1764        {
1765        fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1766          locale, locale_from);
1767        return 2;
1768        }
1769      pcretables = pcre_maketables();
1770      }
1771    
1772    /* Sort out colouring */
1773    
1774    if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1775      {
1776      if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1777      else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1778      else
1779        {
1780        fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1781          colour_option);
1782        return 2;
1783        }
1784      if (do_colour)
1785        {
1786        char *cs = getenv("PCREGREP_COLOUR");
1787        if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1788        if (cs != NULL) colour_string = cs;
1789        }
1790      }
1791    
1792    /* Interpret the newline type; the default settings are Unix-like. */
1793    
1794    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1795      {
1796      pcre_options |= PCRE_NEWLINE_CR;
1797      endlinetype = EL_CR;
1798      }
1799    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1800      {
1801      pcre_options |= PCRE_NEWLINE_LF;
1802      endlinetype = EL_LF;
1803      }
1804    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1805      {
1806      pcre_options |= PCRE_NEWLINE_CRLF;
1807      endlinetype = EL_CRLF;
1808      }
1809    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1810      {
1811      pcre_options |= PCRE_NEWLINE_ANY;
1812      endlinetype = EL_ANY;
1813      }
1814    else
1815      {
1816      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1817      return 2;
1818      }
1819    
1820    /* Interpret the text values for -d and -D */
1821    
1822    if (dee_option != NULL)
1823      {
1824      if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1825      else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1826      else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1827      else
1828        {
1829        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1830        return 2;
1831        }
1832      }
1833    
1834    if (DEE_option != NULL)
1835      {
1836      if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1837      else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1838      else
1839        {
1840        fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1841        return 2;
1842        }
1843      }
1844    
1845    /* Check the values for Jeffrey Friedl's debugging options. */
1846    
1847    #ifdef JFRIEDL_DEBUG
1848    if (S_arg > 9)
1849      {
1850      fprintf(stderr, "pcregrep: bad value for -S option\n");
1851      return 2;
1852      }
1853    if (jfriedl_XT != 0 || jfriedl_XR != 0)
1854      {
1855      if (jfriedl_XT == 0) jfriedl_XT = 1;
1856      if (jfriedl_XR == 0) jfriedl_XR = 1;
1857      }
1858    #endif
1859    
1860    /* Get memory to store the pattern and hints lists. */
1861    
1862    pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1863    hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1864    
1865    if (pattern_list == NULL || hints_list == NULL)
1866      {
1867      fprintf(stderr, "pcregrep: malloc failed\n");
1868      return 2;
1869      }
1870    
1871    /* If no patterns were provided by -e, and there is no file provided by -f,
1872    the first argument is the one and only pattern, and it must exist. */
1873    
1874    if (cmd_pattern_count == 0 && pattern_filename == NULL)
1875      {
1876      if (i >= argc) return usage(2);
1877      patterns[cmd_pattern_count++] = argv[i++];
1878      }
1879    
1880    /* Compile the patterns that were provided on the command line, either by
1881    multiple uses of -e or as a single unkeyed pattern. */
1882    
1883    for (j = 0; j < cmd_pattern_count; j++)
1884      {
1885      if (!compile_pattern(patterns[j], pcre_options, NULL,
1886           (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1887        return 2;
1888      }
1889    
1890    /* Compile the regular expressions that are provided in a file. */
1891    
1892    if (pattern_filename != NULL)
1893      {
1894      int linenumber = 0;
1895      FILE *f;
1896      char *filename;
1897      char buffer[MBUFTHIRD];
1898    
1899      if (strcmp(pattern_filename, "-") == 0)
1900        {
1901        f = stdin;
1902        filename = stdin_name;
1903        }
1904      else
1905        {
1906        f = fopen(pattern_filename, "r");
1907        if (f == NULL)
1908          {
1909          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1910            strerror(errno));
1911          return 2;
1912          }
1913        filename = pattern_filename;
1914        }
1915    
1916      while (fgets(buffer, MBUFTHIRD, f) != NULL)
1917        {
1918        char *s = buffer + (int)strlen(buffer);
1919        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
1920        *s = 0;
1921        linenumber++;
1922        if (buffer[0] == 0) continue;   /* Skip blank lines */
1923        if (!compile_pattern(buffer, pcre_options, filename, linenumber))
1924          return 2;
1925        }
1926    
1927      if (f != stdin) fclose(f);
1928      }
1929    
1930    /* Study the regular expressions, as we will be running them many times */
1931    
1932    for (j = 0; j < pattern_count; j++)
1933      {
1934      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
1935      if (error != NULL)
1936        {
1937        char s[16];
1938        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
1939        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
1940        return 2;
1941        }
1942      }
1943    
1944    /* If there are include or exclude patterns, compile them. */
1945    
1946    if (exclude_pattern != NULL)
1947      {
1948      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
1949        pcretables);
1950      if (exclude_compiled == NULL)
1951      {      {
1952      int frc = pcregrep(in, filenames? argv[i] : NULL);      fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1953      if (frc == 0 && rc == 1) rc = 0;        errptr, error);
1954      fclose(in);      return 2;
1955      }      }
1956    }    }
1957    
1958    if (include_pattern != NULL)
1959      {
1960      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
1961        pcretables);
1962      if (include_compiled == NULL)
1963        {
1964        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1965          errptr, error);
1966        return 2;
1967        }
1968      }
1969    
1970    /* If there are no further arguments, do the business on stdin and exit. */
1971    
1972    if (i >= argc)
1973      return pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
1974    
1975    /* Otherwise, work through the remaining arguments as files or directories.
1976    Pass in the fact that there is only one argument at top level - this suppresses
1977    the file name if the argument is not a directory and filenames are not
1978    otherwise forced. */
1979    
1980    only_one_at_top = i == argc - 1;   /* Catch initial value of i */
1981    
1982    for (; i < argc; i++)
1983      {
1984      int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
1985        only_one_at_top);
1986      if (frc > 1) rc = frc;
1987        else if (frc == 0 && rc == 1) rc = 0;
1988      }
1989    
1990  return rc;  return rc;
1991  }  }
1992    
1993  /* End */  /* End of pcregrep */

Legend:
Removed from v.49  
changed lines
  Added in v.96

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12