/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. On a Unix system it can recurse into directories. */  its pattern matching. On a Unix or Win32 system it can recurse into
7    directories.
8    
9               Copyright (c) 1997-2005 University of Cambridge
10    
11    -----------------------------------------------------------------------------
12    Redistribution and use in source and binary forms, with or without
13    modification, are permitted provided that the following conditions are met:
14    
15        * Redistributions of source code must retain the above copyright notice,
16          this list of conditions and the following disclaimer.
17    
18        * Redistributions in binary form must reproduce the above copyright
19          notice, this list of conditions and the following disclaimer in the
20          documentation and/or other materials provided with the distribution.
21    
22        * Neither the name of the University of Cambridge nor the names of its
23          contributors may be used to endorse or promote products derived from
24          this software without specific prior written permission.
25    
26    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36    POSSIBILITY OF SUCH DAMAGE.
37    -----------------------------------------------------------------------------
38    */
39    
40  #include <ctype.h>  #include <ctype.h>
41  #include <stdio.h>  #include <stdio.h>
42  #include <string.h>  #include <string.h>
43  #include <stdlib.h>  #include <stdlib.h>
44  #include <errno.h>  #include <errno.h>
45    
46    #include <sys/types.h>
47    #include <sys/stat.h>
48    #include <unistd.h>
49    
50  #include "config.h"  #include "config.h"
51  #include "pcre.h"  #include "pcre.h"
52    
# Line 18  its pattern matching. On a Unix system i Line 55  its pattern matching. On a Unix system i
55    
56  typedef int BOOL;  typedef int BOOL;
57    
58  #define VERSION "2.0 01-Aug-2001"  #define VERSION "4.1 05-Sep-2005"
59  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
60    
61    #if BUFSIZ > 8192
62    #define MBUFTHIRD BUFSIZ
63    #else
64    #define MBUFTHIRD 8192
65    #endif
66    
67    
68    
69  /*************************************************  /*************************************************
70  *               Global variables                 *  *               Global variables                 *
71  *************************************************/  *************************************************/
72    
73  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
74    static char *stdin_name = (char *)"(standard input)";
75  static int  pattern_count = 0;  static int  pattern_count = 0;
76  static pcre **pattern_list;  static pcre **pattern_list;
77  static pcre_extra **hints_list;  static pcre_extra **hints_list;
78    
79    static char *include_pattern = NULL;
80    static char *exclude_pattern = NULL;
81    
82    static pcre *include_compiled = NULL;
83    static pcre *exclude_compiled = NULL;
84    
85    static int after_context = 0;
86    static int before_context = 0;
87    static int both_context = 0;
88    
89  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
90  static BOOL filenames = TRUE;  static BOOL filenames = TRUE;
91  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
92    static BOOL filenames_nomatch_only = FALSE;
93    static BOOL hyphenpending = FALSE;
94  static BOOL invert = FALSE;  static BOOL invert = FALSE;
95    static BOOL multiline = FALSE;
96  static BOOL number = FALSE;  static BOOL number = FALSE;
97    static BOOL quiet = FALSE;
98  static BOOL recurse = FALSE;  static BOOL recurse = FALSE;
99  static BOOL silent = FALSE;  static BOOL silent = FALSE;
100  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
101    static BOOL word_match = FALSE;
102    
103  /* Structure for options and list of them */  /* Structure for options and list of them */
104    
105    enum { OP_NODATA, OP_STRING, OP_NUMBER };
106    
107  typedef struct option_item {  typedef struct option_item {
108      int type;
109    int one_char;    int one_char;
110    char *long_name;    void *dataptr;
111    char *help_text;    const char *long_name;
112      const char *help_text;
113  } option_item;  } option_item;
114    
115  static option_item optionlist[] = {  static option_item optionlist[] = {
116    { -1,  "help",         "display this help and exit" },    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },
117    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },
118    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },
119    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },
120    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },
121    { 'n', "line-number",  "print line number with output lines" },    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },
122    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },
123    { 's', "no-messages",  "suppress error messages" },    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },
124    { 'V', "version",      "print version information and exit" },    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },
125    { 'v', "invert-match", "select non-matching lines" },    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },
126    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },
127    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },
128    { 0,    NULL,           NULL }    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },
129      { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },
130      { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },
131      { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },
132      { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
133      { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },
134      { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },
135      { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },
136      { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },
137      { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },
138      { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },
139      { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },
140      { OP_NODATA, 0,   NULL,               NULL,            NULL }
141  };  };
142    
143    
# Line 70  static option_item optionlist[] = { Line 146  static option_item optionlist[] = {
146  *************************************************/  *************************************************/
147    
148  /* These functions are defined so that they can be made system specific,  /* These functions are defined so that they can be made system specific,
149  although at present the only ones are for Unix, and for "no directory recursion  although at present the only ones are for Unix, Win32, and for "no directory
150  support". */  recursion support". */
151    
152    
153  /************* Directory scanning in Unix ***********/  /************* Directory scanning in Unix ***********/
# Line 83  support". */ Line 159  support". */
159    
160  typedef DIR directory_type;  typedef DIR directory_type;
161    
162  int  static int
163  isdirectory(char *filename)  isdirectory(char *filename)
164  {  {
165  struct stat statbuf;  struct stat statbuf;
# Line 92  if (stat(filename, &statbuf) < 0) Line 168  if (stat(filename, &statbuf) < 0)
168  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;  return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
169  }  }
170    
171  directory_type *  static directory_type *
172  opendirectory(char *filename)  opendirectory(char *filename)
173  {  {
174  return opendir(filename);  return opendir(filename);
175  }  }
176    
177  char *  static char *
178  readdirectory(directory_type *dir)  readdirectory(directory_type *dir)
179  {  {
180  for (;;)  for (;;)
# Line 111  for (;;) Line 187  for (;;)
187  return NULL;   /* Keep compiler happy; never executed */  return NULL;   /* Keep compiler happy; never executed */
188  }  }
189    
190  void  static void
191  closedirectory(directory_type *dir)  closedirectory(directory_type *dir)
192  {  {
193  closedir(dir);  closedir(dir);
194  }  }
195    
196    
197  #else  /************* Directory scanning in Win32 ***********/
198    
199    /* I (Philip Hazel) have no means of testing this code. It was contributed by
200    Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201    when it did not exist. */
202    
203    
204    #elif HAVE_WIN32API
205    
206    #ifndef STRICT
207    # define STRICT
208    #endif
209    #ifndef WIN32_LEAN_AND_MEAN
210    # define WIN32_LEAN_AND_MEAN
211    #endif
212    #ifndef INVALID_FILE_ATTRIBUTES
213    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214    #endif
215    
216    #include <windows.h>
217    
218    typedef struct directory_type
219    {
220    HANDLE handle;
221    BOOL first;
222    WIN32_FIND_DATA data;
223    } directory_type;
224    
225    int
226    isdirectory(char *filename)
227    {
228    DWORD attr = GetFileAttributes(filename);
229    if (attr == INVALID_FILE_ATTRIBUTES)
230      return 0;
231    return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
232    }
233    
234    directory_type *
235    opendirectory(char *filename)
236    {
237    size_t len;
238    char *pattern;
239    directory_type *dir;
240    DWORD err;
241    len = strlen(filename);
242    pattern = (char *) malloc(len + 3);
243    dir = (directory_type *) malloc(sizeof(*dir));
244    if ((pattern == NULL) || (dir == NULL))
245      {
246      fprintf(stderr, "pcregrep: malloc failed\n");
247      exit(2);
248      }
249    memcpy(pattern, filename, len);
250    memcpy(&(pattern[len]), "\\*", 3);
251    dir->handle = FindFirstFile(pattern, &(dir->data));
252    if (dir->handle != INVALID_HANDLE_VALUE)
253      {
254      free(pattern);
255      dir->first = TRUE;
256      return dir;
257      }
258    err = GetLastError();
259    free(pattern);
260    free(dir);
261    errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
262    return NULL;
263    }
264    
265    char *
266    readdirectory(directory_type *dir)
267    {
268    for (;;)
269      {
270      if (!dir->first)
271        {
272        if (!FindNextFile(dir->handle, &(dir->data)))
273          return NULL;
274        }
275      else
276        {
277        dir->first = FALSE;
278        }
279      if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
280        return dir->data.cFileName;
281      }
282    #ifndef _MSC_VER
283    return NULL;   /* Keep compiler happy; never executed */
284    #endif
285    }
286    
287    void
288    closedirectory(directory_type *dir)
289    {
290    FindClose(dir->handle);
291    free(dir);
292    }
293    
294    
295  /************* Directory scanning when we can't do it ***********/  /************* Directory scanning when we can't do it ***********/
296    
297  /* The type is void, and apart from isdirectory(), the functions do nothing. */  /* The type is void, and apart from isdirectory(), the functions do nothing. */
298    
299    #else
300    
301  typedef void directory_type;  typedef void directory_type;
302    
303  int isdirectory(char *filename) { return FALSE; }  int isdirectory(char *filename) { return FALSE; }
# Line 159  return sys_errlist[n]; Line 332  return sys_errlist[n];
332    
333    
334  /*************************************************  /*************************************************
335  *              Grep an individual file           *  *       Print the previous "after" lines         *
336  *************************************************/  *************************************************/
337    
338    /* This is called if we are about to lose said lines because of buffer filling,
339    and at the end of the file.
340    
341    Arguments:
342      lastmatchnumber   the number of the last matching line, plus one
343      lastmatchrestart  where we restarted after the last match
344      endptr            end of available data
345      printname         filename for printing
346    
347    Returns:            nothing
348    */
349    
350    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351      char *endptr, char *printname)
352    {
353    if (after_context > 0 && lastmatchnumber > 0)
354      {
355      int count = 0;
356      while (lastmatchrestart < endptr && count++ < after_context)
357        {
358        char *pp = lastmatchrestart;
359        if (printname != NULL) fprintf(stdout, "%s-", printname);
360        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361        while (*pp != '\n') pp++;
362        fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363        lastmatchrestart = pp + 1;
364        }
365      hyphenpending = TRUE;
366      }
367    }
368    
369    
370    
371    /*************************************************
372    *            Grep an individual file             *
373    *************************************************/
374    
375    /* This is called from grep_or_recurse() below. It uses a buffer that is three
376    times the value of MBUFTHIRD. The matching point is never allowed to stray into
377    the top third of the buffer, thus keeping more of the file available for
378    context printing or for multiline scanning. For large files, the pointer will
379    be in the middle third most of the time, so the bottom third is available for
380    "before" context printing.
381    
382    Arguments:
383      in           the fopened FILE stream
384      printname    the file name if it is to be printed for each match
385                   or NULL if the file name is not to be printed
386                   it cannot be NULL if filenames[_nomatch]_only is set
387    
388    Returns:       0 if there was at least one match
389                   1 otherwise (no matches)
390    */
391    
392  static int  static int
393  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
394  {  {
395  int rc = 1;  int rc = 1;
396  int linenumber = 0;  int linenumber = 1;
397    int lastmatchnumber = 0;
398  int count = 0;  int count = 0;
399  int offsets[99];  int offsets[99];
400  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
401    char buffer[3*MBUFTHIRD];
402    char *ptr = buffer;
403    char *endptr;
404    size_t bufflength;
405    BOOL endhyphenpending = FALSE;
406    
407    /* Do the first read into the start of the buffer and set up the pointer to
408    end of what we have. */
409    
410    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411    endptr = buffer + bufflength;
412    
413    /* Loop while the current pointer is not at the end of the file. For large
414    files, endptr will be at the end of the buffer when we are in the middle of the
415    file, but ptr will never get there, because as soon as it gets over 2/3 of the
416    way, the buffer is shifted left and re-filled. */
417    
418  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
419    {    {
   BOOL match = FALSE;  
420    int i;    int i;
421    int length = (int)strlen(buffer);    BOOL match = FALSE;
422    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    char *t = ptr;
423    linenumber++;    size_t length, linelength;
424    
425      /* At this point, ptr is at the start of a line. We need to find the length
426      of the subject string to pass to pcre_exec(). In multiline mode, it is the
427      length remainder of the data in the buffer. Otherwise, it is the length of
428      the next line. After matching, we always advance by the length of the next
429      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430      that any match is constrained to be in the first line. */
431    
432      linelength = 0;
433      while (t < endptr && *t++ != '\n') linelength++;
434      length = multiline? endptr - ptr : linelength;
435    
436      /* Run through all the patterns until one matches. Note that we don't include
437      the final newline in the subject string. */
438    
439    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; !match && i < pattern_count; i++)
440      {      {
441      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442        offsets, 99) >= 0;        offsets, 99) >= 0;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
443      }      }
444    
445      /* If it's a match or a not-match (as required), print what's wanted. */
446    
447    if (match != invert)    if (match != invert)
448      {      {
449        BOOL hyphenprinted = FALSE;
450    
451        if (filenames_nomatch_only) return 1;
452    
453      if (count_only) count++;      if (count_only) count++;
454    
455      else if (filenames_only)      else if (filenames_only)
456        {        {
457        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
458        return 0;        return 0;
459        }        }
460    
461      else if (silent) return 0;      else if (quiet) return 0;
462    
463      else      else
464        {        {
465        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
466          previous match. We never print any overlaps. */
467    
468          if (after_context > 0 && lastmatchnumber > 0)
469            {
470            int linecount = 0;
471            char *p = lastmatchrestart;
472    
473            while (p < ptr && linecount < after_context)
474              {
475              while (*p != '\n') p++;
476              p++;
477              linecount++;
478              }
479    
480            /* It is important to advance lastmatchrestart during this printing so
481            that it interacts correctly with any "before" printing below. */
482    
483            while (lastmatchrestart < p)
484              {
485              char *pp = lastmatchrestart;
486              if (printname != NULL) fprintf(stdout, "%s-", printname);
487              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488              while (*pp != '\n') pp++;
489              fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490              lastmatchrestart = pp + 1;
491              }
492            if (lastmatchrestart != ptr) hyphenpending = TRUE;
493            }
494    
495          /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497          if (hyphenpending)
498            {
499            fprintf(stdout, "--\n");
500            hyphenpending = FALSE;
501            hyphenprinted = TRUE;
502            }
503    
504          /* See if there is a requirement to print some "before" lines for this
505          match. Again, don't print overlaps. */
506    
507          if (before_context > 0)
508            {
509            int linecount = 0;
510            char *p = ptr;
511    
512            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513                   linecount++ < before_context)
514              {
515              p--;
516              while (p > buffer && p[-1] != '\n') p--;
517              }
518    
519            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520              fprintf(stdout, "--\n");
521    
522            while (p < ptr)
523              {
524              char *pp = p;
525              if (printname != NULL) fprintf(stdout, "%s-", printname);
526              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527              while (*pp != '\n') pp++;
528              fprintf(stdout, "%.*s", pp - p + 1, p);
529              p = pp + 1;
530              }
531            }
532    
533          /* Now print the matching line(s); ensure we set hyphenpending at the end
534          of the file if any context lines are being output. */
535    
536          if (after_context > 0 || before_context > 0)
537            endhyphenpending = TRUE;
538    
539          if (printname != NULL) fprintf(stdout, "%s:", printname);
540        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
541        fprintf(stdout, "%s\n", buffer);  
542          /* In multiline mode, we want to print to the end of the line in which
543          the end of the matched string is found, so we adjust linelength and the
544          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
545          start of the match will always be before the first \n character. */
546    
547          if (multiline)
548            {
549            char *endmatch = ptr + offsets[1];
550            t = ptr;
551            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
552            while (endmatch < endptr && *endmatch != '\n') endmatch++;
553            linelength = endmatch - ptr;
554            }
555    
556          fprintf(stdout, "%.*s\n", linelength, ptr);
557          }
558    
559        rc = 0;    /* Had some success */
560    
561        /* Remember where the last match happened for after_context. We remember
562        where we are about to restart, and that line's number. */
563    
564        lastmatchrestart = ptr + linelength + 1;
565        lastmatchnumber = linenumber + 1;
566        }
567    
568      /* Advance to after the newline and increment the line number. */
569    
570      ptr += linelength + 1;
571      linenumber++;
572    
573      /* If we haven't yet reached the end of the file (the buffer is full), and
574      the current point is in the top 1/3 of the buffer, slide the buffer down by
575      1/3 and refill it. Before we do this, if some unprinted "after" lines are
576      about to be lost, print them. */
577    
578      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
579        {
580        if (after_context > 0 &&
581            lastmatchnumber > 0 &&
582            lastmatchrestart < buffer + MBUFTHIRD)
583          {
584          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
585          lastmatchnumber = 0;
586        }        }
587    
588      rc = 0;      /* Now do the shuffle */
589    
590        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
591        ptr -= MBUFTHIRD;
592        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
593        endptr = buffer + bufflength;
594    
595        /* Adjust any last match point */
596    
597        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
598      }      }
599      }     /* Loop through the whole file */
600    
601    /* End of file; print final "after" lines if wanted; do_after_lines sets
602    hyphenpending if it prints something. */
603    
604    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
605    hyphenpending |= endhyphenpending;
606    
607    /* Print the file name if we are looking for those without matches and there
608    were none. If we found a match, we won't have got this far. */
609    
610    if (filenames_nomatch_only)
611      {
612      fprintf(stdout, "%s\n", printname);
613      return 0;
614    }    }
615    
616    /* Print the match count if wanted */
617    
618  if (count_only)  if (count_only)
619    {    {
620    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
621    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
622    }    }
623    
# Line 220  return rc; Line 626  return rc;
626    
627    
628    
   
629  /*************************************************  /*************************************************
630  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
631  *************************************************/  *************************************************/
632    
633    /* Given a path name, if it's a directory, scan all the files if we are
634    recursing; if it's a file, grep it.
635    
636    Arguments:
637      pathname          the path to investigate
638      dir_recurse       TRUE if recursing is wanted (-r)
639      show_filenames    TRUE if file names are wanted for multiple files, except
640                          for the only file at top level when not filenames_only
641      only_one_at_top   TRUE if the path is the only one at toplevel
642    
643    Returns:   0 if there was at least one match
644               1 if there were no matches
645               2 if there was some kind of error
646    
647    However, file opening failures are suppressed if "silent" is set.
648    */
649    
650  static int  static int
651  grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
652    BOOL only_one_at_top)    BOOL only_one_at_top)
653  {  {
654  int rc = 1;  int rc = 1;
655  int sep;  int sep;
656  FILE *in;  FILE *in;
657    char *printname;
658    
659    /* If the file name is "-" we scan stdin */
660    
661  /* If the file is a directory and we are recursing, scan each file within it.  if (strcmp(pathname, "-") == 0)
662  The scanning code is localized so it can be made system-specific. */    {
663      return pcregrep(stdin,
664        (filenames_only || filenames_nomatch_only ||
665        (show_filenames && !only_one_at_top))?
666          stdin_name : NULL);
667      }
668    
669  if ((sep = isdirectory(filename)) != 0 && recurse)  /* If the file is a directory and we are recursing, scan each file within it,
670    subject to any include or exclude patterns that were set. The scanning code is
671    localized so it can be made system-specific. */
672    
673    if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
674    {    {
675    char buffer[1024];    char buffer[1024];
676    char *nextfile;    char *nextfile;
677    directory_type *dir = opendirectory(filename);    directory_type *dir = opendirectory(pathname);
678    
679    if (dir == NULL)    if (dir == NULL)
680      {      {
681      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      if (!silent)
682        strerror(errno));        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
683            strerror(errno));
684      return 2;      return 2;
685      }      }
686    
687    while ((nextfile = readdirectory(dir)) != NULL)    while ((nextfile = readdirectory(dir)) != NULL)
688      {      {
689      int frc;      int frc, blen;
690      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
691      frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);      blen = strlen(buffer);
692      if (frc == 0 && rc == 1) rc = 0;  
693        if (exclude_compiled != NULL &&
694            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
695          continue;
696    
697        if (include_compiled != NULL &&
698            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
699          continue;
700    
701        frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
702        if (frc > 1) rc = frc;
703         else if (frc == 0 && rc == 1) rc = 0;
704      }      }
705    
706    closedirectory(dir);    closedirectory(dir);
# Line 262  if ((sep = isdirectory(filename)) != 0 & Line 708  if ((sep = isdirectory(filename)) != 0 &
708    }    }
709    
710  /* If the file is not a directory, or we are not recursing, scan it. If this is  /* If the file is not a directory, or we are not recursing, scan it. If this is
711  the first and only argument at top level, we don't show the file name.  the first and only argument at top level, we don't show the file name (unless
712  Otherwise, control is via the show_filenames variable. */  we are only showing the file name). Otherwise, control is via the
713    show_filenames variable. */
714    
715  in = fopen(filename, "r");  in = fopen(pathname, "r");
716  if (in == NULL)  if (in == NULL)
717    {    {
718    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
719        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
720          strerror(errno));
721    return 2;    return 2;
722    }    }
723    
724  rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);  printname =  (filenames_only || filenames_nomatch_only ||
725      (show_filenames && !only_one_at_top))? pathname : NULL;
726    
727    rc = pcregrep(in, printname);
728    
729  fclose(in);  fclose(in);
730  return rc;  return rc;
731  }  }
# Line 287  return rc; Line 740  return rc;
740  static int  static int
741  usage(int rc)  usage(int rc)
742  {  {
743  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");
744  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
745  return rc;  return rc;
746  }  }
# Line 304  help(void) Line 757  help(void)
757  {  {
758  option_item *op;  option_item *op;
759    
760  printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
761  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
762    printf("PATTERN must be present if -f is not used.\n");
763    printf("\"-\" can be used as a file name to mean STDIN.\n");
764  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
765    
766  printf("Options:\n");  printf("Options:\n");
# Line 321  for (op = optionlist; op->one_char != 0; Line 776  for (op = optionlist; op->one_char != 0;
776    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
777    }    }
778    
779  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
780  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
781  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
782    
783  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
784  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
785  }  }
786    
# Line 334  printf("Exit status is 0 if any matches, Line 788  printf("Exit status is 0 if any matches,
788    
789    
790  /*************************************************  /*************************************************
791  *                Handle an option                *  *    Handle a single-letter, no data option      *
792  *************************************************/  *************************************************/
793    
794  static int  static int
# Line 346  switch(letter) Line 800  switch(letter)
800    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
801    case 'h': filenames = FALSE; break;    case 'h': filenames = FALSE; break;
802    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
803    case 'l': filenames_only = TRUE;    case 'l': filenames_only = TRUE; break;
804      case 'L': filenames_nomatch_only = TRUE; break;
805      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
806    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
807      case 'q': quiet = TRUE; break;
808    case 'r': recurse = TRUE; break;    case 'r': recurse = TRUE; break;
809    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
810      case 'u': options |= PCRE_UTF8; break;
811    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
812    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': word_match = TRUE; break;
813      case 'x': whole_lines = TRUE; break;
814    
815    case 'V':    case 'V':
816    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 374  return options; Line 833  return options;
833  *                Main program                    *  *                Main program                    *
834  *************************************************/  *************************************************/
835    
836    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
837    
838  int  int
839  main(int argc, char **argv)  main(int argc, char **argv)
840  {  {
# Line 388  BOOL only_one_at_top; Line 849  BOOL only_one_at_top;
849    
850  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
851    {    {
852      option_item *op = NULL;
853      char *option_data = (char *)"";    /* default to keep compiler happy */
854      BOOL longop;
855      BOOL longopwasequals = FALSE;
856    
857    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
858    
859    /* Long name options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
860      but only if we have previously had -f to define the patterns. */
861    
862      if (argv[i][1] == 0)
863        {
864        if (pattern_filename != NULL) break;
865          else exit(usage(2));
866        }
867    
868      /* Handle a long name option, or -- to terminate the options */
869    
870    if (argv[i][1] == '-')    if (argv[i][1] == '-')
871      {      {
872      option_item *op;      char *arg = argv[i] + 2;
873        char *argequals = strchr(arg, '=');
874    
875      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
876        {        {
877        pattern_filename = argv[i] + 7;        i++;
878        continue;        break;                /* out of the options-handling loop */
879        }        }
880    
881        longop = TRUE;
882    
883        /* Some long options have data that follows after =, for example file=name.
884        Some options have variations in the long name spelling: specifically, we
885        allow "regexp" because GNU grep allows it, though I personally go along
886        with Jeff Friedl in preferring "regex" without the "p". These options are
887        entered in the table as "regex(p)". No option is in both these categories,
888        fortunately. */
889    
890      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
891        {        {
892        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
893          char *equals = strchr(op->long_name, '=');
894          if (opbra == NULL)     /* Not a (p) case */
895          {          {
896          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
897          break;            {
898              if (strcmp(arg, op->long_name) == 0) break;
899              }
900            else                 /* Special case xxx=data */
901              {
902              int oplen = equals - op->long_name;
903              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
904              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
905                {
906                option_data = arg + arglen;
907                if (*option_data == '=')
908                  {
909                  option_data++;
910                  longopwasequals = TRUE;
911                  }
912                break;
913                }
914              }
915            }
916          else                   /* Special case xxxx(p) */
917            {
918            char buff1[24];
919            char buff2[24];
920            int baselen = opbra - op->long_name;
921            sprintf(buff1, "%.*s", baselen, op->long_name);
922            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
923              opbra + 1);
924            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
925              break;
926          }          }
927        }        }
928    
929      if (op->one_char == 0)      if (op->one_char == 0)
930        {        {
931        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 417  for (i = 1; i < argc; i++) Line 933  for (i = 1; i < argc; i++)
933        }        }
934      }      }
935    
936    /* One-char options */    /* One-char options; many that have no data may be in a single argument; we
937      continue till we hit the last one or one that needs data. */
938    
939    else    else
940      {      {
941      char *s = argv[i] + 1;      char *s = argv[i] + 1;
942        longop = FALSE;
943      while (*s != 0)      while (*s != 0)
944        {        {
945        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
946            { if (*s == op->one_char) break; }
947          if (op->one_char == 0)
948          {          {
949          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
950          if (pattern_filename[0] == 0)            *s, argv[i]);
951            {          exit(usage(2));
952            if (i >= argc - 1)          }
953              {        if (op->type != OP_NODATA || s[1] == 0)
954              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
955              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
956          break;          break;
957          }          }
958        else options = handle_option(*s++, options);        options = handle_option(*s++, options);
959        }        }
960      }      }
961    
962      /* At this point we should have op pointing to a matched option */
963    
964      if (op->type == OP_NODATA)
965        options = handle_option(op->one_char, options);
966      else
967        {
968        if (*option_data == 0)
969          {
970          if (i >= argc - 1 || longopwasequals)
971            {
972            fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
973            exit(usage(2));
974            }
975          option_data = argv[++i];
976          }
977    
978        if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
979          {
980          char *endptr;
981          int n = strtoul(option_data, &endptr, 10);
982          if (*endptr != 0)
983            {
984            if (longop)
985              fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
986                option_data, op->long_name);
987            else
988              fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
989                option_data, op->one_char);
990            exit(usage(2));
991            }
992          *((int *)op->dataptr) = n;
993          }
994        }
995      }
996    
997    /* Options have been decoded. If -C was used, its value is used as a default
998    for -A and -B. */
999    
1000    if (both_context > 0)
1001      {
1002      if (after_context == 0) after_context = both_context;
1003      if (before_context == 0) before_context = both_context;
1004    }    }
1005    
1006  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1007  hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1008    
1009  if (pattern_list == NULL || hints_list == NULL)  if (pattern_list == NULL || hints_list == NULL)
1010    {    {
# Line 457  if (pattern_list == NULL || hints_list = Line 1017  if (pattern_list == NULL || hints_list =
1017  if (pattern_filename != NULL)  if (pattern_filename != NULL)
1018    {    {
1019    FILE *f = fopen(pattern_filename, "r");    FILE *f = fopen(pattern_filename, "r");
1020    char buffer[BUFSIZ];    char buffer[MBUFTHIRD + 16];
1021      char *rdstart;
1022      int adjust = 0;
1023    
1024    if (f == NULL)    if (f == NULL)
1025      {      {
1026      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1027        strerror(errno));        strerror(errno));
1028      return 2;      return 2;
1029      }      }
1030    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1031      if (whole_lines)
1032        {
1033        strcpy(buffer, "^(?:");
1034        adjust = 4;
1035        }
1036      else if (word_match)
1037        {
1038        strcpy(buffer, "\\b");
1039        adjust = 2;
1040        }
1041    
1042      rdstart = buffer + adjust;
1043      while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1044      {      {
1045      char *s = buffer + (int)strlen(buffer);      char *s = rdstart + (int)strlen(rdstart);
1046      if (pattern_count >= MAX_PATTERN_COUNT)      if (pattern_count >= MAX_PATTERN_COUNT)
1047        {        {
1048        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1049          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1050        return 2;        return 2;
1051        }        }
1052      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1053      if (s == buffer) continue;      if (s == rdstart) continue;
1054      *s = 0;      if (whole_lines) strcpy(s, ")$");
1055          else if (word_match)strcpy(s, "\\b");
1056            else *s = 0;
1057      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1058        &errptr, NULL);        &errptr, NULL);
1059      if (pattern_list[pattern_count++] == NULL)      if (pattern_list[pattern_count++] == NULL)
1060        {        {
1061        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1062          pattern_count, errptr, error);          pattern_count, errptr - adjust, error);
1063        return 2;        return 2;
1064        }        }
1065      }      }
1066    fclose(f);    fclose(f);
1067    }    }
1068    
1069  /* If no file name, a single regex must be given inline */  /* If no file name, a single regex must be given inline. */
1070    
1071  else  else
1072    {    {
1073    if (i >= argc) return usage(0);    char buffer[MBUFTHIRD + 16];
1074    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);    char *pat;
1075      int adjust = 0;
1076    
1077      if (i >= argc) return usage(2);
1078    
1079      if (whole_lines)
1080        {
1081        sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1082        pat = buffer;
1083        adjust = 4;
1084        }
1085      else if (word_match)
1086        {
1087        sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1088        pat = buffer;
1089        adjust = 2;
1090        }
1091      else pat = argv[i++];
1092    
1093      pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1094    
1095    if (pattern_list[0] == NULL)    if (pattern_list[0] == NULL)
1096      {      {
1097      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1098        error);        errptr - adjust, error);
1099      return 2;      return 2;
1100      }      }
1101    pattern_count++;    pattern_count++;
1102    }    }
1103    
1104  /* Study the regular expressions, as we will be running them may times */  /* Study the regular expressions, as we will be running them many times */
1105    
1106  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1107    {    {
# Line 517  for (j = 0; j < pattern_count; j++) Line 1115  for (j = 0; j < pattern_count; j++)
1115      }      }
1116    }    }
1117    
1118    /* If there are include or exclude patterns, compile them. */
1119    
1120    if (exclude_pattern != NULL)
1121      {
1122      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1123      if (exclude_compiled == NULL)
1124        {
1125        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1126          errptr, error);
1127        return 2;
1128        }
1129      }
1130    
1131    if (include_pattern != NULL)
1132      {
1133      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1134      if (include_compiled == NULL)
1135        {
1136        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1137          errptr, error);
1138        return 2;
1139        }
1140      }
1141    
1142  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit */
1143    
1144  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc) return pcregrep(stdin,
1145      (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1146    
1147  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1148  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1149  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames_only is not set.
1150    */
1151    
1152  only_one_at_top = (i == argc - 1);  only_one_at_top = (i == argc - 1);
 if (filenames_only) filenames = TRUE;  
1153    
1154  for (; i < argc; i++)  for (; i < argc; i++)
1155    {    {
1156    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1157    if (frc == 0 && rc == 1) rc = 0;    if (frc > 1) rc = frc;
1158        else if (frc == 0 && rc == 1) rc = 0;
1159    }    }
1160    
1161  return rc;  return rc;
1162  }  }
1163    
1164  /* End */  /* End of pcregrep */

Legend:
Removed from v.53  
changed lines
  Added in v.85

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12