/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC, compared with revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* This is a grep program that uses the PCRE regular expression library to do  /* This is a grep program that uses the PCRE regular expression library to do
6  its pattern matching. */  its pattern matching. On a Unix system it can recurse into directories. */
7    
8    #include <ctype.h>
9  #include <stdio.h>  #include <stdio.h>
10  #include <string.h>  #include <string.h>
11  #include <stdlib.h>  #include <stdlib.h>
# Line 17  its pattern matching. */ Line 18  its pattern matching. */
18    
19  typedef int BOOL;  typedef int BOOL;
20    
21    #define VERSION "2.0 01-Aug-2001"
22    #define MAX_PATTERN_COUNT 100
23    
24    
25  /*************************************************  /*************************************************
26  *               Global variables                 *  *               Global variables                 *
27  *************************************************/  *************************************************/
28    
29  static pcre *pattern;  static char *pattern_filename = NULL;
30  static pcre_extra *hints;  static int  pattern_count = 0;
31    static pcre **pattern_list;
32    static pcre_extra **hints_list;
33    
34  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
35    static BOOL filenames = TRUE;
36  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
37  static BOOL invert = FALSE;  static BOOL invert = FALSE;
38  static BOOL number = FALSE;  static BOOL number = FALSE;
39    static BOOL recurse = FALSE;
40  static BOOL silent = FALSE;  static BOOL silent = FALSE;
41  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
42    
/* Structure for options and list of them.

One option_item describes a single argument-less command line option:

  one_char   the single-letter form; the table uses -1 for an option that has
             only a long name, and 0 for the end-of-table marker
  long_name  the text that follows "--" on the command line
  help_text  the one-line description shown in the help output

Both strings are literals, so the pointers are const-qualified: writing
through them would be undefined behaviour. */

typedef struct option_item {
  int one_char;
  const char *long_name;
  const char *help_text;
} option_item;

/* The option table. "line-regex" and "line-regexp" are both present so that
either spelling selects 'x'. Code that scans the table stops at the entry whose
one_char is 0. */

static option_item optionlist[] = {
  { -1,  "help",               "display this help and exit" },
  { 'c', "count",              "print only a count of matching lines per FILE" },
  { 'h', "no-filename",        "suppress the prefixing filename on output" },
  { 'i', "ignore-case",        "ignore case distinctions" },
  { 'l', "files-with-matches", "print only FILE names containing matches" },
  { 'n', "line-number",        "print line number with output lines" },
  { 'r', "recursive",          "recursively scan sub-directories" },
  { 's', "no-messages",        "suppress error messages" },
  { 'V', "version",            "print version information and exit" },
  { 'v', "invert-match",       "select non-matching lines" },
  { 'x', "line-regex",         "force PATTERN to match only whole lines" },
  { 'x', "line-regexp",        "force PATTERN to match only whole lines" },
  { 0,   NULL,                 NULL }
};
66    
67    
68    /*************************************************
69    *       Functions for directory scanning         *
70    *************************************************/
71    
72    /* These functions are defined so that they can be made system specific,
73    although at present the only ones are for Unix, and for "no directory recursion
74    support". */
75    
76    
77    /************* Directory scanning in Unix ***********/
78    
79    #if IS_UNIX
80    #include <sys/types.h>
81    #include <sys/stat.h>
82    #include <dirent.h>
83    
/* The directory handle used by the scanning functions; on Unix it is the
<dirent.h> DIR stream. */

typedef DIR directory_type;

/* Test whether a path names a directory.

Argument:  filename   the path to examine
Returns:   '/' if stat() succeeds and reports a directory, otherwise 0

A stat() failure also gives 0, in the expectation that the caller's later
attempt to open the path as a file will fail and report the error. */

int
isdirectory(char *filename)
{
  struct stat info;

  if (stat(filename, &info) < 0) {
    return 0;
  }

  /* S_ISDIR() is the test POSIX specifies; the S_IFMT/S_IFDIR mask constants
  are an XSI extension and may be hidden by strict feature-test settings. */
  return S_ISDIR(info.st_mode) ? '/' : 0;
}

/* Open a directory for scanning.

Argument:  filename   the directory's path
Returns:   a handle, or NULL if opendir() fails */

directory_type *
opendirectory(char *filename)
{
  directory_type *handle = opendir(filename);
  return handle;
}

/* Fetch the next entry name from an open directory, passing over the "." and
".." entries.

Argument:  dir   a handle from opendirectory()
Returns:   the entry's name, or NULL when readdir() returns NULL

The returned pointer is the d_name field of the structure that readdir()
returned. */

char *
readdirectory(directory_type *dir)
{
  struct dirent *entry;

  while ((entry = readdir(dir)) != NULL) {
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }
    return entry->d_name;
  }

  return NULL;
}

/* Close a directory handle obtained from opendirectory(). */

void
closedirectory(directory_type *dir)
{
  closedir(dir);
}

119    
120    
121    #else
122    
123    
124    /************* Directory scanning when we can't do it ***********/
125    
126    /* The type is void, and apart from isdirectory(), the functions do nothing. */
127    
128    typedef void directory_type;
129    
130    int isdirectory(char *filename) { return FALSE; }
131    directory_type * opendirectory(char *filename) {}
132    char *readdirectory(directory_type *dir) {}
133    void closedirectory(directory_type *dir) {}
134    
135    #endif
136    
137    
138    
139  #if ! HAVE_STRERROR  #if ! HAVE_STRERROR
# Line 72  char buffer[BUFSIZ]; Line 173  char buffer[BUFSIZ];
173    
174  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (fgets(buffer, sizeof(buffer), in) != NULL)
175    {    {
176    BOOL match;    BOOL match = FALSE;
177      int i;
178    int length = (int)strlen(buffer);    int length = (int)strlen(buffer);
179    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;
180    linenumber++;    linenumber++;
181    
182    match = pcre_exec(pattern, hints, buffer, length, 0, 0, offsets, 99) >= 0;    for (i = 0; !match && i < pattern_count; i++)
183    if (match && whole_lines && offsets[1] != length) match = FALSE;      {
184        match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,
185          offsets, 99) >= 0;
186        if (match && whole_lines && offsets[1] != length) match = FALSE;
187        }
188    
189    if (match != invert)    if (match != invert)
190      {      {
# Line 116  return rc; Line 222  return rc;
222    
223    
224  /*************************************************  /*************************************************
225    *     Grep a file or recurse into a directory    *
226    *************************************************/
227    
228    static int
229    grep_or_recurse(char *filename, BOOL recurse, BOOL show_filenames,
230      BOOL only_one_at_top)
231    {
232    int rc = 1;
233    int sep;
234    FILE *in;
235    
236    /* If the file is a directory and we are recursing, scan each file within it.
237    The scanning code is localized so it can be made system-specific. */
238    
239    if ((sep = isdirectory(filename)) != 0 && recurse)
240      {
241      char buffer[1024];
242      char *nextfile;
243      directory_type *dir = opendirectory(filename);
244    
245      if (dir == NULL)
246        {
247        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,
248          strerror(errno));
249        return 2;
250        }
251    
252      while ((nextfile = readdirectory(dir)) != NULL)
253        {
254        int frc;
255        sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);
256        frc = grep_or_recurse(buffer, recurse, TRUE, FALSE);
257        if (frc == 0 && rc == 1) rc = 0;
258        }
259    
260      closedirectory(dir);
261      return rc;
262      }
263    
264    /* If the file is not a directory, or we are not recursing, scan it. If this is
265    the first and only argument at top level, we don't show the file name.
266    Otherwise, control is via the show_filenames variable. */
267    
268    in = fopen(filename, "r");
269    if (in == NULL)
270      {
271      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));
272      return 2;
273      }
274    
275    rc = pcregrep(in, (show_filenames && !only_one_at_top)? filename : NULL);
276    fclose(in);
277    return rc;
278    }
279    
280    
281    
282    
283    /*************************************************
284  *                Usage function                  *  *                Usage function                  *
285  *************************************************/  *************************************************/
286    
287  static int  static int
288  usage(int rc)  usage(int rc)
289  {  {
290  fprintf(stderr, "Usage: pcregrep [-Vchilnsvx] pattern [file] ...\n");  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] pattern [file] ...\n");
291    fprintf(stderr, "Type `pcregrep --help' for more information.\n");
292  return rc;  return rc;
293  }  }
294    
# Line 130  return rc; Line 296  return rc;
296    
297    
298  /*************************************************  /*************************************************
299    *                Help function                   *
300    *************************************************/
301    
302    static void
303    help(void)
304    {
305    option_item *op;
306    
307    printf("Usage: pcregrep [OPTION]... PATTERN [FILE] ...\n");
308    printf("Search for PATTERN in each FILE or standard input.\n");
309    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
310    
311    printf("Options:\n");
312    
313    for (op = optionlist; op->one_char != 0; op++)
314      {
315      int n;
316      char s[4];
317      if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
318      printf("  %s --%s%n", s, op->long_name, &n);
319      n = 30 - n;
320      if (n < 1) n = 1;
321      printf("%.*s%s\n", n, "                    ", op->help_text);
322      }
323    
324    printf("\n  -f<filename>  or  --file=<filename>\n");
325    printf("    Read patterns from <filename> instead of using a command line option.\n");
326    printf("    Trailing white space is removed; blanks lines are ignored.\n");
327    printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
328    
329    printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");
330    printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
331    }
332    
333    
334    
335    
336    /*************************************************
337    *                Handle an option                *
338    *************************************************/
339    
340    static int
341    handle_option(int letter, int options)
342    {
343    switch(letter)
344      {
345      case -1:  help(); exit(0);
346      case 'c': count_only = TRUE; break;
347      case 'h': filenames = FALSE; break;
348      case 'i': options |= PCRE_CASELESS; break;
349      case 'l': filenames_only = TRUE;
350      case 'n': number = TRUE; break;
351      case 'r': recurse = TRUE; break;
352      case 's': silent = TRUE; break;
353      case 'v': invert = TRUE; break;
354      case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;
355    
356      case 'V':
357      fprintf(stderr, "pcregrep version %s using ", VERSION);
358      fprintf(stderr, "PCRE version %s\n", pcre_version());
359      exit(0);
360      break;
361    
362      default:
363      fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
364      exit(usage(2));
365      }
366    
367    return options;
368    }
369    
370    
371    
372    
373    /*************************************************
374  *                Main program                    *  *                Main program                    *
375  *************************************************/  *************************************************/
376    
377  int  int
378  main(int argc, char **argv)  main(int argc, char **argv)
379  {  {
380  int i;  int i, j;
381  int rc = 1;  int rc = 1;
382  int options = 0;  int options = 0;
383  int errptr;  int errptr;
384  const char *error;  const char *error;
385  BOOL filenames = TRUE;  BOOL only_one_at_top;
386    
387  /* Process the options */  /* Process the options */
388    
389  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
390    {    {
   char *s;  
391    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
392    s = argv[i] + 1;  
393    while (*s != 0)    /* Long name options */
394    
395      if (argv[i][1] == '-')
396      {      {
397      switch (*s++)      option_item *op;
398    
399        if (strncmp(argv[i]+2, "file=", 5) == 0)
400          {
401          pattern_filename = argv[i] + 7;
402          continue;
403          }
404    
405        for (op = optionlist; op->one_char != 0; op++)
406        {        {
407        case 'c': count_only = TRUE; break;        if (strcmp(argv[i]+2, op->long_name) == 0)
408        case 'h': filenames = FALSE; break;          {
409        case 'i': options |= PCRE_CASELESS; break;          options = handle_option(op->one_char, options);
410        case 'l': filenames_only = TRUE;          break;
411        case 'n': number = TRUE; break;          }
412        case 's': silent = TRUE; break;        }
413        case 'v': invert = TRUE; break;      if (op->one_char == 0)
414        case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;        {
415          fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
416          exit(usage(2));
417          }
418        }
419    
420        case 'V':    /* One-char options */
       fprintf(stderr, "PCRE version %s\n", pcre_version());  
       break;  
421    
422        default:    else
423        fprintf(stderr, "pcregrep: unknown option %c\n", s[-1]);      {
424        return usage(2);      char *s = argv[i] + 1;
425        while (*s != 0)
426          {
427          if (*s == 'f')
428            {
429            pattern_filename = s + 1;
430            if (pattern_filename[0] == 0)
431              {
432              if (i >= argc - 1)
433                {
434                fprintf(stderr, "pcregrep: File name missing after -f\n");
435                exit(usage(2));
436                }
437              pattern_filename = argv[++i];
438              }
439            break;
440            }
441          else options = handle_option(*s++, options);
442        }        }
443      }      }
444    }    }
445    
446  /* There must be at least a regexp argument */  pattern_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
447    hints_list = malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
448    
449  if (i >= argc) return usage(0);  if (pattern_list == NULL || hints_list == NULL)
450      {
451      fprintf(stderr, "pcregrep: malloc failed\n");
452      return 2;
453      }
454    
455  /* Compile the regular expression. */  /* Compile the regular expression(s). */
456    
457  pattern = pcre_compile(argv[i++], options, &error, &errptr, NULL);  if (pattern_filename != NULL)
 if (pattern == NULL)  
458    {    {
459    fprintf(stderr, "pcregrep: error in regex at offset %d: %s\n", errptr, error);    FILE *f = fopen(pattern_filename, "r");
460    return 2;    char buffer[BUFSIZ];
461      if (f == NULL)
462        {
463        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
464          strerror(errno));
465        return 2;
466        }
467      while (fgets(buffer, sizeof(buffer), f) != NULL)
468        {
469        char *s = buffer + (int)strlen(buffer);
470        if (pattern_count >= MAX_PATTERN_COUNT)
471          {
472          fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
473            MAX_PATTERN_COUNT);
474          return 2;
475          }
476        while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
477        if (s == buffer) continue;
478        *s = 0;
479        pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
480          &errptr, NULL);
481        if (pattern_list[pattern_count++] == NULL)
482          {
483          fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
484            pattern_count, errptr, error);
485          return 2;
486          }
487        }
488      fclose(f);
489    }    }
490    
491  /* Study the regular expression, as we will be running it may times */  /* If no file name, a single regex must be given inline */
492    
493  hints = pcre_study(pattern, 0, &error);  else
 if (error != NULL)  
494    {    {
495    fprintf(stderr, "pcregrep: error while studing regex: %s\n", error);    if (i >= argc) return usage(0);
496    return 2;    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);
497      if (pattern_list[0] == NULL)
498        {
499        fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,
500          error);
501        return 2;
502        }
503      pattern_count++;
504      }
505    
506    /* Study the regular expressions, as we will be running them may times */
507    
508    for (j = 0; j < pattern_count; j++)
509      {
510      hints_list[j] = pcre_study(pattern_list[j], 0, &error);
511      if (error != NULL)
512        {
513        char s[16];
514        if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
515        fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
516        return 2;
517        }
518    }    }
519    
520  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit */
521    
522  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc) return pcregrep(stdin, NULL);
523    
524  /* Otherwise, work through the remaining arguments as files. If there is only  /* Otherwise, work through the remaining arguments as files or directories.
525  one, don't give its name on the output. */  Pass in the fact that there is only one argument at top level - this suppresses
526    the file name if the argument is not a directory. */
527    
528  if (i == argc - 1) filenames = FALSE;  only_one_at_top = (i == argc - 1);
529  if (filenames_only) filenames = TRUE;  if (filenames_only) filenames = TRUE;
530    
531  for (; i < argc; i++)  for (; i < argc; i++)
532    {    {
533    FILE *in = fopen(argv[i], "r");    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
534    if (in == NULL)    if (frc == 0 && rc == 1) rc = 0;
     {  
     fprintf(stderr, "%s: failed to open: %s\n", argv[i], strerror(errno));  
     rc = 2;  
     }  
   else  
     {  
     int frc = pcregrep(in, filenames? argv[i] : NULL);  
     if (frc == 0 && rc == 1) rc = 0;  
     fclose(in);  
     }  
535    }    }
536    
537  return rc;  return rc;

Legend:
Removed from v.49  
changed lines
  Added in v.53

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12