/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 76 by nigel, Sat Feb 24 21:40:37 2007 UTC | revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2004 University of Cambridge             Copyright (c) 1997-2005 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  #include <string.h>  #include <string.h>
43  #include <stdlib.h>  #include <stdlib.h>
44  #include <errno.h>  #include <errno.h>
45    
46    #include <sys/types.h>
47    #include <sys/stat.h>
48    #include <unistd.h>
49    
50  #include "config.h"  #include "config.h"
51  #include "pcre.h"  #include "pcre.h"
52    
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 55  POSSIBILITY OF SUCH DAMAGE.
55    
56  typedef int BOOL;  typedef int BOOL;
57    
58  #define VERSION "3.0 14-Jan-2003"  #define VERSION "4.0 07-Jun-2005"
59  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
60    
61    #if BUFSIZ > 8192
62    #define MBUFTHIRD BUFSIZ
63    #else
64    #define MBUFTHIRD 8192
65    #endif
66    
67    
68    
69  /*************************************************  /*************************************************
70  *               Global variables                 *  *               Global variables                 *
71  *************************************************/  *************************************************/
72    
73  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
74    static char *stdin_name = (char *)"(standard input)";
75  static int  pattern_count = 0;  static int  pattern_count = 0;
76  static pcre **pattern_list;  static pcre **pattern_list;
77  static pcre_extra **hints_list;  static pcre_extra **hints_list;
78    
79    static char *include_pattern = NULL;
80    static char *exclude_pattern = NULL;
81    
82    static pcre *include_compiled = NULL;
83    static pcre *exclude_compiled = NULL;
84    
85    static int after_context = 0;
86    static int before_context = 0;
87    static int both_context = 0;
88    
89  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
90  static BOOL filenames = TRUE;  static BOOL filenames = TRUE;
91  static BOOL filenames_only = FALSE;  static BOOL filenames_only = FALSE;
92    static BOOL filenames_nomatch_only = FALSE;
93    static BOOL hyphenpending = FALSE;
94  static BOOL invert = FALSE;  static BOOL invert = FALSE;
95    static BOOL multiline = FALSE;
96  static BOOL number = FALSE;  static BOOL number = FALSE;
97    static BOOL quiet = FALSE;
98  static BOOL recurse = FALSE;  static BOOL recurse = FALSE;
99  static BOOL silent = FALSE;  static BOOL silent = FALSE;
100  static BOOL whole_lines = FALSE;  static BOOL whole_lines = FALSE;
101    static BOOL word_match = FALSE;
102    
103  /* Structure for options and list of them */  /* Structure for options and list of them */
104    
105    enum { OP_NODATA, OP_STRING, OP_NUMBER };
106    
107  typedef struct option_item {  typedef struct option_item {
108      int type;
109    int one_char;    int one_char;
110      void *dataptr;
111    const char *long_name;    const char *long_name;
112    const char *help_text;    const char *help_text;
113  } option_item;  } option_item;
114    
115  static option_item optionlist[] = {  static option_item optionlist[] = {
116    { -1,  "help",         "display this help and exit" },    { OP_NODATA, -1,  NULL,              "",              "  terminate options" },
117    { 'c', "count",        "print only a count of matching lines per FILE" },    { OP_NODATA, -1,  NULL,              "help",          "display this help and exit" },
118    { 'h', "no-filename",  "suppress the prefixing filename on output" },    { OP_NUMBER, 'A', &after_context,    "after-context=number", "set number of following context lines" },
119    { 'i', "ignore-case",  "ignore case distinctions" },    { OP_NUMBER, 'B', &before_context,   "before-context=number", "set number of prior context lines" },
120    { 'l', "files-with-matches", "print only FILE names containing matches" },    { OP_NUMBER, 'C', &both_context,     "context=number", "set number of context lines, before & after" },
121    { 'n', "line-number",  "print line number with output lines" },    { OP_NODATA, 'c', NULL,              "count",         "print only a count of matching lines per FILE" },
122    { 'r', "recursive",    "recursively scan sub-directories" },    { OP_STRING, 'f', &pattern_filename, "file=path",     "read patterns from file" },
123    { 's', "no-messages",  "suppress error messages" },    { OP_NODATA, 'h', NULL,              "no-filename",   "suppress the prefixing filename on output" },
124    { 'u', "utf-8",        "use UTF-8 mode" },    { OP_NODATA, 'i', NULL,              "ignore-case",   "ignore case distinctions" },
125    { 'V', "version",      "print version information and exit" },    { OP_NODATA, 'l', NULL,              "files-with-matches", "print only FILE names containing matches" },
126    { 'v', "invert-match", "select non-matching lines" },    { OP_NODATA, 'L', NULL,              "files-without-match","print only FILE names not containing matches" },
127    { 'x', "line-regex",   "force PATTERN to match only whole lines" },    { OP_STRING, -1,  &stdin_name,       "label=name",    "set name for standard input" },
128    { 'x', "line-regexp",  "force PATTERN to match only whole lines" },    { OP_NODATA, 'M', NULL,              "multiline",     "run in multiline mode" },
129    { 0,    NULL,           NULL }    { OP_NODATA, 'n', NULL,              "line-number",   "print line number with output lines" },
130      { OP_NODATA, 'q', NULL,              "quiet",         "suppress output, just set return code" },
131      { OP_NODATA, 'r', NULL,              "recursive",     "recursively scan sub-directories" },
132      { OP_STRING, -1,  &exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
133      { OP_STRING, -1,  &include_pattern,  "include=pattern","include matching files when recursing" },
134      { OP_NODATA, 's', NULL,              "no-messages",   "suppress error messages" },
135      { OP_NODATA, 'u', NULL,              "utf-8",         "use UTF-8 mode" },
136      { OP_NODATA, 'V', NULL,              "version",       "print version information and exit" },
137      { OP_NODATA, 'v', NULL,              "invert-match",  "select non-matching lines" },
138      { OP_NODATA, 'w', NULL,              "word-regex(p)", "force PATTERN to match only as a word"  },
139      { OP_NODATA, 'x', NULL,              "line-regex(p)", "force PATTERN to match only whole lines" },
140      { OP_NODATA, 0,   NULL,               NULL,            NULL }
141  };  };
142    
143    
# Line 154  closedir(dir); Line 197  closedir(dir);
197  /************* Directory scanning in Win32 ***********/  /************* Directory scanning in Win32 ***********/
198    
199  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
200  Lionel Fourquaux. */  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
201    when it did not exist. */
202    
203    
204  #elif HAVE_WIN32API  #elif HAVE_WIN32API
# Line 165  Lionel Fourquaux. */ Line 209  Lionel Fourquaux. */
209  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
210  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
211  #endif  #endif
212    #ifndef INVALID_FILE_ATTRIBUTES
213    #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
214    #endif
215    
216  #include <windows.h>  #include <windows.h>
217    
218  typedef struct directory_type  typedef struct directory_type
# Line 284  return sys_errlist[n]; Line 332  return sys_errlist[n];
332    
333    
334  /*************************************************  /*************************************************
335  *              Grep an individual file           *  *       Print the previous "after" lines         *
336    *************************************************/
337    
338    /* This is called if we are about to lose said lines because of buffer filling,
339    and at the end of the file.
340    
341    Arguments:
342      lastmatchnumber   the number of the last matching line, plus one
343      lastmatchrestart  where we restarted after the last match
344      endptr            end of available data
345      printname         filename for printing
346    
347    Returns:            nothing
348    */
349    
350    static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
351      char *endptr, char *printname)
352    {
353    if (after_context > 0 && lastmatchnumber > 0)
354      {
355      int count = 0;
356      while (lastmatchrestart < endptr && count++ < after_context)
357        {
358        char *pp = lastmatchrestart;
359        if (printname != NULL) fprintf(stdout, "%s-", printname);
360        if (number) fprintf(stdout, "%d-", lastmatchnumber++);
361        while (*pp != '\n') pp++;
362        fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
363        lastmatchrestart = pp + 1;
364        }
365      hyphenpending = TRUE;
366      }
367    }
368    
369    
370    
371    /*************************************************
372    *            Grep an individual file             *
373  *************************************************/  *************************************************/
374    
375    /* This is called from grep_or_recurse() below. It uses a buffer that is three
376    times the value of MBUFTHIRD. The matching point is never allowed to stray into
377    the top third of the buffer, thus keeping more of the file available for
378    context printing or for multiline scanning. For large files, the pointer will
379    be in the middle third most of the time, so the bottom third is available for
380    "before" context printing.
381    
382    Arguments:
383      in           the fopened FILE stream
384      printname    the file name if it is to be printed for each match
385                   or NULL if the file name is not to be printed
386                   it cannot be NULL if filenames[_nomatch]_only is set
387    
388    Returns:       0 if there was at least one match
389                   1 otherwise (no matches)
390    */
391    
392  static int  static int
393  pcregrep(FILE *in, char *name)  pcregrep(FILE *in, char *printname)
394  {  {
395  int rc = 1;  int rc = 1;
396  int linenumber = 0;  int linenumber = 1;
397    int lastmatchnumber = 0;
398  int count = 0;  int count = 0;
399  int offsets[99];  int offsets[99];
400  char buffer[BUFSIZ];  char *lastmatchrestart = NULL;
401    char buffer[3*MBUFTHIRD];
402    char *ptr = buffer;
403    char *endptr;
404    size_t bufflength;
405    BOOL endhyphenpending = FALSE;
406    
407    /* Do the first read into the start of the buffer and set up the pointer to
408    end of what we have. */
409    
410    bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
411    endptr = buffer + bufflength;
412    
413    /* Loop while the current pointer is not at the end of the file. For large
414    files, endptr will be at the end of the buffer when we are in the middle of the
415    file, but ptr will never get there, because as soon as it gets over 2/3 of the
416    way, the buffer is shifted left and re-filled. */
417    
418  while (fgets(buffer, sizeof(buffer), in) != NULL)  while (ptr < endptr)
419    {    {
   BOOL match = FALSE;  
420    int i;    int i;
421    int length = (int)strlen(buffer);    BOOL match = FALSE;
422    if (length > 0 && buffer[length-1] == '\n') buffer[--length] = 0;    char *t = ptr;
423    linenumber++;    size_t length, linelength;
424    
425      /* At this point, ptr is at the start of a line. We need to find the length
426      of the subject string to pass to pcre_exec(). In multiline mode, it is the
427      length remainder of the data in the buffer. Otherwise, it is the length of
428      the next line. After matching, we always advance by the length of the next
429      line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
430      that any match is constrained to be in the first line. */
431    
432      linelength = 0;
433      while (t < endptr && *t++ != '\n') linelength++;
434      length = multiline? endptr - ptr : linelength;
435    
436      /* Run through all the patterns until one matches. Note that we don't include
437      the final newline in the subject string. */
438    
439    for (i = 0; !match && i < pattern_count; i++)    for (i = 0; !match && i < pattern_count; i++)
440      {      {
441      match = pcre_exec(pattern_list[i], hints_list[i], buffer, length, 0, 0,      match = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
442        offsets, 99) >= 0;        offsets, 99) >= 0;
     if (match && whole_lines && offsets[1] != length) match = FALSE;  
443      }      }
444    
445      /* If it's a match or a not-match (as required), print what's wanted. */
446    
447    if (match != invert)    if (match != invert)
448      {      {
449        BOOL hyphenprinted = FALSE;
450    
451        if (filenames_nomatch_only) return 1;
452    
453      if (count_only) count++;      if (count_only) count++;
454    
455      else if (filenames_only)      else if (filenames_only)
456        {        {
457        fprintf(stdout, "%s\n", (name == NULL)? "<stdin>" : name);        fprintf(stdout, "%s\n", printname);
458        return 0;        return 0;
459        }        }
460    
461      else if (silent) return 0;      else if (quiet) return 0;
462    
463      else      else
464        {        {
465        if (name != NULL) fprintf(stdout, "%s:", name);        /* See if there is a requirement to print some "after" lines from a
466          previous match. We never print any overlaps. */
467    
468          if (after_context > 0 && lastmatchnumber > 0)
469            {
470            int linecount = 0;
471            char *p = lastmatchrestart;
472    
473            while (p < ptr && linecount < after_context)
474              {
475              while (*p != '\n') p++;
476              p++;
477              linecount++;
478              }
479    
480            /* It is important to advance lastmatchrestart during this printing so
481            that it interacts correctly with any "before" printing below. */
482    
483            while (lastmatchrestart < p)
484              {
485              char *pp = lastmatchrestart;
486              if (printname != NULL) fprintf(stdout, "%s-", printname);
487              if (number) fprintf(stdout, "%d-", lastmatchnumber++);
488              while (*pp != '\n') pp++;
489              fprintf(stdout, "%.*s", pp - lastmatchrestart + 1, lastmatchrestart);
490              lastmatchrestart = pp + 1;
491              }
492            if (lastmatchrestart != ptr) hyphenpending = TRUE;
493            }
494    
495          /* If there were non-contiguous lines printed above, insert hyphens. */
496    
497          if (hyphenpending)
498            {
499            fprintf(stdout, "--\n");
500            hyphenpending = FALSE;
501            hyphenprinted = TRUE;
502            }
503    
504          /* See if there is a requirement to print some "before" lines for this
505          match. Again, don't print overlaps. */
506    
507          if (before_context > 0)
508            {
509            int linecount = 0;
510            char *p = ptr;
511    
512            while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
513                   linecount++ < before_context)
514              {
515              p--;
516              while (p > buffer && p[-1] != '\n') p--;
517              }
518    
519            if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
520              fprintf(stdout, "--\n");
521    
522            while (p < ptr)
523              {
524              char *pp = p;
525              if (printname != NULL) fprintf(stdout, "%s-", printname);
526              if (number) fprintf(stdout, "%d-", linenumber - linecount--);
527              while (*pp != '\n') pp++;
528              fprintf(stdout, "%.*s", pp - p + 1, p);
529              p = pp + 1;
530              }
531            }
532    
533          /* Now print the matching line(s); ensure we set hyphenpending at the end
534          of the file. */
535    
536          endhyphenpending = TRUE;
537          if (printname != NULL) fprintf(stdout, "%s:", printname);
538        if (number) fprintf(stdout, "%d:", linenumber);        if (number) fprintf(stdout, "%d:", linenumber);
539        fprintf(stdout, "%s\n", buffer);  
540          /* In multiline mode, we want to print to the end of the line in which
541          the end of the matched string is found, so we adjust linelength and the
542          line number appropriately. Because the PCRE_FIRSTLINE option is set, the
543          start of the match will always be before the first \n character. */
544    
545          if (multiline)
546            {
547            char *endmatch = ptr + offsets[1];
548            t = ptr;
549            while (t < endmatch) { if (*t++ == '\n') linenumber++; }
550            while (endmatch < endptr && *endmatch != '\n') endmatch++;
551            linelength = endmatch - ptr;
552            }
553    
554          fprintf(stdout, "%.*s\n", linelength, ptr);
555        }        }
556    
557      rc = 0;      rc = 0;    /* Had some success */
558    
559        /* Remember where the last match happened for after_context. We remember
560        where we are about to restart, and that line's number. */
561    
562        lastmatchrestart = ptr + linelength + 1;
563        lastmatchnumber = linenumber + 1;
564      }      }
565    
566      /* Advance to after the newline and increment the line number. */
567    
568      ptr += linelength + 1;
569      linenumber++;
570    
571      /* If we haven't yet reached the end of the file (the buffer is full), and
572      the current point is in the top 1/3 of the buffer, slide the buffer down by
573      1/3 and refill it. Before we do this, if some unprinted "after" lines are
574      about to be lost, print them. */
575    
576      if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
577        {
578        if (after_context > 0 &&
579            lastmatchnumber > 0 &&
580            lastmatchrestart < buffer + MBUFTHIRD)
581          {
582          do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
583          lastmatchnumber = 0;
584          }
585    
586        /* Now do the shuffle */
587    
588        memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
589        ptr -= MBUFTHIRD;
590        bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
591        endptr = buffer + bufflength;
592    
593        /* Adjust any last match point */
594    
595        if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
596        }
597      }     /* Loop through the whole file */
598    
599    /* End of file; print final "after" lines if wanted; do_after_lines sets
600    hyphenpending if it prints something. */
601    
602    do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
603    hyphenpending |= endhyphenpending;
604    
605    /* Print the file name if we are looking for those without matches and there
606    were none. If we found a match, we won't have got this far. */
607    
608    if (filenames_nomatch_only)
609      {
610      fprintf(stdout, "%s\n", printname);
611      return 0;
612    }    }
613    
614    /* Print the match count if wanted */
615    
616  if (count_only)  if (count_only)
617    {    {
618    if (name != NULL) fprintf(stdout, "%s:", name);    if (printname != NULL) fprintf(stdout, "%s:", printname);
619    fprintf(stdout, "%d\n", count);    fprintf(stdout, "%d\n", count);
620    }    }
621    
# Line 345  return rc; Line 624  return rc;
624    
625    
626    
   
627  /*************************************************  /*************************************************
628  *     Grep a file or recurse into a directory    *  *     Grep a file or recurse into a directory    *
629  *************************************************/  *************************************************/
630    
631    /* Given a path name, if it's a directory, scan all the files if we are
632    recursing; if it's a file, grep it.
633    
634    Arguments:
635      pathname          the path to investigate
636      dir_recurse       TRUE if recursing is wanted (-r)
637      show_filenames    TRUE if file names are wanted for multiple files, except
638                          for the only file at top level when not filenames_only
639      only_one_at_top   TRUE if the path is the only one at toplevel
640    
641    Returns:   0 if there was at least one match
642               1 if there were no matches
643               2 there was some kind of error
644    
645    However, file opening failures are suppressed if "silent" is set.
646    */
647    
648  static int  static int
649  grep_or_recurse(char *filename, BOOL dir_recurse, BOOL show_filenames,  grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL show_filenames,
650    BOOL only_one_at_top)    BOOL only_one_at_top)
651  {  {
652  int rc = 1;  int rc = 1;
653  int sep;  int sep;
654  FILE *in;  FILE *in;
655    char *printname;
656    
657    /* If the file name is "-" we scan stdin */
658    
659    if (strcmp(pathname, "-") == 0)
660      {
661      return pcregrep(stdin,
662        (filenames_only || filenames_nomatch_only ||
663        (show_filenames && !only_one_at_top))?
664          stdin_name : NULL);
665      }
666    
667  /* If the file is a directory and we are recursing, scan each file within it.  /* If the file is a directory and we are recursing, scan each file within it,
668  The scanning code is localized so it can be made system-specific. */  subject to any include or exclude patterns that were set. The scanning code is
669    localized so it can be made system-specific. */
670    
671  if ((sep = isdirectory(filename)) != 0 && dir_recurse)  if ((sep = isdirectory(pathname)) != 0 && dir_recurse)
672    {    {
673    char buffer[1024];    char buffer[1024];
674    char *nextfile;    char *nextfile;
675    directory_type *dir = opendirectory(filename);    directory_type *dir = opendirectory(pathname);
676    
677    if (dir == NULL)    if (dir == NULL)
678      {      {
679      fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", filename,      if (!silent)
680        strerror(errno));        fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
681            strerror(errno));
682      return 2;      return 2;
683      }      }
684    
685    while ((nextfile = readdirectory(dir)) != NULL)    while ((nextfile = readdirectory(dir)) != NULL)
686      {      {
687      int frc;      int frc, blen;
688      sprintf(buffer, "%.512s%c%.128s", filename, sep, nextfile);      sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
689        blen = strlen(buffer);
690    
691        if (exclude_compiled != NULL &&
692            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
693          continue;
694    
695        if (include_compiled != NULL &&
696            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
697          continue;
698    
699      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);      frc = grep_or_recurse(buffer, dir_recurse, TRUE, FALSE);
700      if (frc == 0 && rc == 1) rc = 0;      if (frc > 1) rc = frc;
701         else if (frc == 0 && rc == 1) rc = 0;
702      }      }
703    
704    closedirectory(dir);    closedirectory(dir);
# Line 391  the first and only argument at top level Line 710  the first and only argument at top level
710  we are only showing the file name). Otherwise, control is via the  we are only showing the file name). Otherwise, control is via the
711  show_filenames variable. */  show_filenames variable. */
712    
713  in = fopen(filename, "r");  in = fopen(pathname, "r");
714  if (in == NULL)  if (in == NULL)
715    {    {
716    fprintf(stderr, "pcregrep: Failed to open %s: %s\n", filename, strerror(errno));    if (!silent)
717        fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
718          strerror(errno));
719    return 2;    return 2;
720    }    }
721    
722  rc = pcregrep(in, (filenames_only || (show_filenames && !only_one_at_top))?  printname =  (filenames_only || filenames_nomatch_only ||
723    filename : NULL);    (show_filenames && !only_one_at_top))? pathname : NULL;
724    
725    rc = pcregrep(in, printname);
726    
727  fclose(in);  fclose(in);
728  return rc;  return rc;
729  }  }
# Line 414  return rc; Line 738  return rc;
738  static int  static int
739  usage(int rc)  usage(int rc)
740  {  {
741  fprintf(stderr, "Usage: pcregrep [-Vcfhilnrsvx] [long-options] [pattern] [file1 file2 ...]\n");  fprintf(stderr, "Usage: pcregrep [-LMVcfhilnqrsvwx] [long-options] [pattern] [file1 file2 ...]\n");
742  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information.\n");
743  return rc;  return rc;
744  }  }
# Line 434  option_item *op; Line 758  option_item *op;
758  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
759  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
760  printf("PATTERN must be present if -f is not used.\n");  printf("PATTERN must be present if -f is not used.\n");
761    printf("\"-\" can be used as a file name to mean STDIN.\n");
762  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
763    
764  printf("Options:\n");  printf("Options:\n");
# Line 449  for (op = optionlist; op->one_char != 0; Line 774  for (op = optionlist; op->one_char != 0;
774    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
775    }    }
776    
777  printf("\n  -f<filename>  or  --file=<filename>\n");  printf("\nWhen reading patterns from a file instead of using a command line option,\n");
778  printf("    Read patterns from <filename> instead of using a command line option.\n");  printf("trailing white space is removed and blank lines are ignored.\n");
779  printf("    Trailing white space is removed; blanks lines are ignored.\n");  printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
 printf("    There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);  
780    
781  printf("\nWith no FILE, read standard input. If fewer than two FILEs given, assume -h.\n");  printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
782  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");  printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
783  }  }
784    
# Line 462  printf("Exit status is 0 if any matches, Line 786  printf("Exit status is 0 if any matches,
786    
787    
788  /*************************************************  /*************************************************
789  *                Handle an option                *  *    Handle a single-letter, no data option      *
790  *************************************************/  *************************************************/
791    
792  static int  static int
# Line 474  switch(letter) Line 798  switch(letter)
798    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
799    case 'h': filenames = FALSE; break;    case 'h': filenames = FALSE; break;
800    case 'i': options |= PCRE_CASELESS; break;    case 'i': options |= PCRE_CASELESS; break;
801    case 'l': filenames_only = TRUE;    case 'l': filenames_only = TRUE; break;
802      case 'L': filenames_nomatch_only = TRUE; break;
803      case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
804    case 'n': number = TRUE; break;    case 'n': number = TRUE; break;
805      case 'q': quiet = TRUE; break;
806    case 'r': recurse = TRUE; break;    case 'r': recurse = TRUE; break;
807    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
808    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; break;
809    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
810    case 'x': whole_lines = TRUE; options |= PCRE_ANCHORED; break;    case 'w': word_match = TRUE; break;
811      case 'x': whole_lines = TRUE; break;
812    
813    case 'V':    case 'V':
814    fprintf(stderr, "pcregrep version %s using ", VERSION);    fprintf(stderr, "pcregrep version %s using ", VERSION);
# Line 503  return options; Line 831  return options;
831  *                Main program                    *  *                Main program                    *
832  *************************************************/  *************************************************/
833    
834    /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
835    
836  int  int
837  main(int argc, char **argv)  main(int argc, char **argv)
838  {  {
# Line 517  BOOL only_one_at_top; Line 847  BOOL only_one_at_top;
847    
848  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
849    {    {
850      option_item *op = NULL;
851      char *option_data = (char *)"";    /* default to keep compiler happy */
852      BOOL longop;
853      BOOL longopwasequals = FALSE;
854    
855    if (argv[i][0] != '-') break;    if (argv[i][0] != '-') break;
856    
857    /* Missing options */    /* If we hit an argument that is just "-", it may be a reference to STDIN,
858      but only if we have previously had -f to define the patterns. */
859    
860    if (argv[i][1] == 0) exit(usage(2));    if (argv[i][1] == 0)
861        {
862        if (pattern_filename != NULL) break;
863          else exit(usage(2));
864        }
865    
866    /* Long name options */    /* Handle a long name option, or -- to terminate the options */
867    
868    if (argv[i][1] == '-')    if (argv[i][1] == '-')
869      {      {
870      option_item *op;      char *arg = argv[i] + 2;
871        char *argequals = strchr(arg, '=');
872    
873      if (strncmp(argv[i]+2, "file=", 5) == 0)      if (*arg == 0)    /* -- terminates options */
874        {        {
875        pattern_filename = argv[i] + 7;        i++;
876        continue;        break;                /* out of the options-handling loop */
877        }        }
878    
879        longop = TRUE;
880    
881        /* Some long options have data that follows after =, for example file=name.
882        Some options have variations in the long name spelling: specifically, we
883        allow "regexp" because GNU grep allows it, though I personally go along
884        with Jeff Friedl in preferring "regex" without the "p". These options are
885        entered in the table as "regex(p)". No option is in both these categories,
886        fortunately. */
887    
888      for (op = optionlist; op->one_char != 0; op++)      for (op = optionlist; op->one_char != 0; op++)
889        {        {
890        if (strcmp(argv[i]+2, op->long_name) == 0)        char *opbra = strchr(op->long_name, '(');
891          char *equals = strchr(op->long_name, '=');
892          if (opbra == NULL)     /* Not a (p) case */
893          {          {
894          options = handle_option(op->one_char, options);          if (equals == NULL)  /* Not thing=data case */
895          break;            {
896              if (strcmp(arg, op->long_name) == 0) break;
897              }
898            else                 /* Special case xxx=data */
899              {
900              int oplen = equals - op->long_name;
901              int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;
902              if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
903                {
904                option_data = arg + arglen;
905                if (*option_data == '=')
906                  {
907                  option_data++;
908                  longopwasequals = TRUE;
909                  }
910                break;
911                }
912              }
913            }
914          else                   /* Special case xxxx(p) */
915            {
916            char buff1[24];
917            char buff2[24];
918            int baselen = opbra - op->long_name;
919            sprintf(buff1, "%.*s", baselen, op->long_name);
920            sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,
921              opbra + 1);
922            if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
923              break;
924          }          }
925        }        }
926    
927      if (op->one_char == 0)      if (op->one_char == 0)
928        {        {
929        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);        fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
# Line 550  for (i = 1; i < argc; i++) Line 931  for (i = 1; i < argc; i++)
931        }        }
932      }      }
933    
934    /* One-char options */    /* One-char options; many that have no data may be in a single argument; we
935      continue till we hit the last one or one that needs data. */
936    
937    else    else
938      {      {
939      char *s = argv[i] + 1;      char *s = argv[i] + 1;
940        longop = FALSE;
941      while (*s != 0)      while (*s != 0)
942        {        {
943        if (*s == 'f')        for (op = optionlist; op->one_char != 0; op++)
944            { if (*s == op->one_char) break; }
945          if (op->one_char == 0)
946          {          {
947          pattern_filename = s + 1;          fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
948          if (pattern_filename[0] == 0)            *s, argv[i]);
949            {          exit(usage(2));
950            if (i >= argc - 1)          }
951              {        if (op->type != OP_NODATA || s[1] == 0)
952              fprintf(stderr, "pcregrep: File name missing after -f\n");          {
953              exit(usage(2));          option_data = s+1;
             }  
           pattern_filename = argv[++i];  
           }  
954          break;          break;
955          }          }
956        else options = handle_option(*s++, options);        options = handle_option(*s++, options);
957          }
958        }
959    
960      /* At this point we should have op pointing to a matched option */
961    
962      if (op->type == OP_NODATA)
963        options = handle_option(op->one_char, options);
964      else
965        {
966        if (*option_data == 0)
967          {
968          if (i >= argc - 1 || longopwasequals)
969            {
970            fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
971            exit(usage(2));
972            }
973          option_data = argv[++i];
974          }
975    
976        if (op->type == OP_STRING) *((char **)op->dataptr) = option_data; else
977          {
978          char *endptr;
979          int n = strtoul(option_data, &endptr, 10);
980          if (*endptr != 0)
981            {
982            if (longop)
983              fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%s\n",
984                option_data, op->long_name);
985            else
986              fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
987                option_data, op->one_char);
988            exit(usage(2));
989            }
990          *((int *)op->dataptr) = n;
991        }        }
992      }      }
993    }    }
994    
995    /* Options have been decoded. If -C was used, its value is used as a default
996    for -A and -B. */
997    
998    if (both_context > 0)
999      {
1000      if (after_context == 0) after_context = both_context;
1001      if (before_context == 0) before_context = both_context;
1002      }
1003    
1004  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));  pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1005  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));  hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1006    
# Line 590  if (pattern_list == NULL || hints_list = Line 1015  if (pattern_list == NULL || hints_list =
1015  if (pattern_filename != NULL)  if (pattern_filename != NULL)
1016    {    {
1017    FILE *f = fopen(pattern_filename, "r");    FILE *f = fopen(pattern_filename, "r");
1018    char buffer[BUFSIZ];    char buffer[MBUFTHIRD + 16];
1019      char *rdstart;
1020      int adjust = 0;
1021    
1022    if (f == NULL)    if (f == NULL)
1023      {      {
1024      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
1025        strerror(errno));        strerror(errno));
1026      return 2;      return 2;
1027      }      }
1028    while (fgets(buffer, sizeof(buffer), f) != NULL)  
1029      if (whole_lines)
1030        {
1031        strcpy(buffer, "^(?:");
1032        adjust = 4;
1033        }
1034      else if (word_match)
1035        {
1036        strcpy(buffer, "\\b");
1037        adjust = 2;
1038        }
1039    
1040      rdstart = buffer + adjust;
1041      while (fgets(rdstart, MBUFTHIRD, f) != NULL)
1042      {      {
1043      char *s = buffer + (int)strlen(buffer);      char *s = rdstart + (int)strlen(rdstart);
1044      if (pattern_count >= MAX_PATTERN_COUNT)      if (pattern_count >= MAX_PATTERN_COUNT)
1045        {        {
1046        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",        fprintf(stderr, "pcregrep: Too many patterns in file (max %d)\n",
1047          MAX_PATTERN_COUNT);          MAX_PATTERN_COUNT);
1048        return 2;        return 2;
1049        }        }
1050      while (s > buffer && isspace((unsigned char)(s[-1]))) s--;      while (s > rdstart && isspace((unsigned char)(s[-1]))) s--;
1051      if (s == buffer) continue;      if (s == rdstart) continue;
1052      *s = 0;      if (whole_lines) strcpy(s, ")$");
1053          else if (word_match)strcpy(s, "\\b");
1054            else *s = 0;
1055      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,      pattern_list[pattern_count] = pcre_compile(buffer, options, &error,
1056        &errptr, NULL);        &errptr, NULL);
1057      if (pattern_list[pattern_count++] == NULL)      if (pattern_list[pattern_count++] == NULL)
1058        {        {
1059        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",        fprintf(stderr, "pcregrep: Error in regex number %d at offset %d: %s\n",
1060          pattern_count, errptr, error);          pattern_count, errptr - adjust, error);
1061        return 2;        return 2;
1062        }        }
1063      }      }
1064    fclose(f);    fclose(f);
1065    }    }
1066    
1067  /* If no file name, a single regex must be given inline */  /* If no file name, a single regex must be given inline. */
1068    
1069  else  else
1070    {    {
1071      char buffer[MBUFTHIRD + 16];
1072      char *pat;
1073      int adjust = 0;
1074    
1075    if (i >= argc) return usage(2);    if (i >= argc) return usage(2);
1076    pattern_list[0] = pcre_compile(argv[i++], options, &error, &errptr, NULL);  
1077      if (whole_lines)
1078        {
1079        sprintf(buffer, "^(?:%.*s)$", MBUFTHIRD, argv[i++]);
1080        pat = buffer;
1081        adjust = 4;
1082        }
1083      else if (word_match)
1084        {
1085        sprintf(buffer, "\\b%.*s\\b", MBUFTHIRD, argv[i++]);
1086        pat = buffer;
1087        adjust = 2;
1088        }
1089      else pat = argv[i++];
1090    
1091      pattern_list[0] = pcre_compile(pat, options, &error, &errptr, NULL);
1092    
1093    if (pattern_list[0] == NULL)    if (pattern_list[0] == NULL)
1094      {      {
1095      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n", errptr,      fprintf(stderr, "pcregrep: Error in regex at offset %d: %s\n",
1096        error);        errptr - adjust, error);
1097      return 2;      return 2;
1098      }      }
1099    pattern_count++;    pattern_count++;
1100    }    }
1101    
1102  /* Study the regular expressions, as we will be running them may times */  /* Study the regular expressions, as we will be running them many times */
1103    
1104  for (j = 0; j < pattern_count; j++)  for (j = 0; j < pattern_count; j++)
1105    {    {
# Line 650  for (j = 0; j < pattern_count; j++) Line 1113  for (j = 0; j < pattern_count; j++)
1113      }      }
1114    }    }
1115    
1116    /* If there are include or exclude patterns, compile them. */
1117    
1118    if (exclude_pattern != NULL)
1119      {
1120      exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr, NULL);
1121      if (exclude_compiled == NULL)
1122        {
1123        fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
1124          errptr, error);
1125        return 2;
1126        }
1127      }
1128    
1129    if (include_pattern != NULL)
1130      {
1131      include_compiled = pcre_compile(include_pattern, 0, &error, &errptr, NULL);
1132      if (include_compiled == NULL)
1133        {
1134        fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
1135          errptr, error);
1136        return 2;
1137        }
1138      }
1139    
1140  /* If there are no further arguments, do the business on stdin and exit */  /* If there are no further arguments, do the business on stdin and exit */
1141    
1142  if (i >= argc) return pcregrep(stdin, NULL);  if (i >= argc) return pcregrep(stdin,
1143      (filenames_only || filenames_nomatch_only)? stdin_name : NULL);
1144    
1145  /* Otherwise, work through the remaining arguments as files or directories.  /* Otherwise, work through the remaining arguments as files or directories.
1146  Pass in the fact that there is only one argument at top level - this suppresses  Pass in the fact that there is only one argument at top level - this suppresses
1147  the file name if the argument is not a directory. */  the file name if the argument is not a directory and filenames_only is not set.
1148    */
1149    
1150  only_one_at_top = (i == argc - 1);  only_one_at_top = (i == argc - 1);
 if (filenames_only) filenames = TRUE;  
1151    
1152  for (; i < argc; i++)  for (; i < argc; i++)
1153    {    {
1154    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);    int frc = grep_or_recurse(argv[i], recurse, filenames, only_one_at_top);
1155    if (frc == 0 && rc == 1) rc = 0;    if (frc > 1) rc = frc;
1156        else if (frc == 0 && rc == 1) rc = 0;
1157    }    }
1158    
1159  return rc;  return rc;
1160  }  }
1161    
1162  /* End */  /* End of pcregrep */

Legend:
Removed from v.76  
changed lines
  Added in v.77

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12