/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 141 by ph10, Fri Mar 30 15:46:27 2007 UTC | revision 325 by ph10, Sat Mar 8 17:13:02 2008 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
55  #  include <unistd.h>  #include <unistd.h>
56    #endif
57    
58    #ifdef SUPPORT_LIBZ
59    #include <zlib.h>
60  #endif  #endif
61    
62  #include <pcre.h>  #ifdef SUPPORT_LIBBZ2
63    #include <bzlib.h>
64    #endif
65    
66    #include "pcre.h"
67    
68  #define FALSE 0  #define FALSE 0
69  #define TRUE 1  #define TRUE 1
# Line 75  all values greater than FN_DEFAULT. */ Line 84  all values greater than FN_DEFAULT. */
84    
85  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };  enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
86    
87    /* File reading styles */
88    
89    enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
90    
91  /* Actions for the -d and -D options */  /* Actions for the -d and -D options */
92    
93  enum { dee_READ, dee_SKIP, dee_RECURSE };  enum { dee_READ, dee_SKIP, dee_RECURSE };
# Line 88  enum { DEE_READ, DEE_SKIP }; Line 101  enum { DEE_READ, DEE_SKIP };
101    
102  /* Line ending types */  /* Line ending types */
103    
104  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
105    
106    
107    
# Line 126  static pcre_extra **hints_list = NULL; Line 139  static pcre_extra **hints_list = NULL;
139    
140  static char *include_pattern = NULL;  static char *include_pattern = NULL;
141  static char *exclude_pattern = NULL;  static char *exclude_pattern = NULL;
142    static char *include_dir_pattern = NULL;
143    static char *exclude_dir_pattern = NULL;
144    
145  static pcre *include_compiled = NULL;  static pcre *include_compiled = NULL;
146  static pcre *exclude_compiled = NULL;  static pcre *exclude_compiled = NULL;
147    static pcre *include_dir_compiled = NULL;
148    static pcre *exclude_dir_compiled = NULL;
149    
150  static int after_context = 0;  static int after_context = 0;
151  static int before_context = 0;  static int before_context = 0;
# Line 141  static int process_options = 0; Line 158  static int process_options = 0;
158    
159  static BOOL count_only = FALSE;  static BOOL count_only = FALSE;
160  static BOOL do_colour = FALSE;  static BOOL do_colour = FALSE;
161    static BOOL file_offsets = FALSE;
162  static BOOL hyphenpending = FALSE;  static BOOL hyphenpending = FALSE;
163  static BOOL invert = FALSE;  static BOOL invert = FALSE;
164    static BOOL line_offsets = FALSE;
165  static BOOL multiline = FALSE;  static BOOL multiline = FALSE;
166  static BOOL number = FALSE;  static BOOL number = FALSE;
167  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
# Line 166  typedef struct option_item { Line 185  typedef struct option_item {
185  /* Options without a single-letter equivalent get a negative value. This can be  /* Options without a single-letter equivalent get a negative value. This can be
186  used to identify them. */  used to identify them. */
187    
188  #define N_COLOUR    (-1)  #define N_COLOUR       (-1)
189  #define N_EXCLUDE   (-2)  #define N_EXCLUDE      (-2)
190  #define N_HELP      (-3)  #define N_EXCLUDE_DIR  (-3)
191  #define N_INCLUDE   (-4)  #define N_HELP         (-4)
192  #define N_LABEL     (-5)  #define N_INCLUDE      (-5)
193  #define N_LOCALE    (-6)  #define N_INCLUDE_DIR  (-6)
194  #define N_NULL      (-7)  #define N_LABEL        (-7)
195    #define N_LOCALE       (-8)
196    #define N_NULL         (-9)
197    #define N_LOFFSETS     (-10)
198    #define N_FOFFSETS     (-11)
199    
200  static option_item optionlist[] = {  static option_item optionlist[] = {
201    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },    { OP_NODATA,    N_NULL,   NULL,              "",              "  terminate options" },
# Line 188  static option_item optionlist[] = { Line 211  static option_item optionlist[] = {
211    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },    { OP_PATLIST,   'e',      NULL,              "regex(p)",      "specify pattern (may be used more than once)" },
212    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },    { OP_NODATA,    'F',      NULL,              "fixed_strings", "patterns are sets of newline-separated strings" },
213    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },    { OP_STRING,    'f',      &pattern_filename, "file=path",     "read patterns from file" },
214      { OP_NODATA,    N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
215    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },    { OP_NODATA,    'H',      NULL,              "with-filename", "force the prefixing filename on output" },
216    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },    { OP_NODATA,    'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
217    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },    { OP_NODATA,    'i',      NULL,              "ignore-case",   "ignore case distinctions" },
218    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },    { OP_NODATA,    'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
219    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },    { OP_NODATA,    'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
220    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
221      { OP_NODATA,    N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
222    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
223    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
224    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
225    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
226    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
227    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
228    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },    { OP_NODATA,    'r',      NULL,              "recursive",     "recursively scan sub-directories" },
229    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },    { OP_STRING,    N_EXCLUDE,&exclude_pattern,  "exclude=pattern","exclude matching files when recursing" },
230    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },    { OP_STRING,    N_INCLUDE,&include_pattern,  "include=pattern","include matching files when recursing" },
231      { OP_STRING,    N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
232      { OP_STRING,    N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
233  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
234    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },    { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
235  #endif  #endif
# Line 226  static const char *prefix[] = { Line 253  static const char *prefix[] = {
253  static const char *suffix[] = {  static const char *suffix[] = {
254    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
255    
256  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
257    
258  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
259    
# Line 280  for (;;) Line 307  for (;;)
307    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
308      return dent->d_name;      return dent->d_name;
309    }    }
310  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
311  }  }
312    
313  static void  static void
# Line 315  return isatty(fileno(stdout)); Line 342  return isatty(fileno(stdout));
342    
343  /* I (Philip Hazel) have no means of testing this code. It was contributed by  /* I (Philip Hazel) have no means of testing this code. It was contributed by
344  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES  Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
345  when it did not exist. */  when it did not exist. David Byron added a patch that moved the #include of
346    <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
347    */
348    
349  #elif HAVE_WINDOWS_H  #elif HAVE_WINDOWS_H
350    
# Line 326  when it did not exist. */ Line 354  when it did not exist. */
354  #ifndef WIN32_LEAN_AND_MEAN  #ifndef WIN32_LEAN_AND_MEAN
355  # define WIN32_LEAN_AND_MEAN  # define WIN32_LEAN_AND_MEAN
356  #endif  #endif
357    
358    #include <windows.h>
359    
360  #ifndef INVALID_FILE_ATTRIBUTES  #ifndef INVALID_FILE_ATTRIBUTES
361  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF  #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
362  #endif  #endif
363    
 #include <windows.h>  
   
364  typedef struct directory_type  typedef struct directory_type
365  {  {
366  HANDLE handle;  HANDLE handle;
# Line 416  regular if they are not directories. */ Line 445  regular if they are not directories. */
445    
446  int isregfile(char *filename)  int isregfile(char *filename)
447  {  {
448  return !isdirectory(filename)  return !isdirectory(filename);
449  }  }
450    
451    
# Line 427  return !isdirectory(filename) Line 456  return !isdirectory(filename)
456  static BOOL  static BOOL
457  is_stdout_tty(void)  is_stdout_tty(void)
458  {  {
459  FALSE;  return FALSE;
460  }  }
461    
462    
# Line 545  switch(endlinetype) Line 574  switch(endlinetype)
574      }      }
575    break;    break;
576    
577      case EL_ANYCRLF:
578      while (p < endptr)
579        {
580        int extra = 0;
581        register int c = *((unsigned char *)p);
582    
583        if (utf8 && c >= 0xc0)
584          {
585          int gcii, gcss;
586          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
587          gcss = 6*extra;
588          c = (c & utf8_table3[extra]) << gcss;
589          for (gcii = 1; gcii <= extra; gcii++)
590            {
591            gcss -= 6;
592            c |= (p[gcii] & 0x3f) << gcss;
593            }
594          }
595    
596        p += 1 + extra;
597    
598        switch (c)
599          {
600          case 0x0a:    /* LF */
601          *lenptr = 1;
602          return p;
603    
604          case 0x0d:    /* CR */
605          if (p < endptr && *p == 0x0a)
606            {
607            *lenptr = 2;
608            p++;
609            }
610          else *lenptr = 1;
611          return p;
612    
613          default:
614          break;
615          }
616        }   /* End of loop for ANYCRLF case */
617    
618      *lenptr = 0;  /* Must have hit the end */
619      return endptr;
620    
621    case EL_ANY:    case EL_ANY:
622    while (p < endptr)    while (p < endptr)
623      {      {
# Line 643  switch(endlinetype) Line 716  switch(endlinetype)
716    return p;   /* But control should never get here */    return p;   /* But control should never get here */
717    
718    case EL_ANY:    case EL_ANY:
719      case EL_ANYCRLF:
720    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
721    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
722    
# Line 671  switch(endlinetype) Line 745  switch(endlinetype)
745        }        }
746      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
747    
748      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
749          {
750          case 0x0a:    /* LF */
751          case 0x0d:    /* CR */
752          return p;
753    
754          default:
755          break;
756          }
757    
758        else switch (c)
759        {        {
760        case 0x0a:    /* LF */        case 0x0a:    /* LF */
761        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 748  be in the middle third most of the time, Line 832  be in the middle third most of the time,
832  "before" context printing.  "before" context printing.
833    
834  Arguments:  Arguments:
835    in           the fopened FILE stream    handle       the fopened FILE stream for a normal file
836                   the gzFile pointer when reading is via libz
837                   the BZFILE pointer when reading is via libbz2
838      frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
839    printname    the file name if it is to be printed for each match    printname    the file name if it is to be printed for each match
840                 or NULL if the file name is not to be printed                 or NULL if the file name is not to be printed
841                 it cannot be NULL if filenames[_nomatch]_only is set                 it cannot be NULL if filenames[_nomatch]_only is set
842    
843  Returns:       0 if there was at least one match  Returns:       0 if there was at least one match
844                 1 otherwise (no matches)                 1 otherwise (no matches)
845                   2 if there is a read error on a .bz2 file
846  */  */
847    
848  static int  static int
849  pcregrep(FILE *in, char *printname)  pcregrep(void *handle, int frtype, char *printname)
850  {  {
851  int rc = 1;  int rc = 1;
852  int linenumber = 1;  int linenumber = 1;
853  int lastmatchnumber = 0;  int lastmatchnumber = 0;
854  int count = 0;  int count = 0;
855    int filepos = 0;
856  int offsets[99];  int offsets[99];
857  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
858  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
# Line 771  char *ptr = buffer; Line 860  char *ptr = buffer;
860  char *endptr;  char *endptr;
861  size_t bufflength;  size_t bufflength;
862  BOOL endhyphenpending = FALSE;  BOOL endhyphenpending = FALSE;
863    FILE *in = NULL;                    /* Ensure initialized */
864    
865    #ifdef SUPPORT_LIBZ
866    gzFile ingz = NULL;
867    #endif
868    
869    #ifdef SUPPORT_LIBBZ2
870    BZFILE *inbz2 = NULL;
871    #endif
872    
873    
874  /* Do the first read into the start of the buffer and set up the pointer to  /* Do the first read into the start of the buffer and set up the pointer to end
875  end of what we have. */  of what we have. In the case of libz, a non-zipped .gz file will be read as a
876    plain file. However, if a .bz2 file isn't actually bzipped, the first read will
877    fail. */
878    
879    #ifdef SUPPORT_LIBZ
880    if (frtype == FR_LIBZ)
881      {
882      ingz = (gzFile)handle;
883      bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
884      }
885    else
886    #endif
887    
888    #ifdef SUPPORT_LIBBZ2
889    if (frtype == FR_LIBBZ2)
890      {
891      inbz2 = (BZFILE *)handle;
892      bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
893      if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
894      }                                    /* without the cast it is unsigned. */
895    else
896    #endif
897    
898      {
899      in = (FILE *)handle;
900      bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
901      }
902    
 bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);  
903  endptr = buffer + bufflength;  endptr = buffer + bufflength;
904    
905  /* Loop while the current pointer is not at the end of the file. For large  /* Loop while the current pointer is not at the end of the file. For large
# Line 788  while (ptr < endptr) Line 912  while (ptr < endptr)
912    int i, endlinelength;    int i, endlinelength;
913    int mrc = 0;    int mrc = 0;
914    BOOL match = FALSE;    BOOL match = FALSE;
915      char *matchptr = ptr;
916    char *t = ptr;    char *t = ptr;
917    size_t length, linelength;    size_t length, linelength;
918    
# Line 800  while (ptr < endptr) Line 925  while (ptr < endptr)
925    
926    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
927    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
928    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
929    
930    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
931    
# Line 850  while (ptr < endptr) Line 975  while (ptr < endptr)
975    }    }
976  #endif  #endif
977    
978      /* We come back here after a match when the -o option (only_matching) is set,
979      in order to find any further matches in the same line. */
980    
981      ONLY_MATCHING_RESTART:
982    
983    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches. Note that we don't include
984    the final newline in the subject string. */    the final newline in the subject string. */
985    
986    for (i = 0; i < pattern_count; i++)    for (i = 0; i < pattern_count; i++)
987      {      {
988      mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,      mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,
989        offsets, 99);        offsets, 99);
990      if (mrc >= 0) { match = TRUE; break; }      if (mrc >= 0) { match = TRUE; break; }
991      if (mrc != PCRE_ERROR_NOMATCH)      if (mrc != PCRE_ERROR_NOMATCH)
# Line 864  while (ptr < endptr) Line 993  while (ptr < endptr)
993        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);        fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
994        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);        if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
995        fprintf(stderr, "this line:\n");        fprintf(stderr, "this line:\n");
996        fwrite(ptr, 1, linelength, stderr);   /* In case binary zero included */        fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */
997        fprintf(stderr, "\n");        fprintf(stderr, "\n");
998        if (error_count == 0 &&        if (error_count == 0 &&
999            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))            (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
# Line 911  while (ptr < endptr) Line 1040  while (ptr < endptr)
1040      else if (quiet) return 0;      else if (quiet) return 0;
1041    
1042      /* The --only-matching option prints just the substring that matched, and      /* The --only-matching option prints just the substring that matched, and
1043      does not pring any context. */      the --file-offsets and --line-offsets options output offsets for the
1044        matching substring (they both force --only-matching). None of these options
1045        prints any context. Afterwards, adjust the start and length, and then jump
1046        back to look for further matches in the same line. If we are in invert
1047        mode, however, nothing is printed - this could be still useful because the
1048        return code is set. */
1049    
1050      else if (only_matching)      else if (only_matching)
1051        {        {
1052        if (printname != NULL) fprintf(stdout, "%s:", printname);        if (!invert)
1053        if (number) fprintf(stdout, "%d:", linenumber);          {
1054        fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1055        fprintf(stdout, "\n");          if (number) fprintf(stdout, "%d:", linenumber);
1056            if (line_offsets)
1057              fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,
1058                offsets[1] - offsets[0]);
1059            else if (file_offsets)
1060              fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,
1061                offsets[1] - offsets[0]);
1062            else
1063              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1064            fprintf(stdout, "\n");
1065            matchptr += offsets[1];
1066            length -= offsets[1];
1067            match = FALSE;
1068            goto ONLY_MATCHING_RESTART;
1069            }
1070        }        }
1071    
1072      /* This is the default case when none of the above options is set. We print      /* This is the default case when none of the above options is set. We print
# Line 1008  while (ptr < endptr) Line 1156  while (ptr < endptr)
1156    
1157        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1158        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1159        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1160        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1161          the match will always be before the first newline sequence. */
1162    
1163        if (multiline)        if (multiline)
1164          {          {
1165          int ellength;          int ellength;
1166          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1167          t = ptr;          if (!invert)
         while (t < endmatch)  
1168            {            {
1169            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1170            if (t <= endmatch) linenumber++; else break;            t = ptr;
1171              while (t < endmatch)
1172                {
1173                t = end_of_line(t, endptr, &ellength);
1174                if (t <= endmatch) linenumber++; else break;
1175                }
1176            }            }
1177          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1178          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1052  while (ptr < endptr) Line 1205  while (ptr < endptr)
1205          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1206          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1207          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1208          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1209              stdout);
1210          }          }
1211        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1212        }        }
# Line 1068  while (ptr < endptr) Line 1222  while (ptr < endptr)
1222      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1223      }      }
1224    
1225    /* Advance to after the newline and increment the line number. */    /* For a match in multiline inverted mode (which of course did not cause
1226      anything to be printed), we have to move on to the end of the match before
1227      proceeding. */
1228    
1229      if (multiline && invert && match)
1230        {
1231        int ellength;
1232        char *endmatch = ptr + offsets[1];
1233        t = ptr;
1234        while (t < endmatch)
1235          {
1236          t = end_of_line(t, endptr, &ellength);
1237          if (t <= endmatch) linenumber++; else break;
1238          }
1239        endmatch = end_of_line(endmatch, endptr, &ellength);
1240        linelength = endmatch - ptr - ellength;
1241        }
1242    
1243      /* Advance to after the newline and increment the line number. The file
1244      offset to the current line is maintained in filepos. */
1245    
1246    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
1247      filepos += linelength + endlinelength;
1248    linenumber++;    linenumber++;
1249    
1250    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1092  while (ptr < endptr) Line 1266  while (ptr < endptr)
1266    
1267      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);      memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1268      ptr -= MBUFTHIRD;      ptr -= MBUFTHIRD;
1269    
1270    #ifdef SUPPORT_LIBZ
1271        if (frtype == FR_LIBZ)
1272          bufflength = 2*MBUFTHIRD +
1273            gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1274        else
1275    #endif
1276    
1277    #ifdef SUPPORT_LIBBZ2
1278        if (frtype == FR_LIBBZ2)
1279          bufflength = 2*MBUFTHIRD +
1280            BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1281        else
1282    #endif
1283    
1284      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);      bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1285    
1286      endptr = buffer + bufflength;      endptr = buffer + bufflength;
1287    
1288      /* Adjust any last match point */      /* Adjust any last match point */
# Line 1156  grep_or_recurse(char *pathname, BOOL dir Line 1346  grep_or_recurse(char *pathname, BOOL dir
1346  {  {
1347  int rc = 1;  int rc = 1;
1348  int sep;  int sep;
1349  FILE *in;  int frtype;
1350    int pathlen;
1351    void *handle;
1352    FILE *in = NULL;           /* Ensure initialized */
1353    
1354    #ifdef SUPPORT_LIBZ
1355    gzFile ingz = NULL;
1356    #endif
1357    
1358    #ifdef SUPPORT_LIBBZ2
1359    BZFILE *inbz2 = NULL;
1360    #endif
1361    
1362  /* If the file name is "-" we scan stdin */  /* If the file name is "-" we scan stdin */
1363    
1364  if (strcmp(pathname, "-") == 0)  if (strcmp(pathname, "-") == 0)
1365    {    {
1366    return pcregrep(stdin,    return pcregrep(stdin, FR_PLAIN,
1367      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?      (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1368        stdin_name : NULL);        stdin_name : NULL);
1369    }    }
1370    
   
1371  /* If the file is a directory, skip if skipping or if we are recursing, scan  /* If the file is a directory, skip if skipping or if we are recursing, scan
1372  each file within it, subject to any include or exclude patterns that were set.  each file and directory within it, subject to any include or exclude patterns
1373  The scanning code is localized so it can be made system-specific. */  that were set. The scanning code is localized so it can be made
1374    system-specific. */
1375    
1376  if ((sep = isdirectory(pathname)) != 0)  if ((sep = isdirectory(pathname)) != 0)
1377    {    {
# Line 1191  if ((sep = isdirectory(pathname)) != 0) Line 1392  if ((sep = isdirectory(pathname)) != 0)
1392    
1393      while ((nextfile = readdirectory(dir)) != NULL)      while ((nextfile = readdirectory(dir)) != NULL)
1394        {        {
1395        int frc, blen;        int frc, nflen;
1396        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);        sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1397        blen = strlen(buffer);        nflen = strlen(nextfile);
1398    
1399        if (exclude_compiled != NULL &&        if (isdirectory(buffer))
1400            pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)          {
1401          continue;          if (exclude_dir_compiled != NULL &&
1402                pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1403        if (include_compiled != NULL &&            continue;
1404            pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)  
1405          continue;          if (include_dir_compiled != NULL &&
1406                pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1407              continue;
1408            }
1409          else
1410            {
1411            if (exclude_compiled != NULL &&
1412                pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1413              continue;
1414    
1415            if (include_compiled != NULL &&
1416                pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1417              continue;
1418            }
1419    
1420        frc = grep_or_recurse(buffer, dir_recurse, FALSE);        frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1421        if (frc > 1) rc = frc;        if (frc > 1) rc = frc;
# Line 1224  skipping was not requested. The scan pro Line 1438  skipping was not requested. The scan pro
1438  argument at top level, we don't show the file name, unless we are only showing  argument at top level, we don't show the file name, unless we are only showing
1439  the file name, or the filename was forced (-H). */  the file name, or the filename was forced (-H). */
1440    
1441  in = fopen(pathname, "r");  pathlen = strlen(pathname);
1442  if (in == NULL)  
1443    /* Open using zlib if it is supported and the file name ends with .gz. */
1444    
1445    #ifdef SUPPORT_LIBZ
1446    if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1447      {
1448      ingz = gzopen(pathname, "rb");
1449      if (ingz == NULL)
1450        {
1451        if (!silent)
1452          fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1453            strerror(errno));
1454        return 2;
1455        }
1456      handle = (void *)ingz;
1457      frtype = FR_LIBZ;
1458      }
1459    else
1460    #endif
1461    
1462    /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1463    
1464    #ifdef SUPPORT_LIBBZ2
1465    if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1466      {
1467      inbz2 = BZ2_bzopen(pathname, "rb");
1468      handle = (void *)inbz2;
1469      frtype = FR_LIBBZ2;
1470      }
1471    else
1472    #endif
1473    
1474    /* Otherwise use plain fopen(). The label is so that we can come back here if
1475    an attempt to read a .bz2 file indicates that it really is a plain file. */
1476    
1477    #ifdef SUPPORT_LIBBZ2
1478    PLAIN_FILE:
1479    #endif
1480      {
1481      in = fopen(pathname, "r");
1482      handle = (void *)in;
1483      frtype = FR_PLAIN;
1484      }
1485    
1486    /* All the opening methods return errno when they fail. */
1487    
1488    if (handle == NULL)
1489    {    {
1490    if (!silent)    if (!silent)
1491      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,      fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
# Line 1233  if (in == NULL) Line 1493  if (in == NULL)
1493    return 2;    return 2;
1494    }    }
1495    
1496  rc = pcregrep(in, (filenames > FN_DEFAULT ||  /* Now grep the file */
1497    
1498    rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1499    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);    (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1500    
1501    /* Close in an appropriate manner. */
1502    
1503    #ifdef SUPPORT_LIBZ
1504    if (frtype == FR_LIBZ)
1505      gzclose(ingz);
1506    else
1507    #endif
1508    
1509    /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1510    read failed. If the error indicates that the file isn't in fact bzipped, try
1511    again as a normal file. */
1512    
1513    #ifdef SUPPORT_LIBBZ2
1514    if (frtype == FR_LIBBZ2)
1515      {
1516      if (rc == 2)
1517        {
1518        int errnum;
1519        const char *err = BZ2_bzerror(inbz2, &errnum);
1520        if (errnum == BZ_DATA_ERROR_MAGIC)
1521          {
1522          BZ2_bzclose(inbz2);
1523          goto PLAIN_FILE;
1524          }
1525        else if (!silent)
1526          fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1527            pathname, err);
1528        }
1529      BZ2_bzclose(inbz2);
1530      }
1531    else
1532    #endif
1533    
1534    /* Normal file close */
1535    
1536  fclose(in);  fclose(in);
1537    
1538    /* Pass back the yield from pcregrep(). */
1539    
1540  return rc;  return rc;
1541  }  }
1542    
# Line 1257  for (op = optionlist; op->one_char != 0; Line 1557  for (op = optionlist; op->one_char != 0;
1557    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);    if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1558    }    }
1559  fprintf(stderr, "] [long options] [pattern] [files]\n");  fprintf(stderr, "] [long options] [pattern] [files]\n");
1560  fprintf(stderr, "Type `pcregrep --help' for more information.\n");  fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1561      "options.\n");
1562  return rc;  return rc;
1563  }  }
1564    
# Line 1276  option_item *op; Line 1577  option_item *op;
1577  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");  printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1578  printf("Search for PATTERN in each FILE or standard input.\n");  printf("Search for PATTERN in each FILE or standard input.\n");
1579  printf("PATTERN must be present if neither -e nor -f is used.\n");  printf("PATTERN must be present if neither -e nor -f is used.\n");
1580  printf("\"-\" can be used as a file name to mean STDIN.\n\n");  printf("\"-\" can be used as a file name to mean STDIN.\n");
1581  printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");  
1582    #ifdef SUPPORT_LIBZ
1583    printf("Files whose names end in .gz are read using zlib.\n");
1584    #endif
1585    
1586    #ifdef SUPPORT_LIBBZ2
1587    printf("Files whose names end in .bz2 are read using bzlib2.\n");
1588    #endif
1589    
1590    #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1591    printf("Other files and the standard input are read as plain files.\n\n");
1592    #else
1593    printf("All files are read as plain files, without any interpretation.\n\n");
1594    #endif
1595    
1596    printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1597  printf("Options:\n");  printf("Options:\n");
1598    
1599  for (op = optionlist; op->one_char != 0; op++)  for (op = optionlist; op->one_char != 0; op++)
# Line 1286  for (op = optionlist; op->one_char != 0; Line 1601  for (op = optionlist; op->one_char != 0;
1601    int n;    int n;
1602    char s[4];    char s[4];
1603    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");    if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, "   ");
1604    printf("  %s --%s%n", s, op->long_name, &n);    n = 30 - printf("  %s --%s", s, op->long_name);
   n = 30 - n;  
1605    if (n < 1) n = 1;    if (n < 1) n = 1;
1606    printf("%.*s%s\n", n, "                    ", op->help_text);    printf("%.*s%s\n", n, "                    ", op->help_text);
1607    }    }
# Line 1312  handle_option(int letter, int options) Line 1626  handle_option(int letter, int options)
1626  {  {
1627  switch(letter)  switch(letter)
1628    {    {
1629      case N_FOFFSETS: file_offsets = TRUE; break;
1630    case N_HELP: help(); exit(0);    case N_HELP: help(); exit(0);
1631      case N_LOFFSETS: line_offsets = number = TRUE; break;
1632    case 'c': count_only = TRUE; break;    case 'c': count_only = TRUE; break;
1633    case 'F': process_options |= PO_FIXED_STRINGS; break;    case 'F': process_options |= PO_FIXED_STRINGS; break;
1634    case 'H': filenames = FN_FORCE; break;    case 'H': filenames = FN_FORCE; break;
# Line 1408  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1724  sprintf(buffer, "%s%.*s%s", prefix[proce
1724    suffix[process_options]);    suffix[process_options]);
1725  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1726    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1727  if (pattern_list[pattern_count] != NULL)  if (pattern_list[pattern_count] != NULL)
1728    {    {
1729    pattern_count++;    pattern_count++;
1730    return TRUE;    return TRUE;
1731    }    }
1732    
1733  /* Handle compile errors */  /* Handle compile errors */
1734    
# Line 1470  if ((process_options & PO_FIXED_STRINGS) Line 1786  if ((process_options & PO_FIXED_STRINGS)
1786      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1787      if (ellength == 0)      if (ellength == 0)
1788        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1789      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1790      pattern = p;      pattern = p;
1791      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1792        return FALSE;        return FALSE;
# Line 1512  switch(i) Line 1828  switch(i)
1828    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1829    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1830    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1831      case -2:                 newline = (char *)"anycrlf"; break;
1832    }    }
1833    
1834  /* Process the options */  /* Process the options */
# Line 1569  for (i = 1; i < argc; i++) Line 1886  for (i = 1; i < argc; i++)
1886          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1887            {            {
1888            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1889            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1890            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1891              {              {
1892              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1588  for (i = 1; i < argc; i++) Line 1905  for (i = 1; i < argc; i++)
1905          char buff2[24];          char buff2[24];
1906          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1907          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1908          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1909            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1910          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1911            break;            break;
1912          }          }
# Line 1748  if (both_context > 0) Line 2065  if (both_context > 0)
2065    if (before_context == 0) before_context = both_context;    if (before_context == 0) before_context = both_context;
2066    }    }
2067    
2068    /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2069    However, the latter two set the only_matching flag. */
2070    
2071    if ((only_matching && (file_offsets || line_offsets)) ||
2072        (file_offsets && line_offsets))
2073      {
2074      fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2075        "and/or --line-offsets\n");
2076      exit(usage(2));
2077      }
2078    
2079    if (file_offsets || line_offsets) only_matching = TRUE;
2080    
2081  /* If a locale has not been provided as an option, see if the LC_CTYPE or  /* If a locale has not been provided as an option, see if the LC_CTYPE or
2082  LC_ALL environment variable is set, and if so, use it. */  LC_ALL environment variable is set, and if so, use it. */
2083    
# Line 1819  else if (strcmp(newline, "any") == 0 || Line 2149  else if (strcmp(newline, "any") == 0 ||
2149    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
2150    endlinetype = EL_ANY;    endlinetype = EL_ANY;
2151    }    }
2152    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2153      {
2154      pcre_options |= PCRE_NEWLINE_ANYCRLF;
2155      endlinetype = EL_ANYCRLF;
2156      }
2157  else  else
2158    {    {
2159    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1947  for (j = 0; j < pattern_count; j++) Line 2282  for (j = 0; j < pattern_count; j++)
2282      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2283      goto EXIT2;      goto EXIT2;
2284      }      }
2285    hint_count++;    hint_count++;
2286    }    }
2287    
2288  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 1976  if (include_pattern != NULL) Line 2311  if (include_pattern != NULL)
2311      }      }
2312    }    }
2313    
2314    if (exclude_dir_pattern != NULL)
2315      {
2316      exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2317        pcretables);
2318      if (exclude_dir_compiled == NULL)
2319        {
2320        fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2321          errptr, error);
2322        goto EXIT2;
2323        }
2324      }
2325    
2326    if (include_dir_pattern != NULL)
2327      {
2328      include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2329        pcretables);
2330      if (include_dir_compiled == NULL)
2331        {
2332        fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2333          errptr, error);
2334        goto EXIT2;
2335        }
2336      }
2337    
2338  /* If there are no further arguments, do the business on stdin and exit. */  /* If there are no further arguments, do the business on stdin and exit. */
2339    
2340  if (i >= argc)  if (i >= argc)
2341    {    {
2342    rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);    rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2343    goto EXIT;    goto EXIT;
2344    }    }
2345    

Legend:
Removed from v.141  
changed lines
  Added in v.325

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12