/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC
# Line 6  Line 6 
6  its pattern matching. On a Unix or Win32 system it can recurse into  its pattern matching. On a Unix or Win32 system it can recurse into
7  directories.  directories.
8    
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 71  POSSIBILITY OF SUCH DAMAGE. Line 71  POSSIBILITY OF SUCH DAMAGE.
71  typedef int BOOL;  typedef int BOOL;
72    
73  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
74    #define OFFSET_SIZE 99
75    
76  #if BUFSIZ > 8192  #if BUFSIZ > 8192
77  #define MBUFTHIRD BUFSIZ  #define MBUFTHIRD BUFSIZ
# Line 821  if (after_context > 0 && lastmatchnumber Line 822  if (after_context > 0 && lastmatchnumber
822    
823    
824  /*************************************************  /*************************************************
825    *   Apply patterns to subject till one matches   *
826    *************************************************/
827    
828    /* This function is called to run through all patterns, looking for a match. It
829    is used multiple times for the same subject when colouring is enabled, in order
830    to find all possible matches.
831    
832    Arguments:
833      matchptr    the start of the subject
834      length      the length of the subject to match
835      offsets     the offets vector to fill in
836      mrc         address of where to put the result of pcre_exec()
837    
838    Returns:      TRUE if there was a match
839                  FALSE if there was no match
840                  invert if there was a non-fatal error
841    */
842    
843    static BOOL
844    match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
845    {
846    int i;
847    for (i = 0; i < pattern_count; i++)
848      {
849      *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0,
850        PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
851      if (*mrc >= 0) return TRUE;
852      if (*mrc == PCRE_ERROR_NOMATCH) continue;
853      fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
854      if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
855      fprintf(stderr, "this text:\n");
856      fwrite(matchptr, 1, length, stderr);  /* In case binary zero included */
857      fprintf(stderr, "\n");
858      if (error_count == 0 &&
859          (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
860        {
861        fprintf(stderr, "pcregrep: error %d means that a resource limit "
862          "was exceeded\n", *mrc);
863        fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
864        }
865      if (error_count++ > 20)
866        {
867        fprintf(stderr, "pcregrep: too many errors - abandoned\n");
868        exit(2);
869        }
870      return invert;    /* No more matching; don't show the line again */
871      }
872    
873    return FALSE;  /* No match, no errors */
874    }
875    
876    
877    
878    /*************************************************
879  *            Grep an individual file             *  *            Grep an individual file             *
880  *************************************************/  *************************************************/
881    
# Line 853  int linenumber = 1; Line 908  int linenumber = 1;
908  int lastmatchnumber = 0;  int lastmatchnumber = 0;
909  int count = 0;  int count = 0;
910  int filepos = 0;  int filepos = 0;
911  int offsets[99];  int offsets[OFFSET_SIZE];
912  char *lastmatchrestart = NULL;  char *lastmatchrestart = NULL;
913  char buffer[3*MBUFTHIRD];  char buffer[3*MBUFTHIRD];
914  char *ptr = buffer;  char *ptr = buffer;
# Line 909  way, the buffer is shifted left and re-f Line 964  way, the buffer is shifted left and re-f
964    
965  while (ptr < endptr)  while (ptr < endptr)
966    {    {
967    int i, endlinelength;    int endlinelength;
968    int mrc = 0;    int mrc = 0;
969    BOOL match = FALSE;    BOOL match;
970    char *matchptr = ptr;    char *matchptr = ptr;
971    char *t = ptr;    char *t = ptr;
972    size_t length, linelength;    size_t length, linelength;
# Line 919  while (ptr < endptr) Line 974  while (ptr < endptr)
974    /* At this point, ptr is at the start of a line. We need to find the length    /* At this point, ptr is at the start of a line. We need to find the length
975    of the subject string to pass to pcre_exec(). In multiline mode, it is the    of the subject string to pass to pcre_exec(). In multiline mode, it is the
976    length remainder of the data in the buffer. Otherwise, it is the length of    length remainder of the data in the buffer. Otherwise, it is the length of
977    the next line. After matching, we always advance by the length of the next    the next line, excluding the terminating newline. After matching, we always
978    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
979    that any match is constrained to be in the first line. */    option is used for compiling, so that any match is constrained to be in the
980      first line. */
981    
982    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
983    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
# Line 936  while (ptr < endptr) Line 992  while (ptr < endptr)
992        #include <time.h>        #include <time.h>
993        struct timeval start_time, end_time;        struct timeval start_time, end_time;
994        struct timezone dummy;        struct timezone dummy;
995          int i;
996    
997        if (jfriedl_XT)        if (jfriedl_XT)
998        {        {
# Line 961  while (ptr < endptr) Line 1018  while (ptr < endptr)
1018    
1019    
1020        for (i = 0; i < jfriedl_XR; i++)        for (i = 0; i < jfriedl_XR; i++)
1021            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);            match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1022                  PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1023    
1024        if (gettimeofday(&end_time, &dummy) != 0)        if (gettimeofday(&end_time, &dummy) != 0)
1025                perror("bad gettimeofday");                perror("bad gettimeofday");
# Line 980  while (ptr < endptr) Line 1038  while (ptr < endptr)
1038    
1039    ONLY_MATCHING_RESTART:    ONLY_MATCHING_RESTART:
1040    
1041    /* Run through all the patterns until one matches. Note that we don't include    /* Run through all the patterns until one matches or there is an error other
1042    the final newline in the subject string. */    than NOMATCH. This code is in a subroutine so that it can be re-used for
1043      finding subsequent matches when colouring matched lines. */
1044    for (i = 0; i < pattern_count; i++)  
1045      {    match = match_patterns(matchptr, length, offsets, &mrc);
     mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, length, 0, 0,  
       offsets, 99);  
     if (mrc >= 0) { match = TRUE; break; }  
     if (mrc != PCRE_ERROR_NOMATCH)  
       {  
       fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);  
       if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);  
       fprintf(stderr, "this line:\n");  
       fwrite(matchptr, 1, linelength, stderr);  /* In case binary zero included */  
       fprintf(stderr, "\n");  
       if (error_count == 0 &&  
           (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))  
         {  
         fprintf(stderr, "pcregrep: error %d means that a resource limit "  
           "was exceeded\n", mrc);  
         fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");  
         }  
       if (error_count++ > 20)  
         {  
         fprintf(stderr, "pcregrep: too many errors - abandoned\n");  
         exit(2);  
         }  
       match = invert;    /* No more matching; don't show the line again */  
       break;  
       }  
     }  
1046    
1047    /* If it's a match or a not-match (as required), do what's wanted. */    /* If it's a match or a not-match (as required), do what's wanted. */
1048    
# Line 1054  while (ptr < endptr) Line 1086  while (ptr < endptr)
1086          if (printname != NULL) fprintf(stdout, "%s:", printname);          if (printname != NULL) fprintf(stdout, "%s:", printname);
1087          if (number) fprintf(stdout, "%d:", linenumber);          if (number) fprintf(stdout, "%d:", linenumber);
1088          if (line_offsets)          if (line_offsets)
1089            fprintf(stdout, "%d,%d", matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1090              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1091          else if (file_offsets)          else if (file_offsets)
1092            fprintf(stdout, "%d,%d", filepos + matchptr + offsets[0] - ptr,            fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1093              offsets[1] - offsets[0]);              offsets[1] - offsets[0]);
1094          else          else
1095              {
1096              if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1097            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);            fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1098              if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1099              }
1100          fprintf(stdout, "\n");          fprintf(stdout, "\n");
1101          matchptr += offsets[1];          matchptr += offsets[1];
1102          length -= offsets[1];          length -= offsets[1];
# Line 1197  while (ptr < endptr) Line 1233  while (ptr < endptr)
1233        else        else
1234  #endif  #endif
1235    
1236        /* We have to split the line(s) up if colouring. */        /* We have to split the line(s) up if colouring, and search for further
1237          matches. */
1238    
1239        if (do_colour)        if (do_colour)
1240          {          {
1241            int last_offset = 0;
1242          fwrite(ptr, 1, offsets[0], stdout);          fwrite(ptr, 1, offsets[0], stdout);
1243          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1244          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1245          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1246          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],          for (;;)
1247              {
1248              last_offset += offsets[1];
1249              matchptr += offsets[1];
1250              length -= offsets[1];
1251              if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1252              fwrite(matchptr, 1, offsets[0], stdout);
1253              fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1254              fwrite(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1255              fprintf(stdout, "%c[00m", 0x1b);
1256              }
1257            fwrite(ptr + last_offset, 1, (linelength + endlinelength) - last_offset,
1258            stdout);            stdout);
1259          }          }
1260    
1261          /* Not colouring; no need to search for further matches */
1262    
1263        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1264        }        }
1265    
# Line 1819  const char *error; Line 1871  const char *error;
1871    
1872  /* Set the default line ending value from the default in the PCRE library;  /* Set the default line ending value from the default in the PCRE library;
1873  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".  "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1874  */  Note that the return values from pcre_config(), though derived from the ASCII
1875    codes, are the same in EBCDIC environments, so we must use the actual values
1876    rather than escapes such as as '\r'. */
1877    
1878  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1879  switch(i)  switch(i)
1880    {    {
1881    default:                 newline = (char *)"lf"; break;    default:               newline = (char *)"lf"; break;
1882    case '\r':               newline = (char *)"cr"; break;    case 13:               newline = (char *)"cr"; break;
1883    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case (13 << 8) | 10:   newline = (char *)"crlf"; break;
1884    case -1:                 newline = (char *)"any"; break;    case -1:               newline = (char *)"any"; break;
1885    case -2:                 newline = (char *)"anycrlf"; break;    case -2:               newline = (char *)"anycrlf"; break;
1886    }    }
1887    
1888  /* Process the options */  /* Process the options */

Legend:
Removed from v.345  
changed lines
  Added in v.391

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12