/[pcre]/code/tags/pcre-7.1/pcregrep.c
ViewVC logotype

Diff of /code/tags/pcre-7.1/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 92 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "4.3 01-Jun-2006"  #define VERSION "4.4 29-Nov-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 65  typedef int BOOL;
65  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
66  #endif  #endif
67    
   
68  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
69  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
70  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 82  enum { DEE_READ, DEE_SKIP };
82  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
83  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
84    
85    /* Line ending types */
86    
87    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
88    
89    
90    
91  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 103  static const char *jfriedl_prefix = "";
103  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
104  #endif  #endif
105    
106  static int  endlinebyte = '\n';     /* Last byte of endline sequence */  static int  endlinetype;
 static int  endlineextra = 0;       /* Extra bytes for endline sequence */  
107    
108  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
109  static char *colour_option = NULL;  static char *colour_option = NULL;
# Line 142  static BOOL number = FALSE; Line 144  static BOOL number = FALSE;
144  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
145  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
146  static BOOL silent = FALSE;  static BOOL silent = FALSE;
147    static BOOL utf8 = FALSE;
148    
149  /* Structure for options and list of them */  /* Structure for options and list of them */
150    
# Line 219  static const char *prefix[] = { Line 222  static const char *prefix[] = {
222  static const char *suffix[] = {  static const char *suffix[] = {
223    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
224    
225    /* UTF-8 tables - used only when the newline setting is "any". */
226    
       /* Mask applied to the first byte of a multi-byte UTF-8 character to keep
       only its data bits; indexed by the number of additional bytes. */
227    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
228    
       /* Number of additional bytes that follow a UTF-8 lead byte; indexed by the
       low 6 bits of the lead byte (consulted only for bytes >= 0xc0). */
229    const char utf8_table4[] = {
230      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
232      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
234    
235    
236    
237  /*************************************************  /*************************************************
# Line 471  return sys_errlist[n]; Line 484  return sys_errlist[n];
484    
485    
486  /*************************************************  /*************************************************
487    *             Find end of line                   *
488    *************************************************/
489    
490    /* The length of the endline sequence that is found is set via lenptr. This may
491    be zero at the very end of the file if there is no line-ending sequence there.
492    
493    Arguments:
494      p         current position in line
495      endptr    end of available data
496      lenptr    where to put the length of the eol sequence
497    
498    Returns:    pointer to the last byte of the line
499    */
500    
501    static char *
502    end_of_line(char *p, char *endptr, int *lenptr)
503    {
504    switch(endlinetype)
505      {
506      default:      /* Just in case */
507      case EL_LF:
508      while (p < endptr && *p != '\n') p++;
509      if (p < endptr)
510        {
511        *lenptr = 1;
512        return p + 1;
513        }
514      *lenptr = 0;
515      return endptr;
516    
517      case EL_CR:
518      while (p < endptr && *p != '\r') p++;
519      if (p < endptr)
520        {
521        *lenptr = 1;
522        return p + 1;
523        }
524      *lenptr = 0;
525      return endptr;
526    
527      case EL_CRLF:
528      for (;;)
529        {
530        while (p < endptr && *p != '\r') p++;
531        if (++p >= endptr)
532          {
533          *lenptr = 0;
534          return endptr;
535          }
536        if (*p == '\n')
537          {
538          *lenptr = 2;
539          return p + 1;
540          }
541        }
542      break;
543    
544      case EL_ANY:
545      while (p < endptr)
546        {
547        int extra = 0;
548        register int c = *((unsigned char *)p);
549    
550        if (utf8 && c >= 0xc0)
551          {
552          int gcii, gcss;
553          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
554          gcss = 6*extra;
555          c = (c & utf8_table3[extra]) << gcss;
556          for (gcii = 1; gcii <= extra; gcii++)
557            {
558            gcss -= 6;
559            c |= (p[gcii] & 0x3f) << gcss;
560            }
561          }
562    
563        p += 1 + extra;
564    
565        switch (c)
566          {
567          case 0x0a:    /* LF */
568          case 0x0b:    /* VT */
569          case 0x0c:    /* FF */
570          *lenptr = 1;
571          return p;
572    
573          case 0x0d:    /* CR */
574          if (p < endptr && *p == 0x0a)
575            {
576            *lenptr = 2;
577            p++;
578            }
579          else *lenptr = 1;
580          return p;
581    
582          case 0x85:    /* NEL */
583          *lenptr = utf8? 2 : 1;
584          return p;
585    
586          case 0x2028:  /* LS */
587          case 0x2029:  /* PS */
588          *lenptr = 3;
589          return p;
590    
591          default:
592          break;
593          }
594        }   /* End of loop for ANY case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598      }     /* End of overall switch */
599    }
600    
601    
602    
603    /*************************************************
604    *         Find start of previous line            *
605    *************************************************/
606    
607    /* This is called when looking back for before lines to print.
608    
609    Arguments:
610      p         start of the subsequent line
611      startptr  start of available data
612    
613    Returns:    pointer to the start of the previous line
614    */
615    
616    static char *
617    previous_line(char *p, char *startptr)
618    {
619    switch(endlinetype)
620      {
621      default:      /* Just in case */
622      case EL_LF:
623      p--;
624      while (p > startptr && p[-1] != '\n') p--;
625      return p;
626    
627      case EL_CR:
628      p--;
629      while (p > startptr && p[-1] != '\n') p--;
630      return p;
631    
632      case EL_CRLF:
633      for (;;)
634        {
635        p -= 2;
636        while (p > startptr && p[-1] != '\n') p--;
637        if (p <= startptr + 1 || p[-2] == '\r') return p;
638        }
639      return p;   /* But control should never get here */
640    
641      case EL_ANY:
642      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
643      if (utf8) while ((*p & 0xc0) == 0x80) p--;
644    
645      while (p > startptr)
646        {
647        register int c;
648        char *pp = p - 1;
649    
650        if (utf8)
651          {
652          int extra = 0;
653          while ((*pp & 0xc0) == 0x80) pp--;
654          c = *((unsigned char *)pp);
655          if (c >= 0xc0)
656            {
657            int gcii, gcss;
658            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
659            gcss = 6*extra;
660            c = (c & utf8_table3[extra]) << gcss;
661            for (gcii = 1; gcii <= extra; gcii++)
662              {
663              gcss -= 6;
664              c |= (pp[gcii] & 0x3f) << gcss;
665              }
666            }
667          }
668        else c = *((unsigned char *)pp);
669    
670        switch (c)
671          {
672          case 0x0a:    /* LF */
673          case 0x0b:    /* VT */
674          case 0x0c:    /* FF */
675          case 0x0d:    /* CR */
676          case 0x85:    /* NEL */
677          case 0x2028:  /* LS */
678          case 0x2029:  /* PS */
679          return p;
680    
681          default:
682          break;
683          }
684    
685        p = pp;  /* Back one character */
686        }        /* End of loop for ANY case */
687    
688      return startptr;  /* Hit start of data */
689      }     /* End of overall switch */
690    }
691    
692    
693    
694    
695    
696    /*************************************************
697  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
698  *************************************************/  *************************************************/
699    
# Line 495  if (after_context > 0 && lastmatchnumber Line 718  if (after_context > 0 && lastmatchnumber
718    int count = 0;    int count = 0;
719    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
720      {      {
721        int ellength;
722      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
723      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
724      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
725      while (*pp != endlinebyte) pp++;      pp = end_of_line(pp, endptr, &ellength);
726      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + (1 + endlineextra),      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
727        stdout);      lastmatchrestart = pp;
     lastmatchrestart = pp + 1;  
728      }      }
729    hyphenpending = TRUE;    hyphenpending = TRUE;
730    }    }
# Line 558  way, the buffer is shifted left and re-f Line 781  way, the buffer is shifted left and re-f
781    
782  while (ptr < endptr)  while (ptr < endptr)
783    {    {
784    int i;    int i, endlinelength;
785    int mrc = 0;    int mrc = 0;
786    BOOL match = FALSE;    BOOL match = FALSE;
787    char *t = ptr;    char *t = ptr;
# Line 571  while (ptr < endptr) Line 794  while (ptr < endptr)
794    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
795    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
796    
797    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
798    while (t < endptr && *t++ != endlinebyte) linelength++;    linelength = t - ptr - endlinelength;
799    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
800    
   
801    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
802    
803  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 706  while (ptr < endptr) Line 928  while (ptr < endptr)
928    
929        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
930          {          {
931            int ellength;
932          int linecount = 0;          int linecount = 0;
933          char *p = lastmatchrestart;          char *p = lastmatchrestart;
934    
935          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
936            {            {
937            while (*p != endlinebyte) p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
938            linecount++;            linecount++;
939            }            }
940    
# Line 725  while (ptr < endptr) Line 947  while (ptr < endptr)
947            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
948            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
949            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950            while (*pp != endlinebyte) pp++;            pp = end_of_line(pp, endptr, &ellength);
951            fwrite(lastmatchrestart, 1, pp - lastmatchrestart +            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952              (1 + endlineextra), stdout);            lastmatchrestart = pp;
           lastmatchrestart = pp + 1;  
953            }            }
954          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
955          }          }
# Line 754  while (ptr < endptr) Line 975  while (ptr < endptr)
975                 linecount < before_context)                 linecount < before_context)
976            {            {
977            linecount++;            linecount++;
978            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != endlinebyte) p--;  
979            }            }
980    
981          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 763  while (ptr < endptr) Line 983  while (ptr < endptr)
983    
984          while (p < ptr)          while (p < ptr)
985            {            {
986              int ellength;
987            char *pp = p;            char *pp = p;
988            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
989            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
990            while (*pp != endlinebyte) pp++;            pp = end_of_line(pp, endptr, &ellength);
991            fwrite(p, 1, pp - p + (1 + endlineextra), stdout);            fwrite(p, 1, pp - p, stdout);
992            p = pp + 1;            p = pp;
993            }            }
994          }          }
995    
# Line 788  while (ptr < endptr) Line 1009  while (ptr < endptr)
1009    
1010        if (multiline)        if (multiline)
1011          {          {
1012            int ellength;
1013          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1014          t = ptr;          t = ptr;
1015          while (t < endmatch) { if (*t++ == endlinebyte) linenumber++; }          while (t < endmatch)
1016          while (endmatch < endptr && *endmatch != endlinebyte) endmatch++;            {
1017          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1018              if (t <= endmatch) linenumber++; else break;
1019              }
1020            endmatch = end_of_line(endmatch, endptr, &ellength);
1021            linelength = endmatch - ptr - ellength;
1022          }          }
1023    
1024        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 824  while (ptr < endptr) Line 1050  while (ptr < endptr)
1050          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1051          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1052          }          }
1053        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1054        }        }
1055    
1056      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 836  while (ptr < endptr) Line 1060  while (ptr < endptr)
1060      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1061      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1062    
1063      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1064      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1065      }      }
1066    
1067    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1068    
1069    ptr += linelength + 1;    ptr += linelength + endlinelength;
1070    linenumber++;    linenumber++;
1071    
1072    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1098  switch(letter) Line 1322  switch(letter)
1322    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1323    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1324    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1325    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1326    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1327    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1328    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
# Line 1231  compile_pattern(char *pattern, int optio Line 1455  compile_pattern(char *pattern, int optio
1455  {  {
1456  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1457    {    {
1458      char *eop = pattern + strlen(pattern);
1459    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1460    for(;;)    for(;;)
1461      {      {
1462      char *p = strchr(pattern, endlinebyte);      int ellength;
1463      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1464        if (ellength == 0)
1465        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1466      sprintf(buffer, "%.*s", p - pattern - endlineextra, pattern);      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1467      pattern = p + 1;      pattern = p;
1468      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1469        return FALSE;        return FALSE;
1470      }      }
# Line 1267  char *patterns[MAX_PATTERN_COUNT]; Line 1493  char *patterns[MAX_PATTERN_COUNT];
1493  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1494  const char *error;  const char *error;
1495    
1496  /* Set the default line ending value from the default in the PCRE library. */  /* Set the default line ending value from the default in the PCRE library;
1497    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1498    */
1499    
1500  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);  (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1501  switch(i)  switch(i)
# Line 1275  switch(i) Line 1503  switch(i)
1503    default:                 newline = (char *)"lf"; break;    default:                 newline = (char *)"lf"; break;
1504    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1505    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1506      case -1:                 newline = (char *)"any"; break;
1507    }    }
1508    
1509  /* Process the options */  /* Process the options */
# Line 1565  if (colour_option != NULL && strcmp(colo Line 1794  if (colour_option != NULL && strcmp(colo
1794  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)  if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1795    {    {
1796    pcre_options |= PCRE_NEWLINE_CR;    pcre_options |= PCRE_NEWLINE_CR;
1797    endlinebyte = '\r';    endlinetype = EL_CR;
1798    }    }
1799  else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)  else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1800    {    {
1801    pcre_options |= PCRE_NEWLINE_LF;    pcre_options |= PCRE_NEWLINE_LF;
1802      endlinetype = EL_LF;
1803    }    }
1804  else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)  else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1805    {    {
1806    pcre_options |= PCRE_NEWLINE_CRLF;    pcre_options |= PCRE_NEWLINE_CRLF;
1807    endlineextra = 1;    endlinetype = EL_CRLF;
1808      }
1809    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1810      {
1811      pcre_options |= PCRE_NEWLINE_ANY;
1812      endlinetype = EL_ANY;
1813    }    }
1814  else  else
1815    {    {

Legend:
Removed from v.92  
changed lines
  Added in v.93

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12