/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 89 by nigel, Sat Feb 24 21:41:27 2007 UTC revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 56  POSSIBILITY OF SUCH DAMAGE. Line 56  POSSIBILITY OF SUCH DAMAGE.
56    
57  typedef int BOOL;  typedef int BOOL;
58    
59  #define VERSION "4.2 09-Jan-2006"  #define VERSION "4.4 29-Nov-2006"
60  #define MAX_PATTERN_COUNT 100  #define MAX_PATTERN_COUNT 100
61    
62  #if BUFSIZ > 8192  #if BUFSIZ > 8192
# Line 65  typedef int BOOL; Line 65  typedef int BOOL;
65  #define MBUFTHIRD 8192  #define MBUFTHIRD 8192
66  #endif  #endif
67    
   
68  /* Values for the "filenames" variable, which specifies options for file name  /* Values for the "filenames" variable, which specifies options for file name
69  output. The order is important; it is assumed that a file name is wanted for  output. The order is important; it is assumed that a file name is wanted for
70  all values greater than FN_DEFAULT. */  all values greater than FN_DEFAULT. */
# Line 83  enum { DEE_READ, DEE_SKIP }; Line 82  enum { DEE_READ, DEE_SKIP };
82  #define PO_LINE_MATCH     0x0002  #define PO_LINE_MATCH     0x0002
83  #define PO_FIXED_STRINGS  0x0004  #define PO_FIXED_STRINGS  0x0004
84    
85    /* Line ending types */
86    
87    enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };
88    
89    
90    
91  /*************************************************  /*************************************************
# Line 100  static const char *jfriedl_prefix = ""; Line 103  static const char *jfriedl_prefix = "";
103  static const char *jfriedl_postfix = "";  static const char *jfriedl_postfix = "";
104  #endif  #endif
105    
106    static int  endlinetype;
107    
108  static char *colour_string = (char *)"1;31";  static char *colour_string = (char *)"1;31";
109  static char *colour_option = NULL;  static char *colour_option = NULL;
110  static char *dee_option = NULL;  static char *dee_option = NULL;
111  static char *DEE_option = NULL;  static char *DEE_option = NULL;
112    static char *newline = NULL;
113  static char *pattern_filename = NULL;  static char *pattern_filename = NULL;
114  static char *stdin_name = (char *)"(standard input)";  static char *stdin_name = (char *)"(standard input)";
115  static char *locale = NULL;  static char *locale = NULL;
# Line 138  static BOOL number = FALSE; Line 144  static BOOL number = FALSE;
144  static BOOL only_matching = FALSE;  static BOOL only_matching = FALSE;
145  static BOOL quiet = FALSE;  static BOOL quiet = FALSE;
146  static BOOL silent = FALSE;  static BOOL silent = FALSE;
147    static BOOL utf8 = FALSE;
148    
149  /* Structure for options and list of them */  /* Structure for options and list of them */
150    
# Line 185  static option_item optionlist[] = { Line 192  static option_item optionlist[] = {
192    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
193    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
194    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
195      { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },
196    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
197    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
198    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 214  static const char *prefix[] = { Line 222  static const char *prefix[] = {
222  static const char *suffix[] = {  static const char *suffix[] = {
223    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
224    
225    /* UTF-8 tables - used only when the newline setting is "any". */
226    
/* utf8_table3 is indexed by the number of additional bytes in a UTF-8
character and gives the mask that is ANDed with the first byte to extract the
data bits that it carries. */
227    const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
228    
/* utf8_table4 is indexed by the low six bits of a first byte whose value is
0xc0 or greater and gives the number of additional bytes in the character. */
229    const char utf8_table4[] = {
230      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
231      1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
232      2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
233      3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
234    
235    
236    
237  /*************************************************  /*************************************************
# Line 466  return sys_errlist[n]; Line 484  return sys_errlist[n];
484    
485    
486  /*************************************************  /*************************************************
487    *             Find end of line                   *
488    *************************************************/
489    
490    /* The length of the endline sequence that is found is set via lenptr. This may
491    be zero at the very end of the file if there is no line-ending sequence there.
492    
493    Arguments:
494      p         current position in line
495      endptr    end of available data
496      lenptr    where to put the length of the eol sequence
497    
498    Returns:    pointer to the last byte of the line
499    */
500    
501    static char *
502    end_of_line(char *p, char *endptr, int *lenptr)
503    {
504    switch(endlinetype)
505      {
506      default:      /* Just in case */
507      case EL_LF:
508      while (p < endptr && *p != '\n') p++;
509      if (p < endptr)
510        {
511        *lenptr = 1;
512        return p + 1;
513        }
514      *lenptr = 0;
515      return endptr;
516    
517      case EL_CR:
518      while (p < endptr && *p != '\r') p++;
519      if (p < endptr)
520        {
521        *lenptr = 1;
522        return p + 1;
523        }
524      *lenptr = 0;
525      return endptr;
526    
527      case EL_CRLF:
528      for (;;)
529        {
530        while (p < endptr && *p != '\r') p++;
531        if (++p >= endptr)
532          {
533          *lenptr = 0;
534          return endptr;
535          }
536        if (*p == '\n')
537          {
538          *lenptr = 2;
539          return p + 1;
540          }
541        }
542      break;
543    
544      case EL_ANY:
545      while (p < endptr)
546        {
547        int extra = 0;
548        register int c = *((unsigned char *)p);
549    
550        if (utf8 && c >= 0xc0)
551          {
552          int gcii, gcss;
553          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
554          gcss = 6*extra;
555          c = (c & utf8_table3[extra]) << gcss;
556          for (gcii = 1; gcii <= extra; gcii++)
557            {
558            gcss -= 6;
559            c |= (p[gcii] & 0x3f) << gcss;
560            }
561          }
562    
563        p += 1 + extra;
564    
565        switch (c)
566          {
567          case 0x0a:    /* LF */
568          case 0x0b:    /* VT */
569          case 0x0c:    /* FF */
570          *lenptr = 1;
571          return p;
572    
573          case 0x0d:    /* CR */
574          if (p < endptr && *p == 0x0a)
575            {
576            *lenptr = 2;
577            p++;
578            }
579          else *lenptr = 1;
580          return p;
581    
582          case 0x85:    /* NEL */
583          *lenptr = utf8? 2 : 1;
584          return p;
585    
586          case 0x2028:  /* LS */
587          case 0x2029:  /* PS */
588          *lenptr = 3;
589          return p;
590    
591          default:
592          break;
593          }
594        }   /* End of loop for ANY case */
595    
596      *lenptr = 0;  /* Must have hit the end */
597      return endptr;
598      }     /* End of overall switch */
599    }
600    
601    
602    
603    /*************************************************
604    *         Find start of previous line            *
605    *************************************************/
606    
607    /* This is called when looking back for before lines to print.
608    
609    Arguments:
610      p         start of the subsequent line
611      startptr  start of available data
612    
613    Returns:    pointer to the start of the previous line
614    */
615    
616    static char *
617    previous_line(char *p, char *startptr)
618    {
619    switch(endlinetype)
620      {
621      default:      /* Just in case */
622      case EL_LF:
623      p--;
624      while (p > startptr && p[-1] != '\n') p--;
625      return p;
626    
627      case EL_CR:
628      p--;
629      while (p > startptr && p[-1] != '\n') p--;
630      return p;
631    
632      case EL_CRLF:
633      for (;;)
634        {
635        p -= 2;
636        while (p > startptr && p[-1] != '\n') p--;
637        if (p <= startptr + 1 || p[-2] == '\r') return p;
638        }
639      return p;   /* But control should never get here */
640    
641      case EL_ANY:
642      if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
643      if (utf8) while ((*p & 0xc0) == 0x80) p--;
644    
645      while (p > startptr)
646        {
647        register int c;
648        char *pp = p - 1;
649    
650        if (utf8)
651          {
652          int extra = 0;
653          while ((*pp & 0xc0) == 0x80) pp--;
654          c = *((unsigned char *)pp);
655          if (c >= 0xc0)
656            {
657            int gcii, gcss;
658            extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
659            gcss = 6*extra;
660            c = (c & utf8_table3[extra]) << gcss;
661            for (gcii = 1; gcii <= extra; gcii++)
662              {
663              gcss -= 6;
664              c |= (pp[gcii] & 0x3f) << gcss;
665              }
666            }
667          }
668        else c = *((unsigned char *)pp);
669    
670        switch (c)
671          {
672          case 0x0a:    /* LF */
673          case 0x0b:    /* VT */
674          case 0x0c:    /* FF */
675          case 0x0d:    /* CR */
676          case 0x85:    /* NEL */
677          case 0x2028:  /* LS */
678          case 0x2029:  /* PS */
679          return p;
680    
681          default:
682          break;
683          }
684    
685        p = pp;  /* Back one character */
686        }        /* End of loop for ANY case */
687    
688      return startptr;  /* Hit start of data */
689      }     /* End of overall switch */
690    }
691    
692    
693    
694    
695    
696    /*************************************************
697  *       Print the previous "after" lines         *  *       Print the previous "after" lines         *
698  *************************************************/  *************************************************/
699    
# Line 490  if (after_context > 0 && lastmatchnumber Line 718  if (after_context > 0 && lastmatchnumber
718    int count = 0;    int count = 0;
719    while (lastmatchrestart < endptr && count++ < after_context)    while (lastmatchrestart < endptr && count++ < after_context)
720      {      {
721        int ellength;
722      char *pp = lastmatchrestart;      char *pp = lastmatchrestart;
723      if (printname != NULL) fprintf(stdout, "%s-", printname);      if (printname != NULL) fprintf(stdout, "%s-", printname);
724      if (number) fprintf(stdout, "%d-", lastmatchnumber++);      if (number) fprintf(stdout, "%d-", lastmatchnumber++);
725      while (*pp != '\n') pp++;      pp = end_of_line(pp, endptr, &ellength);
726      fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);      fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
727      lastmatchrestart = pp + 1;      lastmatchrestart = pp;
728      }      }
729    hyphenpending = TRUE;    hyphenpending = TRUE;
730    }    }
# Line 552  way, the buffer is shifted left and re-f Line 781  way, the buffer is shifted left and re-f
781    
782  while (ptr < endptr)  while (ptr < endptr)
783    {    {
784    int i;    int i, endlinelength;
785    int mrc = 0;    int mrc = 0;
786    BOOL match = FALSE;    BOOL match = FALSE;
787    char *t = ptr;    char *t = ptr;
# Line 565  while (ptr < endptr) Line 794  while (ptr < endptr)
794    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so    line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
795    that any match is constrained to be in the first line. */    that any match is constrained to be in the first line. */
796    
797    linelength = 0;    t = end_of_line(t, endptr, &endlinelength);
798    while (t < endptr && *t++ != '\n') linelength++;    linelength = t - ptr - endlinelength;
799    length = multiline? endptr - ptr : linelength;    length = multiline? endptr - ptr : linelength;
800    
   
801    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
802    
803  #ifdef JFRIEDL_DEBUG  #ifdef JFRIEDL_DEBUG
# Line 700  while (ptr < endptr) Line 928  while (ptr < endptr)
928    
929        if (after_context > 0 && lastmatchnumber > 0)        if (after_context > 0 && lastmatchnumber > 0)
930          {          {
931            int ellength;
932          int linecount = 0;          int linecount = 0;
933          char *p = lastmatchrestart;          char *p = lastmatchrestart;
934    
935          while (p < ptr && linecount < after_context)          while (p < ptr && linecount < after_context)
936            {            {
937            while (*p != '\n') p++;            p = end_of_line(p, ptr, &ellength);
           p++;  
938            linecount++;            linecount++;
939            }            }
940    
# Line 719  while (ptr < endptr) Line 947  while (ptr < endptr)
947            char *pp = lastmatchrestart;            char *pp = lastmatchrestart;
948            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
949            if (number) fprintf(stdout, "%d-", lastmatchnumber++);            if (number) fprintf(stdout, "%d-", lastmatchnumber++);
950            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
951            fwrite(lastmatchrestart, 1, pp - lastmatchrestart + 1, stdout);            fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
952            lastmatchrestart = pp + 1;            lastmatchrestart = pp;
953            }            }
954          if (lastmatchrestart != ptr) hyphenpending = TRUE;          if (lastmatchrestart != ptr) hyphenpending = TRUE;
955          }          }
# Line 747  while (ptr < endptr) Line 975  while (ptr < endptr)
975                 linecount < before_context)                 linecount < before_context)
976            {            {
977            linecount++;            linecount++;
978            p--;            p = previous_line(p, buffer);
           while (p > buffer && p[-1] != '\n') p--;  
979            }            }
980    
981          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)          if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
# Line 756  while (ptr < endptr) Line 983  while (ptr < endptr)
983    
984          while (p < ptr)          while (p < ptr)
985            {            {
986              int ellength;
987            char *pp = p;            char *pp = p;
988            if (printname != NULL) fprintf(stdout, "%s-", printname);            if (printname != NULL) fprintf(stdout, "%s-", printname);
989            if (number) fprintf(stdout, "%d-", linenumber - linecount--);            if (number) fprintf(stdout, "%d-", linenumber - linecount--);
990            while (*pp != '\n') pp++;            pp = end_of_line(pp, endptr, &ellength);
991            fwrite(p, 1, pp - p + 1, stdout);   /* In case binary zero */            fwrite(p, 1, pp - p, stdout);
992            p = pp + 1;            p = pp;
993            }            }
994          }          }
995    
# Line 777  while (ptr < endptr) Line 1005  while (ptr < endptr)
1005        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1006        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1007        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately. Because the PCRE_FIRSTLINE option is set, the
1008        start of the match will always be before the first \n character. */        start of the match will always be before the first newline sequence. */
1009    
1010        if (multiline)        if (multiline)
1011          {          {
1012            int ellength;
1013          char *endmatch = ptr + offsets[1];          char *endmatch = ptr + offsets[1];
1014          t = ptr;          t = ptr;
1015          while (t < endmatch) { if (*t++ == '\n') linenumber++; }          while (t < endmatch)
1016          while (endmatch < endptr && *endmatch != '\n') endmatch++;            {
1017          linelength = endmatch - ptr;            t = end_of_line(t, endptr, &ellength);
1018              if (t <= endmatch) linenumber++; else break;
1019              }
1020            endmatch = end_of_line(endmatch, endptr, &ellength);
1021            linelength = endmatch - ptr - ellength;
1022          }          }
1023    
1024        /*** NOTE: Use only fwrite() to output the data line, so that binary        /*** NOTE: Use only fwrite() to output the data line, so that binary
# Line 817  while (ptr < endptr) Line 1050  while (ptr < endptr)
1050          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1051          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);
1052          }          }
1053        else fwrite(ptr, 1, linelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
   
       fprintf(stdout, "\n");  
1054        }        }
1055    
1056      /* End of doing what has to be done for a match */      /* End of doing what has to be done for a match */
# Line 829  while (ptr < endptr) Line 1060  while (ptr < endptr)
1060      /* Remember where the last match happened for after_context. We remember      /* Remember where the last match happened for after_context. We remember
1061      where we are about to restart, and that line's number. */      where we are about to restart, and that line's number. */
1062    
1063      lastmatchrestart = ptr + linelength + 1;      lastmatchrestart = ptr + linelength + endlinelength;
1064      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1065      }      }
1066    
1067    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1068    
1069    ptr += linelength + 1;    ptr += linelength + endlinelength;
1070    linenumber++;    linenumber++;
1071    
1072    /* If we haven't yet reached the end of the file (the buffer is full), and    /* If we haven't yet reached the end of the file (the buffer is full), and
# Line 1091  switch(letter) Line 1322  switch(letter)
1322    case 'q': quiet = TRUE; break;    case 'q': quiet = TRUE; break;
1323    case 'r': dee_action = dee_RECURSE; break;    case 'r': dee_action = dee_RECURSE; break;
1324    case 's': silent = TRUE; break;    case 's': silent = TRUE; break;
1325    case 'u': options |= PCRE_UTF8; break;    case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1326    case 'v': invert = TRUE; break;    case 'v': invert = TRUE; break;
1327    case 'w': process_options |= PO_WORD_MATCH; break;    case 'w': process_options |= PO_WORD_MATCH; break;
1328    case 'x': process_options |= PO_LINE_MATCH; break;    case 'x': process_options |= PO_LINE_MATCH; break;
# Line 1206  return FALSE; Line 1437  return FALSE;
1437  *************************************************/  *************************************************/
1438    
1439  /* When the -F option has been used, each string may be a list of strings,  /* When the -F option has been used, each string may be a list of strings,
1440  separated by newlines. They will be matched literally.  separated by line breaks. They will be matched literally.
1441    
1442  Arguments:  Arguments:
1443    pattern        the pattern string    pattern        the pattern string
# Line 1224  compile_pattern(char *pattern, int optio Line 1455  compile_pattern(char *pattern, int optio
1455  {  {
1456  if ((process_options & PO_FIXED_STRINGS) != 0)  if ((process_options & PO_FIXED_STRINGS) != 0)
1457    {    {
1458      char *eop = pattern + strlen(pattern);
1459    char buffer[MBUFTHIRD];    char buffer[MBUFTHIRD];
1460    for(;;)    for(;;)
1461      {      {
1462      char *p = strchr(pattern, '\n');      int ellength;
1463      if (p == NULL)      char *p = end_of_line(pattern, eop, &ellength);
1464        if (ellength == 0)
1465        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1466      sprintf(buffer, "%.*s", p - pattern, pattern);      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);
1467      pattern = p + 1;      pattern = p;
1468      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1469        return FALSE;        return FALSE;
1470      }      }
# Line 1260  char *patterns[MAX_PATTERN_COUNT]; Line 1493  char *patterns[MAX_PATTERN_COUNT];
1493  const char *locale_from = "--locale";  const char *locale_from = "--locale";
1494  const char *error;  const char *error;
1495    
1496    /* Set the default line ending value from the default in the PCRE library;
1497    "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1498    */
1499    
1500    (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1501    switch(i)
1502      {
1503      default:                 newline = (char *)"lf"; break;
1504      case '\r':               newline = (char *)"cr"; break;
1505      case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1506      case -1:                 newline = (char *)"any"; break;
1507      }
1508    
1509  /* Process the options */  /* Process the options */
1510    
1511  for (i = 1; i < argc; i++)  for (i = 1; i < argc; i++)
# Line 1543  if (colour_option != NULL && strcmp(colo Line 1789  if (colour_option != NULL && strcmp(colo
1789      }      }
1790    }    }
1791    
1792    /* Interpret the newline type; the default settings are Unix-like. */
1793    
1794    if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1795      {
1796      pcre_options |= PCRE_NEWLINE_CR;
1797      endlinetype = EL_CR;
1798      }
1799    else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1800      {
1801      pcre_options |= PCRE_NEWLINE_LF;
1802      endlinetype = EL_LF;
1803      }
1804    else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1805      {
1806      pcre_options |= PCRE_NEWLINE_CRLF;
1807      endlinetype = EL_CRLF;
1808      }
1809    else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1810      {
1811      pcre_options |= PCRE_NEWLINE_ANY;
1812      endlinetype = EL_ANY;
1813      }
1814    else
1815      {
1816      fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1817      return 2;
1818      }
1819    
1820  /* Interpret the text values for -d and -D */  /* Interpret the text values for -d and -D */
1821    
1822  if (dee_option != NULL)  if (dee_option != NULL)

Legend:
Removed from v.89  
changed lines
  Added in v.96

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12