/[pcre]/code/trunk/pcregrep.c
ViewVC logotype

Diff of /code/trunk/pcregrep.c

Parent Directory | Revision Log | View Patch

revision 137 by ph10, Thu Mar 29 13:56:00 2007 UTC revision 243 by ph10, Thu Sep 13 09:28:14 2007 UTC
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #  include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 50  POSSIBILITY OF SUCH DAMAGE. Line 50  POSSIBILITY OF SUCH DAMAGE.
50    
51  #include <sys/types.h>  #include <sys/types.h>
52  #include <sys/stat.h>  #include <sys/stat.h>
53    
54  #ifdef HAVE_UNISTD_H  #ifdef HAVE_UNISTD_H
55  #  include <unistd.h>  #include <unistd.h>
56  #endif  #endif
57    
58  #include <pcre.h>  #include "pcre.h"
59    
60  #define FALSE 0  #define FALSE 0
61  #define TRUE 1  #define TRUE 1
# Line 88  enum { DEE_READ, DEE_SKIP }; Line 89  enum { DEE_READ, DEE_SKIP };
89    
90  /* Line ending types */  /* Line ending types */
91    
92  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY };  enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
93    
94    
95    
# Line 196  static option_item optionlist[] = { Line 197  static option_item optionlist[] = {
197    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },    { OP_STRING,    N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
198    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },    { OP_STRING,    N_LOCALE, &locale,           "locale=locale", "use the named locale" },
199    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },    { OP_NODATA,    'M',      NULL,              "multiline",     "run in multiline mode" },
200    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LR, CRLF)" },    { OP_STRING,    'N',      &newline,          "newline=type",  "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },    { OP_NODATA,    'n',      NULL,              "line-number",   "print line number with output lines" },
202    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },    { OP_NODATA,    'o',      NULL,              "only-matching", "show only the part of the line that matched" },
203    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },    { OP_NODATA,    'q',      NULL,              "quiet",         "suppress output, just set return code" },
# Line 226  static const char *prefix[] = { Line 227  static const char *prefix[] = {
227  static const char *suffix[] = {  static const char *suffix[] = {
228    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };    "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
229    
230  /* UTF-8 tables - used only when the newline setting is "all". */  /* UTF-8 tables - used only when the newline setting is "any". */
231    
232  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
233    
# Line 280  for (;;) Line 281  for (;;)
281    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)    if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
282      return dent->d_name;      return dent->d_name;
283    }    }
284  return NULL;   /* Keep compiler happy; never executed */  /* Control never reaches here */
285  }  }
286    
287  static void  static void
# Line 545  switch(endlinetype) Line 546  switch(endlinetype)
546      }      }
547    break;    break;
548    
549      case EL_ANYCRLF:
550      while (p < endptr)
551        {
552        int extra = 0;
553        register int c = *((unsigned char *)p);
554    
555        if (utf8 && c >= 0xc0)
556          {
557          int gcii, gcss;
558          extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
559          gcss = 6*extra;
560          c = (c & utf8_table3[extra]) << gcss;
561          for (gcii = 1; gcii <= extra; gcii++)
562            {
563            gcss -= 6;
564            c |= (p[gcii] & 0x3f) << gcss;
565            }
566          }
567    
568        p += 1 + extra;
569    
570        switch (c)
571          {
572          case 0x0a:    /* LF */
573          *lenptr = 1;
574          return p;
575    
576          case 0x0d:    /* CR */
577          if (p < endptr && *p == 0x0a)
578            {
579            *lenptr = 2;
580            p++;
581            }
582          else *lenptr = 1;
583          return p;
584    
585          default:
586          break;
587          }
588        }   /* End of loop for ANYCRLF case */
589    
590      *lenptr = 0;  /* Must have hit the end */
591      return endptr;
592    
593    case EL_ANY:    case EL_ANY:
594    while (p < endptr)    while (p < endptr)
595      {      {
# Line 643  switch(endlinetype) Line 688  switch(endlinetype)
688    return p;   /* But control should never get here */    return p;   /* But control should never get here */
689    
690    case EL_ANY:    case EL_ANY:
691      case EL_ANYCRLF:
692    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;    if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693    if (utf8) while ((*p & 0xc0) == 0x80) p--;    if (utf8) while ((*p & 0xc0) == 0x80) p--;
694    
# Line 671  switch(endlinetype) Line 717  switch(endlinetype)
717        }        }
718      else c = *((unsigned char *)pp);      else c = *((unsigned char *)pp);
719    
720      switch (c)      if (endlinetype == EL_ANYCRLF) switch (c)
721          {
722          case 0x0a:    /* LF */
723          case 0x0d:    /* CR */
724          return p;
725    
726          default:
727          break;
728          }
729    
730        else switch (c)
731        {        {
732        case 0x0a:    /* LF */        case 0x0a:    /* LF */
733        case 0x0b:    /* VT */        case 0x0b:    /* VT */
# Line 800  while (ptr < endptr) Line 856  while (ptr < endptr)
856    
857    t = end_of_line(t, endptr, &endlinelength);    t = end_of_line(t, endptr, &endlinelength);
858    linelength = t - ptr - endlinelength;    linelength = t - ptr - endlinelength;
859    length = multiline? endptr - ptr : linelength;    length = multiline? (size_t)(endptr - ptr) : linelength;
860    
861    /* Extra processing for Jeffrey Friedl's debugging. */    /* Extra processing for Jeffrey Friedl's debugging. */
862    
# Line 1008  while (ptr < endptr) Line 1064  while (ptr < endptr)
1064    
1065        /* In multiline mode, we want to print to the end of the line in which        /* In multiline mode, we want to print to the end of the line in which
1066        the end of the matched string is found, so we adjust linelength and the        the end of the matched string is found, so we adjust linelength and the
1067        line number appropriately. Because the PCRE_FIRSTLINE option is set, the        line number appropriately, but only when there actually was a match
1068        start of the match will always be before the first newline sequence. */        (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069          the match will always be before the first newline sequence. */
1070    
1071        if (multiline)        if (multiline)
1072          {          {
1073          int ellength;          int ellength;
1074          char *endmatch = ptr + offsets[1];          char *endmatch = ptr;
1075          t = ptr;          if (!invert)
         while (t < endmatch)  
1076            {            {
1077            t = end_of_line(t, endptr, &ellength);            endmatch += offsets[1];
1078            if (t <= endmatch) linenumber++; else break;            t = ptr;
1079              while (t < endmatch)
1080                {
1081                t = end_of_line(t, endptr, &ellength);
1082                if (t <= endmatch) linenumber++; else break;
1083                }
1084            }            }
1085          endmatch = end_of_line(endmatch, endptr, &ellength);          endmatch = end_of_line(endmatch, endptr, &ellength);
1086          linelength = endmatch - ptr - ellength;          linelength = endmatch - ptr - ellength;
# Line 1052  while (ptr < endptr) Line 1113  while (ptr < endptr)
1113          fprintf(stdout, "%c[%sm", 0x1b, colour_string);          fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);          fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115          fprintf(stdout, "%c[00m", 0x1b);          fprintf(stdout, "%c[00m", 0x1b);
1116          fwrite(ptr + offsets[1], 1, linelength - offsets[1], stdout);          fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1117              stdout);
1118          }          }
1119        else fwrite(ptr, 1, linelength + endlinelength, stdout);        else fwrite(ptr, 1, linelength + endlinelength, stdout);
1120        }        }
# Line 1068  while (ptr < endptr) Line 1130  while (ptr < endptr)
1130      lastmatchnumber = linenumber + 1;      lastmatchnumber = linenumber + 1;
1131      }      }
1132    
1133      /* For a match in multiline inverted mode (which of course did not cause
1134      anything to be printed), we have to move on to the end of the match before
1135      proceeding. */
1136    
1137      if (multiline && invert && match)
1138        {
1139        int ellength;
1140        char *endmatch = ptr + offsets[1];
1141        t = ptr;
1142        while (t < endmatch)
1143          {
1144          t = end_of_line(t, endptr, &ellength);
1145          if (t <= endmatch) linenumber++; else break;
1146          }
1147        endmatch = end_of_line(endmatch, endptr, &ellength);
1148        linelength = endmatch - ptr - ellength;
1149        }
1150    
1151    /* Advance to after the newline and increment the line number. */    /* Advance to after the newline and increment the line number. */
1152    
1153    ptr += linelength + endlinelength;    ptr += linelength + endlinelength;
# Line 1408  sprintf(buffer, "%s%.*s%s", prefix[proce Line 1488  sprintf(buffer, "%s%.*s%s", prefix[proce
1488    suffix[process_options]);    suffix[process_options]);
1489  pattern_list[pattern_count] =  pattern_list[pattern_count] =
1490    pcre_compile(buffer, options, &error, &errptr, pcretables);    pcre_compile(buffer, options, &error, &errptr, pcretables);
1491  if (pattern_list[pattern_count++] != NULL) return TRUE;  if (pattern_list[pattern_count] != NULL)
1492      {
1493      pattern_count++;
1494      return TRUE;
1495      }
1496    
1497  /* Handle compile errors */  /* Handle compile errors */
1498    
# Line 1466  if ((process_options & PO_FIXED_STRINGS) Line 1550  if ((process_options & PO_FIXED_STRINGS)
1550      char *p = end_of_line(pattern, eop, &ellength);      char *p = end_of_line(pattern, eop, &ellength);
1551      if (ellength == 0)      if (ellength == 0)
1552        return compile_single_pattern(pattern, options, filename, count);        return compile_single_pattern(pattern, options, filename, count);
1553      sprintf(buffer, "%.*s", p - pattern - ellength, pattern);      sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1554      pattern = p;      pattern = p;
1555      if (!compile_single_pattern(buffer, options, filename, count))      if (!compile_single_pattern(buffer, options, filename, count))
1556        return FALSE;        return FALSE;
# Line 1490  int i, j; Line 1574  int i, j;
1574  int rc = 1;  int rc = 1;
1575  int pcre_options = 0;  int pcre_options = 0;
1576  int cmd_pattern_count = 0;  int cmd_pattern_count = 0;
1577    int hint_count = 0;
1578  int errptr;  int errptr;
1579  BOOL only_one_at_top;  BOOL only_one_at_top;
1580  char *patterns[MAX_PATTERN_COUNT];  char *patterns[MAX_PATTERN_COUNT];
# Line 1507  switch(i) Line 1592  switch(i)
1592    case '\r':               newline = (char *)"cr"; break;    case '\r':               newline = (char *)"cr"; break;
1593    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;    case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594    case -1:                 newline = (char *)"any"; break;    case -1:                 newline = (char *)"any"; break;
1595      case -2:                 newline = (char *)"anycrlf"; break;
1596    }    }
1597    
1598  /* Process the options */  /* Process the options */
# Line 1564  for (i = 1; i < argc; i++) Line 1650  for (i = 1; i < argc; i++)
1650          else                 /* Special case xxx=data */          else                 /* Special case xxx=data */
1651            {            {
1652            int oplen = equals - op->long_name;            int oplen = equals - op->long_name;
1653            int arglen = (argequals == NULL)? strlen(arg) : argequals - arg;            int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)            if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1655              {              {
1656              option_data = arg + arglen;              option_data = arg + arglen;
# Line 1583  for (i = 1; i < argc; i++) Line 1669  for (i = 1; i < argc; i++)
1669          char buff2[24];          char buff2[24];
1670          int baselen = opbra - op->long_name;          int baselen = opbra - op->long_name;
1671          sprintf(buff1, "%.*s", baselen, op->long_name);          sprintf(buff1, "%.*s", baselen, op->long_name);
1672          sprintf(buff2, "%s%.*s", buff1, strlen(op->long_name) - baselen - 2,          sprintf(buff2, "%s%.*s", buff1,
1673            opbra + 1);            (int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)          if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1675            break;            break;
1676          }          }
# Line 1814  else if (strcmp(newline, "any") == 0 || Line 1900  else if (strcmp(newline, "any") == 0 ||
1900    pcre_options |= PCRE_NEWLINE_ANY;    pcre_options |= PCRE_NEWLINE_ANY;
1901    endlinetype = EL_ANY;    endlinetype = EL_ANY;
1902    }    }
1903    else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1904      {
1905      pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906      endlinetype = EL_ANYCRLF;
1907      }
1908  else  else
1909    {    {
1910    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);    fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
# Line 1942  for (j = 0; j < pattern_count; j++) Line 2033  for (j = 0; j < pattern_count; j++)
2033      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);      fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2034      goto EXIT2;      goto EXIT2;
2035      }      }
2036      hint_count++;
2037    }    }
2038    
2039  /* If there are include or exclude patterns, compile them. */  /* If there are include or exclude patterns, compile them. */
# Line 2001  if (pattern_list != NULL) Line 2093  if (pattern_list != NULL)
2093    }    }
2094  if (hints_list != NULL)  if (hints_list != NULL)
2095    {    {
2096    for (i = 0; i < pattern_count; i++) free(hints_list[i]);    for (i = 0; i < hint_count; i++) free(hints_list[i]);
2097    free(hints_list);    free(hints_list);
2098    }    }
2099  return rc;  return rc;

Legend:
Removed from v.137  
changed lines
  Added in v.243

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12