/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2006 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 116  static const short int escapes[] = { Line 116  static const short int escapes[] = {
116    
117    
118  /* Tables of names of POSIX character classes and their lengths. The list is  /* Tables of names of POSIX character classes and their lengths. The list is
119  terminated by a zero length entry. The first three must be alpha, upper, lower,  terminated by a zero length entry. The first three must be alpha, lower, upper,
120  as this is assumed for handling case independence. */  as this is assumed for handling case independence. */
121    
122  static const char *const posix_names[] = {  static const char *const posix_names[] = {
# Line 127  static const char *const posix_names[] = Line 127  static const char *const posix_names[] =
127  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
128    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
129    
130  /* Table of class bit maps for each POSIX class; up to three may be combined  /* Table of class bit maps for each POSIX class. Each class is formed from a
131  to form the class. The table for [:blank:] is dynamically modified to remove  base map, with an optional addition or removal of another map. Then, for some
132  the vertical space characters. */  classes, there is some additional tweaking: for [:blank:] the vertical space
133    characters are removed, and for [:alpha:] and [:alnum:] the underscore
134    character is removed. The triples in the table consist of the base map offset,
135    second map offset or -1 if no second map, and a non-negative value for map
136    addition or a negative value for map subtraction (if there are two maps). The
137    absolute value of the third field has these meanings: 0 => no tweaking, 1 =>
138    remove vertical space characters, 2 => remove underscore. */
139    
140  static const int posix_class_maps[] = {  static const int posix_class_maps[] = {
141    cbit_lower, cbit_upper, -1,             /* alpha */    cbit_word,  cbit_digit, -2,             /* alpha */
142    cbit_lower, -1,         -1,             /* lower */    cbit_lower, -1,          0,             /* lower */
143    cbit_upper, -1,         -1,             /* upper */    cbit_upper, -1,          0,             /* upper */
144    cbit_digit, cbit_lower, cbit_upper,     /* alnum */    cbit_word,  -1,          2,             /* alnum - word without underscore */
145    cbit_print, cbit_cntrl, -1,             /* ascii */    cbit_print, cbit_cntrl,  0,             /* ascii */
146    cbit_space, -1,         -1,             /* blank - a GNU extension */    cbit_space, -1,          1,             /* blank - a GNU extension */
147    cbit_cntrl, -1,         -1,             /* cntrl */    cbit_cntrl, -1,          0,             /* cntrl */
148    cbit_digit, -1,         -1,             /* digit */    cbit_digit, -1,          0,             /* digit */
149    cbit_graph, -1,         -1,             /* graph */    cbit_graph, -1,          0,             /* graph */
150    cbit_print, -1,         -1,             /* print */    cbit_print, -1,          0,             /* print */
151    cbit_punct, -1,         -1,             /* punct */    cbit_punct, -1,          0,             /* punct */
152    cbit_space, -1,         -1,             /* space */    cbit_space, -1,          0,             /* space */
153    cbit_word,  -1,         -1,             /* word - a Perl extension */    cbit_word,  -1,          0,             /* word - a Perl extension */
154    cbit_xdigit,-1,         -1              /* xdigit */    cbit_xdigit,-1,          0              /* xdigit */
155  };  };
156    
157    
# Line 371  static int Line 377  static int
377  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,  check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount,
378    int options, BOOL isclass)    int options, BOOL isclass)
379  {  {
380  const uschar *ptr = *ptrptr;  BOOL utf8 = (options & PCRE_UTF8) != 0;
381    const uschar *ptr = *ptrptr + 1;
382  int c, i;  int c, i;
383    
384    GETCHARINCTEST(c, ptr);           /* Get character value, increment pointer */
385    ptr--;                            /* Set pointer back to the last byte */
386    
387  /* If backslash is at the end of the pattern, it's an error. */  /* If backslash is at the end of the pattern, it's an error. */
388    
 c = *(++ptr);  
389  if (c == 0) *errorcodeptr = ERR1;  if (c == 0) *errorcodeptr = ERR1;
390    
391  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in  /* Non-alphamerics are literals. For digits or letters, do an initial lookup in
# Line 460  else Line 469  else
469      c &= 255;     /* Take least significant 8 bits */      c &= 255;     /* Take least significant 8 bits */
470      break;      break;
471    
472      /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number      /* \x is complicated. \x{ddd} is a character number which can be greater
473      which can be greater than 0xff, but only if the ddd are hex digits. */      than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
474        treated as a data character. */
475    
476      case 'x':      case 'x':
477  #ifdef SUPPORT_UTF8      if (ptr[1] == '{')
     if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)  
478        {        {
479        const uschar *pt = ptr + 2;        const uschar *pt = ptr + 2;
480        register int count = 0;        int count = 0;
481    
482        c = 0;        c = 0;
483        while ((digitab[*pt] & ctype_xdigit) != 0)        while ((digitab[*pt] & ctype_xdigit) != 0)
484          {          {
485          int cc = *pt++;          register int cc = *pt++;
486            if (c == 0 && cc == '0') continue;     /* Leading zeroes */
487          count++;          count++;
488    
489  #if !EBCDIC    /* ASCII coding */  #if !EBCDIC    /* ASCII coding */
490          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= 'a') cc -= 32;               /* Convert to upper case */
491          c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
492  #else          /* EBCDIC coding */  #else          /* EBCDIC coding */
493          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
494          c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
495  #endif  #endif
496          }          }
497    
498        if (*pt == '}')        if (*pt == '}')
499          {          {
500          if (c < 0 || count > 8) *errorcodeptr = ERR34;          if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
501          ptr = pt;          ptr = pt;
502          break;          break;
503          }          }
504    
505        /* If the sequence of hex digits does not end with '}', then we don't        /* If the sequence of hex digits does not end with '}', then we don't
506        recognize this construct; fall through to the normal \x handling. */        recognize this construct; fall through to the normal \x handling. */
507        }        }
 #endif  
508    
509      /* Read just a single hex char */      /* Read just a single-byte hex-defined char */
510    
511      c = 0;      c = 0;
512      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)
# Line 569  escape sequence. Line 582  escape sequence.
582  Argument:  Argument:
583    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
584    negptr         points to a boolean that is set TRUE for negation else FALSE    negptr         points to a boolean that is set TRUE for negation else FALSE
585      dptr           points to an int that is set to the detailed property value
586    errorcodeptr   points to the error code variable    errorcodeptr   points to the error code variable
587    
588  Returns:     value from ucp_type_table, or -1 for an invalid type  Returns:         type value from ucp_type_table, or -1 for an invalid type
589  */  */
590    
591  static int  static int
592  get_ucp(const uschar **ptrptr, BOOL *negptr, int *errorcodeptr)  get_ucp(const uschar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
593  {  {
594  int c, i, bot, top;  int c, i, bot, top;
595  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
596  char name[4];  char name[32];
597    
598  c = *(++ptr);  c = *(++ptr);
599  if (c == 0) goto ERROR_RETURN;  if (c == 0) goto ERROR_RETURN;
600    
601  *negptr = FALSE;  *negptr = FALSE;
602    
603  /* \P or \p can be followed by a one- or two-character name in {}, optionally  /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
604  preceded by ^ for negation. */  negation. */
605    
606  if (c == '{')  if (c == '{')
607    {    {
# Line 596  if (c == '{') Line 610  if (c == '{')
610      *negptr = TRUE;      *negptr = TRUE;
611      ptr++;      ptr++;
612      }      }
613    for (i = 0; i <= 2; i++)    for (i = 0; i < sizeof(name) - 1; i++)
614      {      {
615      c = *(++ptr);      c = *(++ptr);
616      if (c == 0) goto ERROR_RETURN;      if (c == 0) goto ERROR_RETURN;
617      if (c == '}') break;      if (c == '}') break;
618      name[i] = c;      name[i] = c;
619      }      }
620    if (c !='}')   /* Try to distinguish error cases */    if (c !='}') goto ERROR_RETURN;
     {  
     while (*(++ptr) != 0 && *ptr != '}');  
     if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN;  
     }  
621    name[i] = 0;    name[i] = 0;
622    }    }
623    
# Line 628  top = _pcre_utt_size; Line 638  top = _pcre_utt_size;
638    
639  while (bot < top)  while (bot < top)
640    {    {
641    i = (bot + top)/2;    i = (bot + top) >> 1;
642    c = strcmp(name, _pcre_utt[i].name);    c = strcmp(name, _pcre_utt[i].name);
643    if (c == 0) return _pcre_utt[i].value;    if (c == 0)
644        {
645        *dptr = _pcre_utt[i].value;
646        return _pcre_utt[i].type;
647        }
648    if (c > 0) bot = i + 1; else top = i;    if (c > 0) bot = i + 1; else top = i;
649    }    }
650    
 UNKNOWN_RETURN:  
651  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
652  *ptrptr = ptr;  *ptrptr = ptr;
653  return -1;  return -1;
# Line 937  for (;;) Line 950  for (;;)
950    
951      case OP_PROP:      case OP_PROP:
952      case OP_NOTPROP:      case OP_NOTPROP:
953      cc++;      cc += 2;
954      /* Fall through */      /* Fall through */
955    
956      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
# Line 1497  Yield: TRUE when range returned; Line 1510  Yield: TRUE when range returned;
1510  static BOOL  static BOOL
1511  get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)  get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)
1512  {  {
1513  int c, chartype, othercase, next;  int c, othercase, next;
1514    
1515  for (c = *cptr; c <= d; c++)  for (c = *cptr; c <= d; c++)
1516    {    { if ((othercase = _pcre_ucp_othercase(c)) >= 0) break; }
   if (_pcre_ucp_findchar(c, &chartype, &othercase) == ucp_L && othercase != 0)  
     break;  
   }  
1517    
1518  if (c > d) return FALSE;  if (c > d) return FALSE;
1519    
# Line 1512  next = othercase + 1; Line 1522  next = othercase + 1;
1522    
1523  for (++c; c <= d; c++)  for (++c; c <= d; c++)
1524    {    {
1525    if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_L ||    if (_pcre_ucp_othercase(c) != next) break;
         othercase != next)  
     break;  
1526    next++;    next++;
1527    }    }
1528    
# Line 1731  for (;; ptr++) Line 1739  for (;; ptr++)
1739      *code++ = OP_ANY;      *code++ = OP_ANY;
1740      break;      break;
1741    
1742      /* Character classes. If the included characters are all < 255 in value, we      /* Character classes. If the included characters are all < 256, we build a
1743      build a 32-byte bitmap of the permitted characters, except in the special      32-byte bitmap of the permitted characters, except in the special case
1744      case where there is only one such character. For negated classes, we build      where there is only one such character. For negated classes, we build the
1745      the map as usual, then invert it at the end. However, we use a different      map as usual, then invert it at the end. However, we use a different opcode
1746      opcode so that data characters > 255 can be handled correctly.      so that data characters > 255 can be handled correctly.
1747    
1748      If the class contains characters outside the 0-255 range, a different      If the class contains characters outside the 0-255 range, a different
1749      opcode is compiled. It may optionally have a bit map for characters < 256,      opcode is compiled. It may optionally have a bit map for characters < 256,
# Line 1826  for (;; ptr++) Line 1834  for (;; ptr++)
1834            check_posix_syntax(ptr, &tempptr, cd))            check_posix_syntax(ptr, &tempptr, cd))
1835          {          {
1836          BOOL local_negate = FALSE;          BOOL local_negate = FALSE;
1837          int posix_class, i;          int posix_class, taboffset, tabopt;
1838          register const uschar *cbits = cd->cbits;          register const uschar *cbits = cd->cbits;
1839            uschar pbits[32];
1840    
1841          if (ptr[1] != ':')          if (ptr[1] != ':')
1842            {            {
# Line 1856  for (;; ptr++) Line 1865  for (;; ptr++)
1865          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
1866            posix_class = 0;            posix_class = 0;
1867    
1868          /* Or into the map we are building up to 3 of the static class          /* We build the bit map for the POSIX class in a chunk of local store
1869          tables, or their negations. The [:blank:] class sets up the same          because we may be adding and subtracting from it, and we don't want to
1870          chars as the [:space:] class (all white space). We remove the vertical          subtract bits that may be in the main map already. At the end we or the
1871          white space chars afterwards. */          result into the bit map that is being built. */
1872    
1873          posix_class *= 3;          posix_class *= 3;
1874          for (i = 0; i < 3; i++)  
1875            /* Copy in the first table (always present) */
1876    
1877            memcpy(pbits, cbits + posix_class_maps[posix_class],
1878              32 * sizeof(uschar));
1879    
1880            /* If there is a second table, add or remove it as required. */
1881    
1882            taboffset = posix_class_maps[posix_class + 1];
1883            tabopt = posix_class_maps[posix_class + 2];
1884    
1885            if (taboffset >= 0)
1886            {            {
1887            BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0;            if (tabopt >= 0)
1888            int taboffset = posix_class_maps[posix_class + i];              for (c = 0; c < 32; c++) pbits[c] |= cbits[c + taboffset];
           if (taboffset < 0) break;  
           if (local_negate)  
             {  
             if (i == 0)  
               for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+taboffset];  
             else  
               for (c = 0; c < 32; c++) classbits[c] &= ~cbits[c+taboffset];  
             if (blankclass) classbits[1] |= 0x3c;  
             }  
1889            else            else
1890              {              for (c = 0; c < 32; c++) pbits[c] &= ~cbits[c + taboffset];
             for (c = 0; c < 32; c++) classbits[c] |= cbits[c+taboffset];  
             if (blankclass) classbits[1] &= ~0x3c;  
             }  
1891            }            }
1892    
1893            /* Now see if we need to remove any special characters. An option
1894            value of 1 removes vertical space and 2 removes underscore. */
1895    
1896            if (tabopt < 0) tabopt = -tabopt;
1897            if (tabopt == 1) pbits[1] &= ~0x3c;
1898              else if (tabopt == 2) pbits[11] &= 0x7f;
1899    
1900            /* Add the POSIX table or its complement into the main table that is
1901            being built and we are done. */
1902    
1903            if (local_negate)
1904              for (c = 0; c < 32; c++) classbits[c] |= ~pbits[c];
1905            else
1906              for (c = 0; c < 32; c++) classbits[c] |= pbits[c];
1907    
1908          ptr = tempptr + 1;          ptr = tempptr + 1;
1909          class_charcount = 10;  /* Set > 1; assumes more than 1 per class */          class_charcount = 10;  /* Set > 1; assumes more than 1 per class */
1910          continue;    /* End of POSIX syntax handling */          continue;    /* End of POSIX syntax handling */
# Line 1948  for (;; ptr++) Line 1971  for (;; ptr++)
1971              case ESC_P:              case ESC_P:
1972                {                {
1973                BOOL negated;                BOOL negated;
1974                int property = get_ucp(&ptr, &negated, errorcodeptr);                int pdata;
1975                if (property < 0) goto FAILED;                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
1976                  if (ptype < 0) goto FAILED;
1977                class_utf8 = TRUE;                class_utf8 = TRUE;
1978                *class_utf8data++ = ((-c == ESC_p) != negated)?                *class_utf8data++ = ((-c == ESC_p) != negated)?
1979                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
1980                *class_utf8data++ = property;                *class_utf8data++ = ptype;
1981                  *class_utf8data++ = pdata;
1982                class_charcount -= 2;   /* Not a < 256 character */                class_charcount -= 2;   /* Not a < 256 character */
1983                }                }
1984              continue;              continue;
# Line 2135  for (;; ptr++) Line 2160  for (;; ptr++)
2160  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2161          if ((options & PCRE_CASELESS) != 0)          if ((options & PCRE_CASELESS) != 0)
2162            {            {
           int chartype;  
2163            int othercase;            int othercase;
2164            if (_pcre_ucp_findchar(c, &chartype, &othercase) >= 0 &&            if ((othercase = _pcre_ucp_othercase(c)) >= 0)
                othercase > 0)  
2165              {              {
2166              *class_utf8data++ = XCL_SINGLE;              *class_utf8data++ = XCL_SINGLE;
2167              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);              class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
# Line 2423  for (;; ptr++) Line 2446  for (;; ptr++)
2446      else if (*previous < OP_EODN)      else if (*previous < OP_EODN)
2447        {        {
2448        uschar *oldcode;        uschar *oldcode;
2449        int prop_type;        int prop_type, prop_value;
2450        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
2451        c = *previous;        c = *previous;
2452    
2453        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
2454        prop_type = (*previous == OP_PROP || *previous == OP_NOTPROP)?        if (*previous == OP_PROP || *previous == OP_NOTPROP)
2455          previous[1] : -1;          {
2456            prop_type = previous[1];
2457            prop_value = previous[2];
2458            }
2459          else prop_type = prop_value = -1;
2460    
2461        oldcode = code;        oldcode = code;
2462        code = previous;                  /* Usually overwrite previous item */        code = previous;                  /* Usually overwrite previous item */
# Line 2490  for (;; ptr++) Line 2517  for (;; ptr++)
2517    
2518          /* If the maximum is unlimited, insert an OP_STAR. Before doing so,          /* If the maximum is unlimited, insert an OP_STAR. Before doing so,
2519          we have to insert the character for the previous code. For a repeated          we have to insert the character for the previous code. For a repeated
2520          Unicode property match, there is an extra byte that defines the          Unicode property match, there are two extra bytes that define the
2521          required property. In UTF-8 mode, long characters have their length in          required property. In UTF-8 mode, long characters have their length in
2522          c, with the 0x80 bit as a flag. */          c, with the 0x80 bit as a flag. */
2523    
# Line 2506  for (;; ptr++) Line 2533  for (;; ptr++)
2533  #endif  #endif
2534              {              {
2535              *code++ = c;              *code++ = c;
2536              if (prop_type >= 0) *code++ = prop_type;              if (prop_type >= 0)
2537                  {
2538                  *code++ = prop_type;
2539                  *code++ = prop_value;
2540                  }
2541              }              }
2542            *code++ = OP_STAR + repeat_type;            *code++ = OP_STAR + repeat_type;
2543            }            }
# Line 2525  for (;; ptr++) Line 2556  for (;; ptr++)
2556            else            else
2557  #endif  #endif
2558            *code++ = c;            *code++ = c;
2559            if (prop_type >= 0) *code++ = prop_type;            if (prop_type >= 0)
2560                {
2561                *code++ = prop_type;
2562                *code++ = prop_value;
2563                }
2564            repeat_max -= repeat_min;            repeat_max -= repeat_min;
2565            *code++ = OP_UPTO + repeat_type;            *code++ = OP_UPTO + repeat_type;
2566            PUT2INC(code, 0, repeat_max);            PUT2INC(code, 0, repeat_max);
# Line 2544  for (;; ptr++) Line 2579  for (;; ptr++)
2579  #endif  #endif
2580        *code++ = c;        *code++ = c;
2581    
2582        /* For a repeated Unicode property match, there is an extra byte that        /* For a repeated Unicode property match, there are two extra bytes that
2583        defines the required property. */        define the required property. */
2584    
2585  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2586        if (prop_type >= 0) *code++ = prop_type;        if (prop_type >= 0)
2587            {
2588            *code++ = prop_type;
2589            *code++ = prop_value;
2590            }
2591  #endif  #endif
2592        }        }
2593    
# Line 3016  for (;; ptr++) Line 3055  for (;; ptr++)
3055              goto FAILED;              goto FAILED;
3056              }              }
3057    
3058            /* Insert the recursion/subroutine item */            /* Insert the recursion/subroutine item, automatically wrapped inside
3059              "once" brackets. */
3060    
3061              *code = OP_ONCE;
3062              PUT(code, 1, 2 + 2*LINK_SIZE);
3063              code += 1 + LINK_SIZE;
3064    
3065            *code = OP_RECURSE;            *code = OP_RECURSE;
3066            PUT(code, 1, called - cd->start_code);            PUT(code, 1, called - cd->start_code);
3067            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
3068    
3069              *code = OP_KET;
3070              PUT(code, 1, 2 + 2*LINK_SIZE);
3071              code += 1 + LINK_SIZE;
3072            }            }
3073          continue;          continue;
3074    
# Line 3290  for (;; ptr++) Line 3338  for (;; ptr++)
3338        else if (-c == ESC_P || -c == ESC_p)        else if (-c == ESC_P || -c == ESC_p)
3339          {          {
3340          BOOL negated;          BOOL negated;
3341          int value = get_ucp(&ptr, &negated, errorcodeptr);          int pdata;
3342            int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
3343          previous = code;          previous = code;
3344          *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
3345          *code++ = value;          *code++ = ptype;
3346            *code++ = pdata;
3347          }          }
3348  #endif  #endif
3349    
# Line 3848  Returns: pointer to compiled data Line 3898  Returns: pointer to compiled data
3898                  with errorptr and erroroffset set                  with errorptr and erroroffset set
3899  */  */
3900    
3901  PCRE_EXPORT pcre *  PCRE_DATA_SCOPE pcre *
3902  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
3903    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
3904  {  {
# Line 3856  return pcre_compile2(pattern, options, N Line 3906  return pcre_compile2(pattern, options, N
3906  }  }
3907    
3908    
3909  PCRE_EXPORT pcre *  PCRE_DATA_SCOPE pcre *
3910  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
3911    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
3912  {  {
# Line 4049  while ((c = *(++ptr)) != 0) Line 4099  while ((c = *(++ptr)) != 0)
4099  #endif  #endif
4100    
4101      /* \P and \p are for Unicode properties, but only when the support has      /* \P and \p are for Unicode properties, but only when the support has
4102      been compiled. Each item needs 2 bytes. */      been compiled. Each item needs 3 bytes. */
4103    
4104      else if (-c == ESC_P || -c == ESC_p)      else if (-c == ESC_P || -c == ESC_p)
4105        {        {
4106  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4107        BOOL negated;        BOOL negated;
4108        length += 2;        BOOL pdata;
4109        lastitemlength = 2;        length += 3;
4110        if (get_ucp(&ptr, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN;        lastitemlength = 3;
4111          if (get_ucp(&ptr, &negated, &pdata, &errorcode) < 0)
4112            goto PCRE_ERROR_RETURN;
4113        continue;        continue;
4114  #else  #else
4115        errorcode = ERR45;        errorcode = ERR45;
# Line 4223  while ((c = *(++ptr)) != 0) Line 4275  while ((c = *(++ptr)) != 0)
4275                class_utf8 = TRUE;                class_utf8 = TRUE;
4276                length += LINK_SIZE + 2;                length += LINK_SIZE + 2;
4277                }                }
4278              length += 2;              length += 3;
4279              }              }
4280  #endif  #endif
4281            }            }
# Line 4486  while ((c = *(++ptr)) != 0) Line 4538  while ((c = *(++ptr)) != 0)
4538            errorcode = ERR29;            errorcode = ERR29;
4539            goto PCRE_ERROR_RETURN;            goto PCRE_ERROR_RETURN;
4540            }            }
4541          length += 1 + LINK_SIZE;          length += 3 + 3*LINK_SIZE;  /* Allows for the automatic "once" */
4542    
4543          /* If this item is quantified, it will get wrapped inside brackets so          /* If this item is quantified, it will get wrapped inside brackets so
4544          as to use the code for quantified brackets. We jump down and use the          as to use the code for quantified brackets. We jump down and use the
# Line 4542  while ((c = *(++ptr)) != 0) Line 4594  while ((c = *(++ptr)) != 0)
4594    
4595          if (*ptr == '=' || *ptr == '>')          if (*ptr == '=' || *ptr == '>')
4596            {            {
4597              length += 2 + 2*LINK_SIZE;  /* Allow for the automatic "once" */
4598            while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);            while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);
4599            if (*ptr != ')')            if (*ptr != ')')
4600              {              {
# Line 5035  if (re->options != 0) Line 5088  if (re->options != 0)
5088  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
5089    {    {
5090    int ch = re->first_byte & 255;    int ch = re->first_byte & 255;
5091    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)";    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
5092        "" : " (caseless)";
5093    if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);    if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);
5094      else printf("First char = \\x%02x%s\n", ch, caseless);      else printf("First char = \\x%02x%s\n", ch, caseless);
5095    }    }
# Line 5043  if ((re->options & PCRE_FIRSTSET) != 0) Line 5097  if ((re->options & PCRE_FIRSTSET) != 0)
5097  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->options & PCRE_REQCHSET) != 0)
5098    {    {
5099    int ch = re->req_byte & 255;    int ch = re->req_byte & 255;
5100    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)";    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
5101        "" : " (caseless)";
5102    if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);    if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);
5103      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5104    }    }

Legend:
Removed from v.85  
changed lines
  Added in v.87

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12