/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 243 by ph10, Thu Sep 13 09:28:14 2007 UTC revision 268 by ph10, Thu Nov 15 10:28:09 2007 UTC
# Line 300  static const char error_texts[] = Line 300  static const char error_texts[] =
300    "(*VERB) with an argument is not supported\0"    "(*VERB) with an argument is not supported\0"
301    /* 60 */    /* 60 */
302    "(*VERB) not recognized\0"    "(*VERB) not recognized\0"
303    "number is too big";    "number is too big\0"
304      "subpattern name expected after (?&";
305    
306    
307  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 2383  req_caseopt = ((options & PCRE_CASELESS) Line 2384  req_caseopt = ((options & PCRE_CASELESS)
2384  for (;; ptr++)  for (;; ptr++)
2385    {    {
2386    BOOL negate_class;    BOOL negate_class;
2387      BOOL should_flip_negation;
2388    BOOL possessive_quantifier;    BOOL possessive_quantifier;
2389    BOOL is_quantifier;    BOOL is_quantifier;
2390    BOOL is_recurse;    BOOL is_recurse;
# Line 2631  for (;; ptr++) Line 2633  for (;; ptr++)
2633        else break;        else break;
2634        }        }
2635    
2636        /* If a class contains a negative special such as \S, we need to flip the
2637        negation flag at the end, so that support for characters > 255 works
2638        correctly (they are all included in the class). */
2639    
2640        should_flip_negation = FALSE;
2641    
2642      /* Keep a count of chars with values < 256 so that we can optimize the case      /* Keep a count of chars with values < 256 so that we can optimize the case
2643      of just a single character (as long as it's < 256). However, for higher      of just a single character (as long as it's < 256). However, for higher
2644      valued UTF-8 characters, we don't yet do any optimization. */      valued UTF-8 characters, we don't yet do any optimization. */
# Line 2703  for (;; ptr++) Line 2711  for (;; ptr++)
2711          if (*ptr == '^')          if (*ptr == '^')
2712            {            {
2713            local_negate = TRUE;            local_negate = TRUE;
2714              should_flip_negation = TRUE;  /* Note negative special */
2715            ptr++;            ptr++;
2716            }            }
2717    
# Line 2805  for (;; ptr++) Line 2814  for (;; ptr++)
2814              continue;              continue;
2815    
2816              case ESC_D:              case ESC_D:
2817                should_flip_negation = TRUE;
2818              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];
2819              continue;              continue;
2820    
# Line 2813  for (;; ptr++) Line 2823  for (;; ptr++)
2823              continue;              continue;
2824    
2825              case ESC_W:              case ESC_W:
2826                should_flip_negation = TRUE;
2827              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
2828              continue;              continue;
2829    
# Line 2822  for (;; ptr++) Line 2833  for (;; ptr++)
2833              continue;              continue;
2834    
2835              case ESC_S:              case ESC_S:
2836                should_flip_negation = TRUE;
2837              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];
2838              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
2839              continue;              continue;
# Line 3327  we set the flag only if there is a liter Line 3339  we set the flag only if there is a liter
3339      zeroreqbyte = reqbyte;      zeroreqbyte = reqbyte;
3340    
3341      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
3342      extended class, with its own opcode. If there are no characters < 256,      extended class, with its own opcode, unless there was a negated special
3343      we can omit the bitmap in the actual compiled code. */      such as \S in the class, because in that case all characters > 255 are in
3344        the class, so any that were explicitly given as well can be ignored. If
3345        (when there are explicit characters > 255 that must be listed) there are no
3346        characters < 256, we can omit the bitmap in the actual compiled code. */
3347    
3348  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3349      if (class_utf8)      if (class_utf8 && !should_flip_negation)
3350        {        {
3351        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
3352        *code++ = OP_XCLASS;        *code++ = OP_XCLASS;
# Line 3357  we set the flag only if there is a liter Line 3372  we set the flag only if there is a liter
3372        }        }
3373  #endif  #endif
3374    
3375      /* If there are no characters > 255, negate the 32-byte map if necessary,      /* If there are no characters > 255, set the opcode to OP_CLASS or
3376      and copy it into the code vector. If this is the first thing in the branch,      OP_NCLASS, depending on whether the whole class was negated and whether
3377      there can be no first char setting, whatever the repeat count. Any reqbyte      there were negative specials such as \S in the class. Then copy the 32-byte
3378      setting must remain unchanged after any kind of repeat. */      map into the code vector, negating it if necessary. */
3379    
3380        *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
3381      if (negate_class)      if (negate_class)
3382        {        {
       *code++ = OP_NCLASS;  
3383        if (lengthptr == NULL)    /* Save time in the pre-compile phase */        if (lengthptr == NULL)    /* Save time in the pre-compile phase */
3384          for (c = 0; c < 32; c++) code[c] = ~classbits[c];          for (c = 0; c < 32; c++) code[c] = ~classbits[c];
3385        }        }
3386      else      else
3387        {        {
       *code++ = OP_CLASS;  
3388        memcpy(code, classbits, 32);        memcpy(code, classbits, 32);
3389        }        }
3390      code += 32;      code += 32;
# Line 4522  we set the flag only if there is a liter Line 4536  we set the flag only if there is a liter
4536    
4537          if (lengthptr != NULL)          if (lengthptr != NULL)
4538            {            {
4539              if (namelen == 0)
4540                {
4541                *errorcodeptr = ERR62;
4542                goto FAILED;
4543                }
4544            if (*ptr != terminator)            if (*ptr != terminator)
4545              {              {
4546              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
# Line 4535  we set the flag only if there is a liter Line 4554  we set the flag only if there is a liter
4554            recno = 0;            recno = 0;
4555            }            }
4556    
4557          /* In the real compile, seek the name in the table */          /* In the real compile, seek the name in the table. We check the name
4558            first, and then check that we have reached the end of the name in the
4559            table. That way, if the name is longer than any in the table,
4560            the comparison will fail without reading beyond the table entry. */
4561    
4562          else          else
4563            {            {
4564            slot = cd->name_table;            slot = cd->name_table;
4565            for (i = 0; i < cd->names_found; i++)            for (i = 0; i < cd->names_found; i++)
4566              {              {
4567              if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;              if (strncmp((char *)name, (char *)slot+2, namelen) == 0 &&
4568                    slot[2+namelen] == 0)
4569                  break;
4570              slot += cd->name_entry_size;              slot += cd->name_entry_size;
4571              }              }
4572    

Legend:
Removed from v.243  
changed lines
  Added in v.268

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12