/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 519 by ph10, Tue Mar 30 11:11:52 2010 UTC revision 520 by ph10, Sat May 22 18:54:05 2010 UTC
# Line 440  for (;;) Line 440  for (;;)
440  *      Set a bit and maybe its alternate case    *  *      Set a bit and maybe its alternate case    *
441  *************************************************/  *************************************************/
442    
443  /* Given a character, set its bit in the table, and also the bit for the other  /* Given a character, set its first byte's bit in the table, and also the
444  version of a letter if we are caseless.  corresponding bit for the other version of a letter if we are caseless. In
445    UTF-8 mode, for characters greater than 127, we can only do the caseless thing
446    when Unicode property support is available.
447    
448  Arguments:  Arguments:
449    start_bits    points to the bit map    start_bits    points to the bit map
450    c             is the character    p             points to the character
451    caseless      the caseless flag    caseless      the caseless flag
452    cd            the block with char table pointers    cd            the block with char table pointers
453      utf8          TRUE for UTF-8 mode
454    
455  Returns:        nothing  Returns:        pointer after the character
456  */  */
457    
458  static void  static const uschar *
459  set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,  set_table_bit(uschar *start_bits, const uschar *p, BOOL caseless,
460    compile_data *cd)    compile_data *cd, BOOL utf8)
461  {  {
462    unsigned int c = *p;
463  start_bits[c/8] |= (1 << (c&7));  start_bits[c/8] |= (1 << (c&7));
464    
465    #ifdef SUPPORT_UTF8
466    if (utf8 && c > 127)
467      {
468      GETCHARINC(c, p);
469    #ifdef SUPPORT_UCP
470      if (caseless)
471        {
472        uschar buff[8];
473        c = UCD_OTHERCASE(c);
474        (void)_pcre_ord2utf8(c, buff);
475        c = buff[0];
476        start_bits[c/8] |= (1 << (c&7));
477        }
478    #endif
479      return p;
480      }
481    #endif
482    
483    /* Not UTF-8 mode, or character is less than 128. */
484    
485  if (caseless && (cd->ctypes[c] & ctype_letter) != 0)  if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
486    start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));    start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));
487    return p + 1;
488  }  }
489    
490    
# Line 616  do Line 642  do
642        case OP_QUERY:        case OP_QUERY:
643        case OP_MINQUERY:        case OP_MINQUERY:
644        case OP_POSQUERY:        case OP_POSQUERY:
645        set_table_bit(start_bits, tcode[1], caseless, cd);        tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
       tcode += 2;  
 #ifdef SUPPORT_UTF8  
       if (utf8 && tcode[-1] >= 0xc0)  
         tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];  
 #endif  
646        break;        break;
647    
648        /* Single-char upto sets the bit and tries the next */        /* Single-char upto sets the bit and tries the next */
# Line 629  do Line 650  do
650        case OP_UPTO:        case OP_UPTO:
651        case OP_MINUPTO:        case OP_MINUPTO:
652        case OP_POSUPTO:        case OP_POSUPTO:
653        set_table_bit(start_bits, tcode[3], caseless, cd);        tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8);
       tcode += 4;  
 #ifdef SUPPORT_UTF8  
       if (utf8 && tcode[-1] >= 0xc0)  
         tcode += _pcre_utf8_table4[tcode[-1] & 0x3f];  
 #endif  
654        break;        break;
655    
656        /* At least one single char sets the bit and stops */        /* At least one single char sets the bit and stops */
# Line 647  do Line 663  do
663        case OP_PLUS:        case OP_PLUS:
664        case OP_MINPLUS:        case OP_MINPLUS:
665        case OP_POSPLUS:        case OP_POSPLUS:
666        set_table_bit(start_bits, tcode[1], caseless, cd);        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
667        try_next = FALSE;        try_next = FALSE;
668        break;        break;
669    
670        /* Single character type sets the bits and stops */        /* Single character types set the bits and stop. Note that if PCRE_UCP
671          is set, we do not see these op codes because \d etc are converted to
672          properties. Therefore, these apply in the case when only ASCII characters
673          are recognized to match the types. */
674    
675        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
676        for (c = 0; c < 32; c++)        for (c = 0; c < 32; c++)

Legend:
Removed from v.519  
changed lines
  Added in v.520

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12