/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 534 by ph10, Mon May 24 17:06:28 2010 UTC | revision 535 by ph10, Thu Jun 3 19:18:24 2010 UTC
# Line 441  for (;;) Line 441  for (;;)
441  *      Set a bit and maybe its alternate case    *  *      Set a bit and maybe its alternate case    *
442  *************************************************/  *************************************************/
443    
444  /* Given a character, set its first byte's bit in the table, and also the  /* Given a character, set its first byte's bit in the table, and also the
445  corresponding bit for the other version of a letter if we are caseless. In  corresponding bit for the other version of a letter if we are caseless. In
446  UTF-8 mode, for characters greater than 127, we can only do the caseless thing  UTF-8 mode, for characters greater than 127, we can only do the caseless thing
447  when Unicode property support is available.  when Unicode property support is available.
# Line 451  Arguments: Line 451  Arguments:
451    p             points to the character    p             points to the character
452    caseless      the caseless flag    caseless      the caseless flag
453    cd            the block with char table pointers    cd            the block with char table pointers
454    utf8          TRUE for UTF-8 mode    utf8          TRUE for UTF-8 mode
455    
456  Returns:        pointer after the character  Returns:        pointer after the character
457  */  */
# Line 471  if (utf8 && c > 127) Line 471  if (utf8 && c > 127)
471  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
472    if (caseless)    if (caseless)
473      {      {
474      uschar buff[8];      uschar buff[8];
475      c = UCD_OTHERCASE(c);      c = UCD_OTHERCASE(c);
476      (void)_pcre_ord2utf8(c, buff);      (void)_pcre_ord2utf8(c, buff);
477      SET_BIT(buff[0]);      SET_BIT(buff[0]);
478      }      }
479  #endif  #endif
480    return p;    return p;
481    }    }
482  #endif  #endif
483    
484  /* Not UTF-8 mode, or character is not greater than 127. */  /* Not UTF-8 mode, or character is not greater than 127. */
485    
# Line 666  do Line 666  do
666        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
667        try_next = FALSE;        try_next = FALSE;
668        break;        break;
669    
670        /* Special spacing and line-terminating items. These recognize specific        /* Special spacing and line-terminating items. These recognize specific
671        lists of characters. The difference between VSPACE and ANYNL is that the        lists of characters. The difference between VSPACE and ANYNL is that the
672        latter can match the two-character CRLF sequence, but that is not        latter can match the two-character CRLF sequence, but that is not
673        relevant for finding the first character, so their code here is        relevant for finding the first character, so their code here is
674        identical. */        identical. */
675    
676        case OP_HSPACE:        case OP_HSPACE:
677        SET_BIT(0x09);        SET_BIT(0x09);
678        SET_BIT(0x20);        SET_BIT(0x20);
679        SET_BIT(0xA0);        SET_BIT(0xA0);
680        if (utf8)        if (utf8)
681          {          {
682          SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
683          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
684          SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
685          }          }
686        try_next = FALSE;        try_next = FALSE;
687        break;        break;
688    
689        case OP_ANYNL:        case OP_ANYNL:
690        case OP_VSPACE:        case OP_VSPACE:
691        SET_BIT(0x0A);        SET_BIT(0x0A);
692        SET_BIT(0x0B);        SET_BIT(0x0B);
693        SET_BIT(0x0C);        SET_BIT(0x0C);
694        SET_BIT(0x0D);        SET_BIT(0x0D);
695        SET_BIT(0x85);        SET_BIT(0x85);
696        if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */        if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */
697        try_next = FALSE;        try_next = FALSE;
698        break;        break;
699    
700        /* Single character types set the bits and stop. Note that if PCRE_UCP        /* Single character types set the bits and stop. Note that if PCRE_UCP
701        is set, we do not see these op codes because \d etc are converted to        is set, we do not see these op codes because \d etc are converted to
702        properties. Therefore, these apply in the case when only ASCII characters        properties. Therefore, these apply in the case when only ASCII characters
703        are recognized to match the types. */        are recognized to match the types. */
704    
705        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
# Line 757  do Line 757  do
757    
758        case OP_TYPEPLUS:        case OP_TYPEPLUS:
759        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
760        case OP_TYPEPOSPLUS:        case OP_TYPEPOSPLUS:
761        tcode++;        tcode++;
762        break;        break;
763    
# Line 785  do Line 785  do
785          case OP_ANY:          case OP_ANY:
786          case OP_ALLANY:          case OP_ALLANY:
787          return SSB_FAIL;          return SSB_FAIL;
788    
789          case OP_HSPACE:          case OP_HSPACE:
790          SET_BIT(0x09);          SET_BIT(0x09);
791          SET_BIT(0x20);          SET_BIT(0x20);
792          SET_BIT(0xA0);          SET_BIT(0xA0);
793          if (utf8)          if (utf8)
794            {            {
795            SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
796            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
797            SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
798            }            }
799          break;          break;
800    
801          case OP_ANYNL:          case OP_ANYNL:
802          case OP_VSPACE:          case OP_VSPACE:
803          SET_BIT(0x0A);          SET_BIT(0x0A);
804          SET_BIT(0x0B);          SET_BIT(0x0B);
805          SET_BIT(0x0C);          SET_BIT(0x0C);
806          SET_BIT(0x0D);          SET_BIT(0x0D);
807          SET_BIT(0x85);          SET_BIT(0x85);
808          if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */          if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */
809          break;          break;
810    
811          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
812          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
813            start_bits[c] |= ~cd->cbits[c+cbit_digit];            start_bits[c] |= ~cd->cbits[c+cbit_digit];

Legend:
Removed from v.534  
changed lines
  Added in v.535

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12