/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 523 by ph10, Sun May 23 18:48:54 2010 UTC | revision 524 by ph10, Mon May 24 17:06:28 2010 UTC
# Line 48  supporting functions. */ Line 48  supporting functions. */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    #define SET_BIT(c) start_bits[c/8] |= (1 << (c&7))
52    
53  /* Returns from set_start_bits() */  /* Returns from set_start_bits() */
54    
# Line 460  set_table_bit(uschar *start_bits, const Line 461  set_table_bit(uschar *start_bits, const
461    compile_data *cd, BOOL utf8)    compile_data *cd, BOOL utf8)
462  {  {
463  unsigned int c = *p;  unsigned int c = *p;
464  start_bits[c/8] |= (1 << (c&7));  
465    SET_BIT(c);
466    
467  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
468  if (utf8 && c > 127)  if (utf8 && c > 127)
# Line 472  if (utf8 && c > 127) Line 474  if (utf8 && c > 127)
474      uschar buff[8];      uschar buff[8];
475      c = UCD_OTHERCASE(c);      c = UCD_OTHERCASE(c);
476      (void)_pcre_ord2utf8(c, buff);      (void)_pcre_ord2utf8(c, buff);
477      c = buff[0];      SET_BIT(buff[0]);
     start_bits[c/8] |= (1 << (c&7));  
478      }      }
479  #endif  #endif
480    return p;    return p;
# Line 482  if (utf8 && c > 127) Line 483  if (utf8 && c > 127)
483    
484  /* Not UTF-8 mode, or character is less than 127. */  /* Not UTF-8 mode, or character is less than 127. */
485    
486  if (caseless && (cd->ctypes[c] & ctype_letter) != 0)  if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
   start_bits[cd->fcc[c]/8] |= (1 << (cd->fcc[c]&7));  
487  return p + 1;  return p + 1;
488  }  }
489    
# Line 666  do Line 666  do
666        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);
667        try_next = FALSE;        try_next = FALSE;
668        break;        break;
669    
670          /* Special spacing and line-terminating items. These recognize specific
671          lists of characters. The difference between VSPACE and ANYNL is that the
672          latter can match the two-character CRLF sequence, but that is not
673          relevant for finding the first character, so their code here is
674          identical. */
675    
676          case OP_HSPACE:
677          SET_BIT(0x09);
678          SET_BIT(0x20);
679          SET_BIT(0xA0);
680          if (utf8)
681            {
682            SET_BIT(0xE1);  /* For U+1680, U+180E */
683            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
684            SET_BIT(0xE3);  /* For U+3000 */
685            }
686          try_next = FALSE;
687          break;
688    
689          case OP_ANYNL:
690          case OP_VSPACE:
691          SET_BIT(0x0A);
692          SET_BIT(0x0B);
693          SET_BIT(0x0C);
694          SET_BIT(0x0D);
695          SET_BIT(0x85);
696          if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */
697          try_next = FALSE;
698          break;
699    
700        /* Single character types set the bits and stop. Note that if PCRE_UCP        /* Single character types set the bits and stop. Note that if PCRE_UCP
701        is set, we do not see these op codes because \d etc are converted to        is set, we do not see these op codes because \d etc are converted to
# Line 727  do Line 757  do
757    
758        case OP_TYPEPLUS:        case OP_TYPEPLUS:
759        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
760          case OP_TYPEPOSPLUS:
761        tcode++;        tcode++;
762        break;        break;
763    
# Line 754  do Line 785  do
785          case OP_ANY:          case OP_ANY:
786          case OP_ALLANY:          case OP_ALLANY:
787          return SSB_FAIL;          return SSB_FAIL;
788    
789            case OP_HSPACE:
790            SET_BIT(0x09);
791            SET_BIT(0x20);
792            SET_BIT(0xA0);
793            if (utf8)
794              {
795              SET_BIT(0xE1);  /* For U+1680, U+180E */
796              SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
797              SET_BIT(0xE3);  /* For U+3000 */
798              }
799            break;
800    
801            case OP_ANYNL:
802            case OP_VSPACE:
803            SET_BIT(0x0A);
804            SET_BIT(0x0B);
805            SET_BIT(0x0C);
806            SET_BIT(0x0D);
807            SET_BIT(0x85);
808            if (utf8) SET_BIT(0xE2);    /* For U+2028, U+2029 */
809            break;
810    
811          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
812          for (c = 0; c < 32; c++)          for (c = 0; c < 32; c++)
813            start_bits[c] |= ~cd->cbits[c+cbit_digit];            start_bits[c] |= ~cd->cbits[c+cbit_digit];

Legend:
Removed from v.523  
changed lines
  Added in v.524

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12