/[pcre]/code/branches/pcre16/pcre_study.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_study.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 539 by ph10, Sun Jun 13 21:35:04 2010 UTC revision 545 by ph10, Wed Jun 16 10:51:15 2010 UTC
# Line 504  Arguments: Line 504  Arguments:
504    start_bits     the starting bitmap    start_bits     the starting bitmap
505    cbit_type      the type of character wanted    cbit_type      the type of character wanted
506    table_limit    32 for non-UTF-8; 16 for UTF-8    table_limit    32 for non-UTF-8; 16 for UTF-8
507    cd             the block with char table pointers    cd             the block with char table pointers
508    
509  Returns:         nothing  Returns:         nothing
510  */  */
# Line 513  static void Line 513  static void
513  set_type_bits(uschar *start_bits, int cbit_type, int table_limit,  set_type_bits(uschar *start_bits, int cbit_type, int table_limit,
514    compile_data *cd)    compile_data *cd)
515  {  {
516  register int c;  register int c;
517  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
518  if (table_limit == 32) return;  if (table_limit == 32) return;
519  for (c = 128; c < 256; c++)  for (c = 128; c < 256; c++)
# Line 522  for (c = 128; c < 256; c++) Line 522  for (c = 128; c < 256; c++)
522      {      {
523      uschar buff[8];      uschar buff[8];
524      (void)_pcre_ord2utf8(c, buff);      (void)_pcre_ord2utf8(c, buff);
525      SET_BIT(buff[0]);      SET_BIT(buff[0]);
526      }      }
527    }    }
528  }  }
529    
530    
# Line 535  for (c = 128; c < 256; c++) Line 535  for (c = 128; c < 256; c++)
535  /* This function sets starting bits for a negative character type such as \D.  /* This function sets starting bits for a negative character type such as \D.
536  In UTF-8 mode, we can only do a direct setting for bytes less than 128, as  In UTF-8 mode, we can only do a direct setting for bytes less than 128, as
537  otherwise there can be confusion with bytes in the middle of UTF-8 characters.  otherwise there can be confusion with bytes in the middle of UTF-8 characters.
538  Unlike in the positive case, where we can set appropriate starting bits for  Unlike in the positive case, where we can set appropriate starting bits for
539  specific high-valued UTF-8 characters, in this case we have to set the bits for  specific high-valued UTF-8 characters, in this case we have to set the bits for
540  all high-valued characters. The lowest is 0xc2, but we overkill by starting at  all high-valued characters. The lowest is 0xc2, but we overkill by starting at
541  0xc0 (192) for simplicity.  0xc0 (192) for simplicity.
542    
543  Arguments:  Arguments:
544    start_bits     the starting bitmap    start_bits     the starting bitmap
545    cbit_type      the type of character wanted    cbit_type      the type of character wanted
546    table_limit    32 for non-UTF-8; 16 for UTF-8    table_limit    32 for non-UTF-8; 16 for UTF-8
547    cd             the block with char table pointers    cd             the block with char table pointers
548    
549  Returns:         nothing  Returns:         nothing
550  */  */
# Line 553  static void Line 553  static void
553  set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,  set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit,
554    compile_data *cd)    compile_data *cd)
555  {  {
556  register int c;  register int c;
557  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];  for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
558  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;  if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
559  }  }
# Line 750  do Line 750  do
750        SET_BIT(0x20);        SET_BIT(0x20);
751        if (utf8)        if (utf8)
752          {          {
753          SET_BIT(0xC2);  /* For U+00A0 */          SET_BIT(0xC2);  /* For U+00A0 */
754          SET_BIT(0xE1);  /* For U+1680, U+180E */          SET_BIT(0xE1);  /* For U+1680, U+180E */
755          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */          SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
756          SET_BIT(0xE3);  /* For U+3000 */          SET_BIT(0xE3);  /* For U+3000 */
# Line 765  do Line 765  do
765        SET_BIT(0x0B);        SET_BIT(0x0B);
766        SET_BIT(0x0C);        SET_BIT(0x0C);
767        SET_BIT(0x0D);        SET_BIT(0x0D);
768        if (utf8)        if (utf8)
769          {          {
770          SET_BIT(0xC2);  /* For U+0085 */          SET_BIT(0xC2);  /* For U+0085 */
771          SET_BIT(0xE2);  /* For U+2028, U+2029 */          SET_BIT(0xE2);  /* For U+2028, U+2029 */
772          }          }
773        else SET_BIT(0x85);        else SET_BIT(0x85);
774        try_next = FALSE;        try_next = FALSE;
775        break;        break;
776    
777        /* Single character types set the bits and stop. Note that if PCRE_UCP        /* Single character types set the bits and stop. Note that if PCRE_UCP
778        is set, we do not see these op codes because \d etc are converted to        is set, we do not see these op codes because \d etc are converted to
779        properties. Therefore, these apply in the case when only characters less        properties. Therefore, these apply in the case when only characters less
780        than 256 are recognized to match the types. */        than 256 are recognized to match the types. */
781    
782        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
# Line 817  do Line 817  do
817        set_type_bits(start_bits, cbit_word, table_limit, cd);        set_type_bits(start_bits, cbit_word, table_limit, cd);
818        try_next = FALSE;        try_next = FALSE;
819        break;        break;
820    
821        /* One or more character type fudges the pointer and restarts, knowing        /* One or more character type fudges the pointer and restarts, knowing
822        it will hit a single character type and stop there. */        it will hit a single character type and stop there. */
823    
# Line 857  do Line 857  do
857          SET_BIT(0x20);          SET_BIT(0x20);
858          if (utf8)          if (utf8)
859            {            {
860            SET_BIT(0xC2);  /* For U+00A0 */            SET_BIT(0xC2);  /* For U+00A0 */
861            SET_BIT(0xE1);  /* For U+1680, U+180E */            SET_BIT(0xE1);  /* For U+1680, U+180E */
862            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */            SET_BIT(0xE2);  /* For U+2000 - U+200A, U+202F, U+205F */
863            SET_BIT(0xE3);  /* For U+3000 */            SET_BIT(0xE3);  /* For U+3000 */
# Line 871  do Line 871  do
871          SET_BIT(0x0B);          SET_BIT(0x0B);
872          SET_BIT(0x0C);          SET_BIT(0x0C);
873          SET_BIT(0x0D);          SET_BIT(0x0D);
874          if (utf8)          if (utf8)
875            {            {
876            SET_BIT(0xC2);  /* For U+0085 */            SET_BIT(0xC2);  /* For U+0085 */
877            SET_BIT(0xE2);  /* For U+2028, U+2029 */            SET_BIT(0xE2);  /* For U+2028, U+2029 */
878            }            }
879          else SET_BIT(0x85);          else SET_BIT(0x85);
880          break;          break;
881    
# Line 892  do Line 892  do
892    
893          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
894          set_nottype_bits(start_bits, cbit_space, table_limit, cd);          set_nottype_bits(start_bits, cbit_space, table_limit, cd);
895          start_bits[1] |= 0x08;          start_bits[1] |= 0x08;
896          break;          break;
897    
898          /* The cbit_space table has vertical tab as whitespace; we have to          /* The cbit_space table has vertical tab as whitespace; we have to

Legend:
Removed from v.539  
changed lines
  Added in v.545

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12