/[pcre]/code/trunk/pcre_study.c
ViewVC logotype

Diff of /code/trunk/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 602 by ph10, Wed May 25 08:29:03 2011 UTC | revision 603 by ph10, Fri May 27 10:14:09 2011 UTC
# Line 73  Arguments: Line 73  Arguments:
73  Returns:   the minimum length  Returns:   the minimum length
74             -1 if \C was encountered             -1 if \C was encountered
75             -2 internal error (missing capturing bracket)             -2 internal error (missing capturing bracket)
76               -3 internal error (opcode not listed)
77  */  */
78    
79  static int  static int
# Line 191  for (;;) Line 192  for (;;)
192      case OP_NOT:      case OP_NOT:
193      case OP_NOTI:      case OP_NOTI:
194      case OP_PLUS:      case OP_PLUS:
195        case OP_PLUSI:
196      case OP_MINPLUS:      case OP_MINPLUS:
197        case OP_MINPLUSI:
198      case OP_POSPLUS:      case OP_POSPLUS:
199        case OP_POSPLUSI:
200      case OP_NOTPLUS:      case OP_NOTPLUS:
201        case OP_NOTPLUSI:
202      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
203        case OP_NOTMINPLUSI:
204      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
205        case OP_NOTPOSPLUSI:
206      branchlength++;      branchlength++;
207      cc += 2;      cc += 2;
208  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 214  for (;;) Line 221  for (;;)
221      need to skip over a multibyte character in UTF8 mode.  */      need to skip over a multibyte character in UTF8 mode.  */
222    
223      case OP_EXACT:      case OP_EXACT:
224        case OP_EXACTI:
225      case OP_NOTEXACT:      case OP_NOTEXACT:
226        case OP_NOTEXACTI:
227      branchlength += GET2(cc,1);      branchlength += GET2(cc,1);
228      cc += 4;      cc += 4;
229  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 397  for (;;) Line 406  for (;;)
406      of a character, we must take special action for UTF-8 characters. As it      of a character, we must take special action for UTF-8 characters. As it
407      happens, the "NOT" versions of these opcodes are used at present only for      happens, the "NOT" versions of these opcodes are used at present only for
408      ASCII characters, so they could be omitted from this list. However, in      ASCII characters, so they could be omitted from this list. However, in
409      future that may change, so we leave them in this special case. */      future that may change, so we include them here so as not to leave a
410        gotcha for a future maintainer. */
411    
412      case OP_UPTO:      case OP_UPTO:
413      case OP_UPTOI:      case OP_UPTOI:
# Line 456  for (;;) Line 466  for (;;)
466      cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];      cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE];
467      break;      break;
468    
469      /* For the record, these are the opcodes that are matched by "default":      /* The remaining opcodes are just skipped over. */
     OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP,  
     OP_THEN. */  
470    
471      default:      case OP_ACCEPT:
472        case OP_CLOSE:
473        case OP_COMMIT:
474        case OP_FAIL:
475        case OP_PRUNE:
476        case OP_SET_SOM:
477        case OP_SKIP:
478        case OP_THEN:
479      cc += _pcre_OP_lengths[op];      cc += _pcre_OP_lengths[op];
480      break;      break;
481    
482        /* This should not occur: we list all opcodes explicitly so that when
483        new ones get added they are properly considered. */
484    
485        default:
486        return -3;
487      }      }
488    }    }
489  /* Control never gets here */  /* Control never gets here */
# Line 608  function fails unless the result is SSB_ Line 629  function fails unless the result is SSB_
629  Arguments:  Arguments:
630    code         points to an expression    code         points to an expression
631    start_bits   points to a 32-byte table, initialized to 0    start_bits   points to a 32-byte table, initialized to 0
   caseless     the current state of the caseless flag  
632    utf8         TRUE if in UTF-8 mode    utf8         TRUE if in UTF-8 mode
633    cd           the block with char table pointers    cd           the block with char table pointers
634    
# Line 618  Returns: SSB_FAIL => Failed to Line 638  Returns: SSB_FAIL => Failed to
638  */  */
639    
640  static int  static int
641  set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,  set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,
642    BOOL utf8, compile_data *cd)    compile_data *cd)
643  {  {
644  register int c;  register int c;
645  int yield = SSB_DONE;  int yield = SSB_DONE;
# Line 668  do Line 688  do
688        case OP_SCBRA:        case OP_SCBRA:
689        case OP_ONCE:        case OP_ONCE:
690        case OP_ASSERT:        case OP_ASSERT:
691        rc = set_start_bits(tcode, start_bits, caseless, utf8, cd);        rc = set_start_bits(tcode, start_bits, utf8, cd);
692        if (rc == SSB_FAIL) return SSB_FAIL;        if (rc == SSB_FAIL) return SSB_FAIL;
693        if (rc == SSB_DONE) try_next = FALSE; else        if (rc == SSB_DONE) try_next = FALSE; else
694          {          {
# Line 713  do Line 733  do
733    
734        case OP_BRAZERO:        case OP_BRAZERO:
735        case OP_BRAMINZERO:        case OP_BRAMINZERO:
736        if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL)        if (set_start_bits(++tcode, start_bits, utf8, cd) == SSB_FAIL)
737          return SSB_FAIL;          return SSB_FAIL;
738  /* =========================================================================  /* =========================================================================
739        See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
# Line 740  do Line 760  do
760        case OP_QUERY:        case OP_QUERY:
761        case OP_MINQUERY:        case OP_MINQUERY:
762        case OP_POSQUERY:        case OP_POSQUERY:
763        tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
764        break;        break;
765    
766        case OP_STARI:        case OP_STARI:
# Line 757  do Line 777  do
777        case OP_UPTO:        case OP_UPTO:
778        case OP_MINUPTO:        case OP_MINUPTO:
779        case OP_POSUPTO:        case OP_POSUPTO:
780        tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8);        tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8);
781        break;        break;
782    
783        case OP_UPTOI:        case OP_UPTOI:
# Line 775  do Line 795  do
795        case OP_PLUS:        case OP_PLUS:
796        case OP_MINPLUS:        case OP_MINPLUS:
797        case OP_POSPLUS:        case OP_POSPLUS:
798        (void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8);        (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8);
799        try_next = FALSE;        try_next = FALSE;
800        break;        break;
801    
802          case OP_EXACTI:
803          tcode += 2;
804          /* Fall through */
805        case OP_CHARI:        case OP_CHARI:
806        case OP_PLUSI:        case OP_PLUSI:
807        case OP_MINPLUSI:        case OP_MINPLUSI:
# Line 1120  if ((re->options & PCRE_ANCHORED) == 0 & Line 1143  if ((re->options & PCRE_ANCHORED) == 0 &
1143    /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
1144    
1145    memset(start_bits, 0, 32 * sizeof(uschar));    memset(start_bits, 0, 32 * sizeof(uschar));
1146    bits_set = set_start_bits(code, start_bits,    bits_set = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
     (re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0,  
1147      &compile_block) == SSB_DONE;      &compile_block) == SSB_DONE;
1148    }    }
1149    
1150  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
1151    
1152  min = find_minlength(code, code, re->options);  switch(min = find_minlength(code, code, re->options))
1153      {
1154      case -2: *errorptr = "internal error: missing capturing bracket"; break;
1155      case -3: *errorptr = "internal error: opcode not recognized"; break;
1156      default: break;
1157      }
1158    
1159  /* Return NULL if no optimization is possible. */  /* Return NULL if no optimization is possible. */
1160    

Legend:
Removed from v.602  
changed lines
  Added in v.603

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12