/[pcre]/code/branches/pcre16/pcre_study.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_study.c

Parent Directory | Revision Log | View Patch

revision 604 by ph10, Thu Jun 2 19:04:54 2011 UTC | revision 605 by ph10, Fri Jun 3 18:18:30 2011 UTC
# Line 52  supporting functions. */ Line 52  supporting functions. */
52    
53  /* Returns from set_start_bits() */  /* Returns from set_start_bits() */
54    
55  enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };  enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
56    
57    
58    
# Line 73  Arguments: Line 73  Arguments:
73  Returns:   the minimum length  Returns:   the minimum length
74             -1 if \C was encountered             -1 if \C was encountered
75             -2 internal error (missing capturing bracket)             -2 internal error (missing capturing bracket)
76             -3 internal error (opcode not listed)             -3 internal error (opcode not listed)
77  */  */
78    
79  static int  static int
# Line 140  for (;;) Line 140  for (;;)
140      case OP_KET:      case OP_KET:
141      case OP_KETRMAX:      case OP_KETRMAX:
142      case OP_KETRMIN:      case OP_KETRMIN:
143      case OP_KETRPOS:      case OP_KETRPOS:
144      case OP_END:      case OP_END:
145      if (length < 0 || (!had_recurse && branchlength < length))      if (length < 0 || (!had_recurse && branchlength < length))
146        length = branchlength;        length = branchlength;
# Line 382  for (;;) Line 382  for (;;)
382        min = 0;        min = 0;
383        cc++;        cc++;
384        break;        break;
385    
386        case OP_CRPLUS:        case OP_CRPLUS:
387        case OP_CRMINPLUS:        case OP_CRMINPLUS:
388        min = 1;        min = 1;
389        cc++;        cc++;
390        break;        break;
391    
392        case OP_CRRANGE:        case OP_CRRANGE:
393        case OP_CRMINRANGE:        case OP_CRMINRANGE:
394        min = GET2(cc, 1);        min = GET2(cc, 1);
# Line 419  for (;;) Line 419  for (;;)
419      of a character, we must take special action for UTF-8 characters. As it      of a character, we must take special action for UTF-8 characters. As it
420      happens, the "NOT" versions of these opcodes are used at present only for      happens, the "NOT" versions of these opcodes are used at present only for
421      ASCII characters, so they could be omitted from this list. However, in      ASCII characters, so they could be omitted from this list. However, in
422      future that may change, so we include them here so as not to leave a      future that may change, so we include them here so as not to leave a
423      gotcha for a future maintainer. */      gotcha for a future maintainer. */
424    
425      case OP_UPTO:      case OP_UPTO:
# Line 488  for (;;) Line 488  for (;;)
488      case OP_PRUNE:      case OP_PRUNE:
489      case OP_SET_SOM:      case OP_SET_SOM:
490      case OP_SKIP:      case OP_SKIP:
491      case OP_THEN:      case OP_THEN:
492      cc += _pcre_OP_lengths[op];      cc += _pcre_OP_lengths[op];
493      break;      break;
494    
495      /* This should not occur: we list all opcodes explicitly so that when      /* This should not occur: we list all opcodes explicitly so that when
496      new ones get added they are properly considered. */      new ones get added they are properly considered. */
497    
# Line 648  Arguments: Line 648  Arguments:
648  Returns:       SSB_FAIL     => Failed to find any starting bytes  Returns:       SSB_FAIL     => Failed to find any starting bytes
649                 SSB_DONE     => Found mandatory starting bytes                 SSB_DONE     => Found mandatory starting bytes
650                 SSB_CONTINUE => Found optional starting bytes                 SSB_CONTINUE => Found optional starting bytes
651                   SSB_UNKNOWN  => Hit an unrecognized opcode
652  */  */
653    
654  static int  static int
655  set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,  set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8,
656    compile_data *cd)    compile_data *cd)
657  {  {
658  register int c;  register int c;
# Line 679  do Line 680  do
680    {    {
681    BOOL try_next = TRUE;    BOOL try_next = TRUE;
682    const uschar *tcode = code + 1 + LINK_SIZE;    const uschar *tcode = code + 1 + LINK_SIZE;
683    
684    if (*code == OP_CBRA || *code == OP_SCBRA ||    if (*code == OP_CBRA || *code == OP_SCBRA ||
685        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;        *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2;
686    
# Line 688  do Line 689  do
689      int rc;      int rc;
690      switch(*tcode)      switch(*tcode)
691        {        {
692        /* Fail if we reach something we don't understand */        /* If we reach something we don't understand, it means a new opcode has
693          been created that hasn't been added to this code. Hopefully this problem
694          will be discovered during testing. */
695    
696        default:        default:
697          return SSB_UNKNOWN;
698    
699          /* Fail for a valid opcode that implies no starting bits. */
700    
701          case OP_ACCEPT:
702          case OP_ALLANY:
703          case OP_ANY:
704          case OP_ANYBYTE:
705          case OP_CIRC:
706          case OP_CIRCM:
707          case OP_CLOSE:
708          case OP_COMMIT:
709          case OP_COND:
710          case OP_CREF:
711          case OP_DEF:
712          case OP_DOLL:
713          case OP_DOLLM:
714          case OP_END:
715          case OP_EOD:
716          case OP_EODN:
717          case OP_EXTUNI:
718          case OP_FAIL:
719          case OP_MARK:
720          case OP_NCREF:
721          case OP_NOT:
722          case OP_NOTEXACT:
723          case OP_NOTEXACTI:
724          case OP_NOTI:
725          case OP_NOTMINPLUS:
726          case OP_NOTMINPLUSI:
727          case OP_NOTMINQUERY:
728          case OP_NOTMINQUERYI:
729          case OP_NOTMINSTAR:
730          case OP_NOTMINSTARI:
731          case OP_NOTMINUPTO:
732          case OP_NOTMINUPTOI:
733          case OP_NOTPLUS:
734          case OP_NOTPLUSI:
735          case OP_NOTPOSPLUS:
736          case OP_NOTPOSPLUSI:
737          case OP_NOTPOSQUERY:
738          case OP_NOTPOSQUERYI:
739          case OP_NOTPOSSTAR:
740          case OP_NOTPOSSTARI:
741          case OP_NOTPOSUPTO:
742          case OP_NOTPOSUPTOI:
743          case OP_NOTPROP:
744          case OP_NOTQUERY:
745          case OP_NOTQUERYI:
746          case OP_NOTSTAR:
747          case OP_NOTSTARI:
748          case OP_NOTUPTO:
749          case OP_NOTUPTOI:
750          case OP_NOT_HSPACE:
751          case OP_NOT_VSPACE:
752          case OP_NOT_WORD_BOUNDARY:
753          case OP_NRREF:
754          case OP_PROP:
755          case OP_PRUNE:
756          case OP_PRUNE_ARG:
757          case OP_RECURSE:
758          case OP_REF:
759          case OP_REFI:
760          case OP_REVERSE:
761          case OP_RREF:
762          case OP_SCOND:
763          case OP_SET_SOM:
764          case OP_SKIP:
765          case OP_SKIP_ARG:
766          case OP_SOD:
767          case OP_SOM:
768          case OP_THEN:
769          case OP_THEN_ARG:
770          case OP_WORD_BOUNDARY:
771          case OP_XCLASS:
772        return SSB_FAIL;        return SSB_FAIL;
773    
774        /* If we hit a bracket or a positive lookahead assertion, recurse to set        /* If we hit a bracket or a positive lookahead assertion, recurse to set
# Line 709  do Line 787  do
787        case OP_ONCE:        case OP_ONCE:
788        case OP_ASSERT:        case OP_ASSERT:
789        rc = set_start_bits(tcode, start_bits, utf8, cd);        rc = set_start_bits(tcode, start_bits, utf8, cd);
790        if (rc == SSB_FAIL) return SSB_FAIL;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
791        if (rc == SSB_DONE) try_next = FALSE; else        if (rc == SSB_DONE) try_next = FALSE; else
792          {          {
793          do tcode += GET(tcode, 1); while (*tcode == OP_ALT);          do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
# Line 732  do Line 810  do
810        case OP_KET:        case OP_KET:
811        case OP_KETRMAX:        case OP_KETRMAX:
812        case OP_KETRMIN:        case OP_KETRMIN:
813        case OP_KETRPOS:        case OP_KETRPOS:
814        return SSB_CONTINUE;        return SSB_CONTINUE;
815    
816        /* Skip over callout */        /* Skip over callout */
# Line 755  do Line 833  do
833        case OP_BRAZERO:        case OP_BRAZERO:
834        case OP_BRAMINZERO:        case OP_BRAMINZERO:
835        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
836        if (set_start_bits(++tcode, start_bits, utf8, cd) == SSB_FAIL)        rc = set_start_bits(++tcode, start_bits, utf8, cd);
837          return SSB_FAIL;        if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
838  /* =========================================================================  /* =========================================================================
839        See the comment at the head of this function concerning the next line,        See the comment at the head of this function concerning the next line,
840        which was an old fudge for the benefit of OS/2.        which was an old fudge for the benefit of OS/2.
# Line 1058  do Line 1136  do
1136            for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];            for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
1137            }            }
1138    
1139          /* Advance past the bit map, and act on what follows */          /* Advance past the bit map, and act on what follows. For a zero
1140            minimum repeat, continue; otherwise stop processing. */
1141    
1142          tcode += 32;          tcode += 32;
1143          switch (*tcode)          switch (*tcode)
# Line 1075  do Line 1154  do
1154            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;            if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
1155              else try_next = FALSE;              else try_next = FALSE;
1156            break;            break;
1157    
1158            default:            default:
1159            try_next = FALSE;            try_next = FALSE;
1160            break;            break;
# Line 1094  return yield; Line 1173  return yield;
1173    
1174    
1175    
1176    
1177    
1178  /*************************************************  /*************************************************
1179  *          Study a compiled expression           *  *          Study a compiled expression           *
1180  *************************************************/  *************************************************/
# Line 1150  seeking a list of starting bytes. */ Line 1231  seeking a list of starting bytes. */
1231  if ((re->options & PCRE_ANCHORED) == 0 &&  if ((re->options & PCRE_ANCHORED) == 0 &&
1232      (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)      (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
1233    {    {
1234      int rc;
1235    
1236    /* Set the character tables in the block that is passed around */    /* Set the character tables in the block that is passed around */
1237    
1238    tables = re->tables;    tables = re->tables;
# Line 1165  if ((re->options & PCRE_ANCHORED) == 0 & Line 1248  if ((re->options & PCRE_ANCHORED) == 0 &
1248    /* See if we can find a fixed set of initial characters for the pattern. */    /* See if we can find a fixed set of initial characters for the pattern. */
1249    
1250    memset(start_bits, 0, 32 * sizeof(uschar));    memset(start_bits, 0, 32 * sizeof(uschar));
1251    bits_set = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,    rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1252      &compile_block) == SSB_DONE;      &compile_block);
1253      bits_set = rc == SSB_DONE;
1254      if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized";
1255    }    }
1256    
1257  /* Find the minimum length of subject string. */  /* Find the minimum length of subject string. */
# Line 1174  if ((re->options & PCRE_ANCHORED) == 0 & Line 1259  if ((re->options & PCRE_ANCHORED) == 0 &
1259  switch(min = find_minlength(code, code, re->options))  switch(min = find_minlength(code, code, re->options))
1260    {    {
1261    case -2: *errorptr = "internal error: missing capturing bracket"; break;    case -2: *errorptr = "internal error: missing capturing bracket"; break;
1262    case -3: *errorptr = "internal error: opcode not recognized"; break;    case -3: *errorptr = "internal error: opcode not recognized"; break;
1263    default: break;    default: break;
1264    }    }
1265    
1266  /* Return NULL if no optimization is possible. */  /* Return NULL if there's been an error or if no optimization is possible. */
1267    
1268  if (!bits_set && min < 0) return NULL;  if (*errorptr != NULL || (!bits_set && min < 0)) return NULL;
1269    
1270  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in  /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
1271  the latter, which is pointed to by the former, which may also get additional  the latter, which is pointed to by the former, which may also get additional

Legend:
Removed from v.604  
changed lines
  Added in v.605

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12