/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC | revision 435 by ph10, Sat Sep 5 10:20:44 2009 UTC
# Line 389  if (*first_op == OP_REVERSE) Line 389  if (*first_op == OP_REVERSE)
389        current_subject - start_subject : max_back;        current_subject - start_subject : max_back;
390      current_subject -= gone_back;      current_subject -= gone_back;
391      }      }
392    
393      /* Save the earliest consulted character */
394    
395      if (current_subject < md->start_used_ptr)
396        md->start_used_ptr = current_subject;
397    
398    /* Now we can process the individual branches. */    /* Now we can process the individual branches. */
399    
# Line 454  for (;;) Line 459  for (;;)
459    int i, j;    int i, j;
460    int clen, dlen;    int clen, dlen;
461    unsigned int c, d;    unsigned int c, d;
462      int forced_fail = 0;
463      int reached_end = 0;
464    
465    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
466    new state list. */    new state list. */
# Line 624  for (;;) Line 631  for (;;)
631            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
632            }            }
633          }          }
634        else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)        else
635          {          {
636          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;          reached_end++;    /* Count branches that reach the end */
637            else if (match_count > 0 && ++match_count * 2 >= offsetcount)          if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
638              match_count = 0;            {
639          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
640          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));              else if (match_count > 0 && ++match_count * 2 >= offsetcount)
641          if (offsetcount >= 2)                match_count = 0;
642            {            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
643            offsets[0] = current_subject - start_subject;            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
644            offsets[1] = ptr - start_subject;            if (offsetcount >= 2)
645            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              {
646              offsets[1] - offsets[0], current_subject));              offsets[0] = current_subject - start_subject;
647            }              offsets[1] = ptr - start_subject;
648          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
649            {                offsets[1] - offsets[0], current_subject));
650            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"              }
651              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
652              match_count, rlevel*2-2, SP));              {
653            return match_count;              DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
654            }                "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
655                  match_count, rlevel*2-2, SP));
656                return match_count;
657                }
658              }
659          }          }
660        break;        break;
661    
# Line 794  for (;;) Line 805  for (;;)
805          if (ptr > start_subject)          if (ptr > start_subject)
806            {            {
807            const uschar *temp = ptr - 1;            const uschar *temp = ptr - 1;
808              if (temp < md->start_used_ptr) md->start_used_ptr = temp;
809  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
810            if (utf8) BACKCHAR(temp);            if (utf8) BACKCHAR(temp);
811  #endif  #endif
# Line 802  for (;;) Line 814  for (;;)
814            }            }
815          else left_word = 0;          else left_word = 0;
816    
817          if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;          if (clen > 0)
818            else right_word = 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
819            else              /* This is a fudge to ensure that if this is the */
820              {               /* last item in the pattern, we don't count it as */
821              reached_end--;  /* reached, thus disabling a partial match. */
822              right_word = 0;
823              }
824    
825          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
826            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 2162  for (;;) Line 2179  for (;;)
2179        though the other "backtracking verbs" are not supported. */        though the other "backtracking verbs" are not supported. */
2180    
2181        case OP_FAIL:        case OP_FAIL:
2182          forced_fail++;    /* Count FAILs for multiple states */
2183        break;        break;
2184    
2185        case OP_ASSERT:        case OP_ASSERT:
# Line 2469  for (;;) Line 2487  for (;;)
2487    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
2488    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
2489    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
2490    matching has been requested, check for appropriate conditions. */    matching has been requested, check for appropriate conditions. The "forced_
2491      fail" variable counts the number of (*F) encountered for the character. If it
2492      is equal to the original active_count (saved in workspace[1]) it means that
2493      (*F) was found on every active state. In this case we don't want to give a
2494      partial match. */
2495    
2496    if (new_count <= 0)    if (new_count <= 0)
2497      {      {
2498      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
2499            reached_end != workspace[1] &&               /* Not all reached end */
2500            forced_fail != workspace[1] &&               /* Not all forced fail & */
2501          (                                            /* either... */          (                                            /* either... */
2502          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2503          ||                                           /* or... */          ||                                           /* or... */
# Line 2485  for (;;) Line 2509  for (;;)
2509        {        {
2510        if (offsetcount >= 2)        if (offsetcount >= 2)
2511          {          {
2512          offsets[0] = current_subject - start_subject;          offsets[0] = md->start_used_ptr - start_subject;
2513          offsets[1] = end_subject - start_subject;          offsets[1] = end_subject - start_subject;
2514          }          }
2515        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
# Line 2871  for (;;) Line 2895  for (;;)
2895    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2896    
2897    ALSO: this processing is disabled when partial matching is requested, and can    ALSO: this processing is disabled when partial matching is requested, and can
2898    also be explicitly deactivated. */    also be explicitly deactivated. Furthermore, we have to disable when
2899      restarting after a partial match, because the required character may have
2900      already been matched. */
2901    
2902    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2903        req_byte >= 0 &&        req_byte >= 0 &&
2904        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2905        (options & PCRE_PARTIAL) == 0)        (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0)
2906      {      {
2907      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2908    
# Line 2916  for (;;) Line 2942  for (;;)
2942    
2943    /* OK, now we can do the business */    /* OK, now we can do the business */
2944    
2945      md->start_used_ptr = current_subject;
2946    
2947    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2948      md,                                /* fixed match data */      md,                                /* fixed match data */
2949      md->start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */

Legend:
Removed from v.427  
changed lines
  Added in v.435

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12