/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 653 by ph10, Sun Jul 31 17:02:18 2011 UTC | revision 654 by ph10, Tue Aug 2 11:00:40 2011 UTC
# Line 540  else Line 540  else
540      {      {
541      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
542        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
543          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?          *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)?
544          2:0);          2:0);
545      do      do
546        {        {
# Line 621  for (;;) Line 621  for (;;)
621    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
622      {      {
623      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
624      BOOL caseless = FALSE;      BOOL caseless = FALSE;
625      const uschar *code;      const uschar *code;
626      int state_offset = current_state->offset;      int state_offset = current_state->offset;
627      int count, codevalue, rrc;      int count, codevalue, rrc;
# Line 738  for (;;) Line 738  for (;;)
738    
739  /* ========================================================================== */  /* ========================================================================== */
740        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
741        on with the next opcode. For repeating opcodes, also add the repeat        on with the next opcode. For repeating opcodes, also add the repeat
742        state. Note that KETRPOS will always be encountered at the end of the        state. Note that KETRPOS will always be encountered at the end of the
743        subpattern, because the possessive subpattern repeats are always handled        subpattern, because the possessive subpattern repeats are always handled
744        using recursive calls. Thus, it never adds any new states.        using recursive calls. Thus, it never adds any new states.
745    
746        At the end of the (sub)pattern, unless we have an empty string and        At the end of the (sub)pattern, unless we have an empty string and
747        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
748        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
# Line 751  for (;;) Line 751  for (;;)
751        case OP_KET:        case OP_KET:
752        case OP_KETRMIN:        case OP_KETRMIN:
753        case OP_KETRMAX:        case OP_KETRMAX:
754        case OP_KETRPOS:        case OP_KETRPOS:
755        if (code != end_code)        if (code != end_code)
756          {          {
757          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 2179  for (;;) Line 2179  for (;;)
2179        checking (c) can be multibyte. */        checking (c) can be multibyte. */
2180    
2181        case OP_NOTI:        case OP_NOTI:
2182        if (clen > 0 && c != d && c != fcc[d])        if (clen > 0 && c != d && c != fcc[d])
2183          { ADD_NEW(state_offset + dlen + 1, 0); }          { ADD_NEW(state_offset + dlen + 1, 0); }
2184        break;        break;
2185    
# Line 2192  for (;;) Line 2192  for (;;)
2192        case OP_NOTPOSPLUSI:        case OP_NOTPOSPLUSI:
2193        caseless = TRUE;        caseless = TRUE;
2194        codevalue -= OP_STARI - OP_STAR;        codevalue -= OP_STARI - OP_STAR;
2195    
2196        /* Fall through */        /* Fall through */
2197        case OP_PLUS:        case OP_PLUS:
2198        case OP_MINPLUS:        case OP_MINPLUS:
# Line 2560  for (;;) Line 2560  for (;;)
2560              cb.capture_top      = 1;              cb.capture_top      = 1;
2561              cb.capture_last     = -1;              cb.capture_last     = -1;
2562              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2563              cb.mark             = NULL;   /* No (*MARK) support */              cb.mark             = NULL;   /* No (*MARK) support */
2564              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2565              }              }
2566            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
# Line 2587  for (;;) Line 2587  for (;;)
2587            {            {
2588            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2589            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2590            if (md->recursive != NULL)            if (md->recursive != NULL)
2591              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2592            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2593            }            }
# Line 2626  for (;;) Line 2626  for (;;)
2626        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2627        case OP_RECURSE:        case OP_RECURSE:
2628          {          {
2629          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2630          int local_offsets[1000];          int local_offsets[1000];
2631          int local_workspace[1000];          int local_workspace[1000];
2632          const uschar *callpat = start_code + GET(code, 1);          const uschar *callpat = start_code + GET(code, 1);
2633          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2634            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2635          int rc;          int rc;
2636    
2637          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2638    
2639          /* Check for repeating a recursion without advancing the subject          /* Check for repeating a recursion without advancing the subject
2640          pointer. This should catch convoluted mutual recursions. (Some simple          pointer. This should catch convoluted mutual recursions. (Some simple
2641          cases are caught at compile time.) */          cases are caught at compile time.) */
   
         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)  
           if (recno == ri->group_num && ptr == ri->subject_position)  
             return PCRE_ERROR_RECURSELOOP;  
2642    
2643          /* Remember this recursion and where we started it so as to          for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2644              if (recno == ri->group_num && ptr == ri->subject_position)
2645                return PCRE_ERROR_RECURSELOOP;
2646    
2647            /* Remember this recursion and where we started it so as to
2648          catch infinite loops. */          catch infinite loops. */
2649    
2650          new_recursive.group_num = recno;          new_recursive.group_num = recno;
2651          new_recursive.subject_position = ptr;          new_recursive.subject_position = ptr;
2652          new_recursive.prevrec = md->recursive;          new_recursive.prevrec = md->recursive;
2653          md->recursive = &new_recursive;          md->recursive = &new_recursive;
2654    
2655          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2656            md,                                   /* fixed match data */            md,                                   /* fixed match data */
# Line 2665  for (;;) Line 2665  for (;;)
2665    
2666          md->recursive = new_recursive.prevrec;  /* Done this recursion */          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2667    
2668          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,          DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2669            rc));            rc));
2670    
2671          /* Ran out of internal offsets */          /* Ran out of internal offsets */
# Line 2703  for (;;) Line 2703  for (;;)
2703        case OP_SBRAPOS:        case OP_SBRAPOS:
2704        case OP_CBRAPOS:        case OP_CBRAPOS:
2705        case OP_SCBRAPOS:        case OP_SCBRAPOS:
2706        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2707          {          {
2708          int charcount, matched_count;          int charcount, matched_count;
2709          const uschar *local_ptr = ptr;          const uschar *local_ptr = ptr;
2710          BOOL allow_zero;          BOOL allow_zero;
2711    
2712          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
2713            {            {
2714            allow_zero = TRUE;            allow_zero = TRUE;
2715            codevalue = *(++code);  /* Codevalue will be one of above BRAs */            codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2716            }            }
2717          else allow_zero = FALSE;          else allow_zero = FALSE;
2718    
2719          /* Loop to match the subpattern as many times as possible as if it were          /* Loop to match the subpattern as many times as possible as if it were
2720          a complete pattern. */          a complete pattern. */
2721    
2722          for (matched_count = 0;; matched_count++)          for (matched_count = 0;; matched_count++)
2723            {            {
2724            int local_offsets[2];            int local_offsets[2];
2725            int local_workspace[1000];            int local_workspace[1000];
2726    
2727            int rc = internal_dfa_exec(            int rc = internal_dfa_exec(
2728              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2729              code,                                 /* this subexpression's code */              code,                                 /* this subexpression's code */
# Line 2734  for (;;) Line 2734  for (;;)
2734              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2735              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2736              rlevel);                              /* function recursion level */              rlevel);                              /* function recursion level */
2737    
2738            /* Failed to match */            /* Failed to match */
2739    
2740            if (rc < 0)            if (rc < 0)
2741              {              {
2742              if (rc != PCRE_ERROR_NOMATCH) return rc;              if (rc != PCRE_ERROR_NOMATCH) return rc;
2743              break;              break;
2744              }              }
2745    
2746            /* Matched: break the loop if zero characters matched. */            /* Matched: break the loop if zero characters matched. */
2747    
2748            charcount = local_offsets[1] - local_offsets[0];            charcount = local_offsets[1] - local_offsets[0];
2749            if (charcount == 0) break;            if (charcount == 0) break;
2750            local_ptr += charcount;    /* Advance temporary position ptr */            local_ptr += charcount;    /* Advance temporary position ptr */
2751            }            }
2752    
2753          /* At this point we have matched the subpattern matched_count          /* At this point we have matched the subpattern matched_count
2754          times, and local_ptr is pointing to the character after the end of the          times, and local_ptr is pointing to the character after the end of the
2755          last match. */          last match. */
2756    
2757          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2758            {            {
2759            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
2760            int next_state_offset;            int next_state_offset;
2761    
2762            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2763              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2764            next_state_offset =            next_state_offset =
# Line 2779  for (;;) Line 2779  for (;;)
2779              {              {
2780              const uschar *p = ptr;              const uschar *p = ptr;
2781              const uschar *pp = local_ptr;              const uschar *pp = local_ptr;
2782              charcount = pp - p;              charcount = pp - p;
2783              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2784              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2785              }              }
2786            }            }
2787          }          }
2788        break;        break;
2789    
2790        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2791        case OP_ONCE:        case OP_ONCE:
2792          {          {
# Line 2892  for (;;) Line 2892  for (;;)
2892          cb.capture_top      = 1;          cb.capture_top      = 1;
2893          cb.capture_last     = -1;          cb.capture_last     = -1;
2894          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2895          cb.mark             = NULL;   /* No (*MARK) support */          cb.mark             = NULL;   /* No (*MARK) support */
2896          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2897          }          }
2898        if (rrc == 0)        if (rrc == 0)
# Line 3143  back the character offset. */ Line 3143  back the character offset. */
3143  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3144  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3145    {    {
3146    int erroroffset;    int erroroffset;
3147    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);
3148    if (errorcode != 0)    if (errorcode != 0)
3149      {      {
# Line 3151  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) Line 3151  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3151        {        {
3152        offsets[0] = erroroffset;        offsets[0] = erroroffset;
3153        offsets[1] = errorcode;        offsets[1] = errorcode;
3154        }        }
3155      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?      return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3156        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3157      }      }
3158    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3159          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)
3160      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3161    }    }
3162  #endif  #endif
# Line 3395  for (;;) Line 3395  for (;;)
3395    /* OK, now we can do the business */    /* OK, now we can do the business */
3396    
3397    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3398    md->recursive = NULL;    md->recursive = NULL;
3399    
3400    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3401      md,                                /* fixed match data */      md,                                /* fixed match data */

Legend:
Removed from v.653  
changed lines
  Added in v.654

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12