/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 606 by ph10, Mon Jun 6 17:46:22 2011 UTC revision 614 by ph10, Sat Jul 9 10:48:16 2011 UTC
# Line 276  enum { RM1=1, RM2, RM3, RM4, RM5, RM Line 276  enum { RM1=1, RM2, RM3, RM4, RM5, RM
276         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
277         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
278         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
279         RM61,  RM62, RM63, RM64 };         RM61,  RM62, RM63};
280    
281  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
282  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 847  for (;;) Line 847  for (;;)
847          if (rrc != MATCH_NOMATCH &&          if (rrc != MATCH_NOMATCH &&
848              (rrc != MATCH_THEN || md->start_match_ptr != ecode))              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
849            RRETURN(rrc);            RRETURN(rrc);
850    
851            /* If md->end_offset_top is greater than offset_top, it means that the
852            branch we have just failed to match did manage to match some capturing
853            parentheses within an atomic group or an assertion. Although offset_top
854            reverts to its original value at this level, we must unset the captured
855            values in case a later match sets a higher capturing number. Example:
856            matching /((?>(a))b|(a)c)/ against "ac". This captures 3, but we need
857            to ensure that 2 - which was captured in the atomic matching - is
858            unset. */
859    
860            if (md->end_offset_top > offset_top)
861              {
862              register int *iptr = md->offset_vector + offset_top;
863              register int *iend = md->offset_vector + md->end_offset_top;
864              while (iptr < iend) *iptr++ = -1;
865              }
866    
867          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
868          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
869          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
# Line 858  for (;;) Line 875  for (;;)
875        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
876        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
877    
878        if (rrc != MATCH_THEN) md->mark = markptr;        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
879        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
880        }        }
881    
# Line 875  for (;;) Line 892  for (;;)
892    
893      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop      /* Non-capturing bracket, except for possessive with unlimited repeat. Loop
894      for all the alternatives. When we get to the final alternative within the      for all the alternatives. When we get to the final alternative within the
895      brackets, we would return the result of a recursive call to match()      brackets, we used to return the result of a recursive call to match()
896      whatever happened. We can reduce stack usage by turning this into a tail      whatever happened so it was possible to reduce stack usage by turning this
897      recursion, except in the case of a possibly empty group.*/      into a tail recursion, except in the case of a possibly empty group.
898        However, now that there is the possibility of (*THEN) occurring in the final
899        alternative, this optimization is no longer possible. */
900    
901      case OP_BRA:      case OP_BRA:
902      case OP_SBRA:      case OP_SBRA:
903      DPRINTF(("start non-capturing bracket\n"));      DPRINTF(("start non-capturing bracket\n"));
904      for (;;)      for (;;)
905        {        {
       if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */  
         {  
         if (op >= OP_SBRA)   /* Possibly empty group */  
           {  
           md->match_function_type = MATCH_CBEGROUP;  
           RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,  
             RM48);  
           if (rrc == MATCH_NOMATCH) md->mark = markptr;  
           RRETURN(rrc);  
           }  
         /* Not a possibly empty group; use tail recursion */  
         ecode += _pcre_OP_lengths[*ecode];  
         DPRINTF(("bracket 0 tail recursion\n"));  
         goto TAIL_RECURSE;  
         }  
   
       /* For non-final alternatives, continue the loop for a NOMATCH result;  
       otherwise return. */  
   
906        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
907        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
908          RM2);          RM2);
909        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
910            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
911          RRETURN(rrc);          RRETURN(rrc);
912    
913          /* See explanatory comment above under OP_CBRA. */
914    
915          if (md->end_offset_top > offset_top)
916            {
917            register int *iptr = md->offset_vector + offset_top;
918            register int *iend = md->offset_vector + md->end_offset_top;
919            while (iptr < iend) *iptr++ = -1;
920            }
921    
922        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
923          if (*ecode != OP_ALT) break;
924        }        }
925      /* Control never reaches here. */  
926        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
927        RRETURN(MATCH_NOMATCH);
928    
929      /* Handle possessive capturing brackets with an unlimited repeat. We come      /* Handle possessive capturing brackets with an unlimited repeat. We come
930      here from BRAZERO with allow_zero set TRUE. The offset_vector values are      here from BRAZERO with allow_zero set TRUE. The offset_vector values are
# Line 976  for (;;) Line 989  for (;;)
989          if (rrc != MATCH_NOMATCH &&          if (rrc != MATCH_NOMATCH &&
990              (rrc != MATCH_THEN || md->start_match_ptr != ecode))              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
991            RRETURN(rrc);            RRETURN(rrc);
992    
993            /* See explanatory comment above under OP_CBRA. */
994    
995            if (md->end_offset_top > offset_top)
996              {
997              register int *iptr = md->offset_vector + offset_top;
998              register int *iend = md->offset_vector + md->end_offset_top;
999              while (iptr < iend) *iptr++ = -1;
1000              }
1001    
1002          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
1003          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
1004          if (*ecode != OP_ALT) break;          if (*ecode != OP_ALT) break;
1005          }          }
1006    
1007        if (!matched_once)        if (!matched_once)
1008          {          {
1009          md->offset_vector[offset] = save_offset1;          md->offset_vector[offset] = save_offset1;
# Line 988  for (;;) Line 1011  for (;;)
1011          md->offset_vector[md->offset_end - number] = save_offset3;          md->offset_vector[md->offset_end - number] = save_offset3;
1012          }          }
1013    
1014        if (rrc != MATCH_THEN) md->mark = markptr;        if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr;
1015        if (allow_zero || matched_once)        if (allow_zero || matched_once)
1016          {          {
1017          ecode += 1 + LINK_SIZE;          ecode += 1 + LINK_SIZE;
# Line 1026  for (;;) Line 1049  for (;;)
1049        {        {
1050        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;        if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1051        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
1052          eptrb, RM64);          eptrb, RM48);
1053        if (rrc == MATCH_KETRPOS)        if (rrc == MATCH_KETRPOS)
1054          {          {
1055            offset_top = md->end_offset_top;
1056          eptr = md->end_match_ptr;          eptr = md->end_match_ptr;
1057          ecode = md->start_code + code_offset;          ecode = md->start_code + code_offset;
1058          matched_once = TRUE;          matched_once = TRUE;
# Line 1037  for (;;) Line 1061  for (;;)
1061        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1062            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1063          RRETURN(rrc);          RRETURN(rrc);
1064    
1065          /* See explanatory comment above under OP_CBRA. */
1066    
1067          if (md->end_offset_top > offset_top)
1068            {
1069            register int *iptr = md->offset_vector + offset_top;
1070            register int *iend = md->offset_vector + md->end_offset_top;
1071            while (iptr < iend) *iptr++ = -1;
1072            }
1073    
1074        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1075        if (*ecode != OP_ALT) break;        if (*ecode != OP_ALT) break;
1076        }        }
1077    
1078      if (matched_once || allow_zero)      if (matched_once || allow_zero)
1079        {        {
1080        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
# Line 1053  for (;;) Line 1087  for (;;)
1087      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
1088      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
1089      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
1090      exactly what going to the ket would do. As there is only one branch to be      exactly what going to the ket would do. */
     obeyed, we can use tail recursion to avoid using another stack frame. */  
1091    
1092      case OP_COND:      case OP_COND:
1093      case OP_SCOND:      case OP_SCOND:
# Line 1259  for (;;) Line 1292  for (;;)
1292        }        }
1293    
1294      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1295      we can use tail recursion to avoid using another stack frame, except when      we used to use tail recursion to avoid using another stack frame, except
1296      we have an unlimited repeat of a possibly empty group. If the second      when there was an unlimited repeat of a possibly empty group. However, that
1297      alternative doesn't exist, we can just plough on. */      strategy no longer works because of the possibility of (*THEN) being
1298        encountered in the branch. A recursive call to match() is always required,
1299        unless the second alternative doesn't exist, in which case we can just
1300        plough on. */
1301    
1302      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1303        {        {
1304        ecode += 1 + LINK_SIZE;        if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP;
1305        if (op == OP_SCOND)        /* Possibly empty group */        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49);
1306          {        if (rrc == MATCH_THEN && md->start_match_ptr == ecode)
1307          md->match_function_type = MATCH_CBEGROUP;          rrc = MATCH_NOMATCH;
1308          RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);        RRETURN(rrc);
         RRETURN(rrc);  
         }  
       else goto TAIL_RECURSE;  
1309        }        }
1310      else                         /* Condition false & no alternative */      else                         /* Condition false & no alternative */
1311        {        {
# Line 1305  for (;;) Line 1338  for (;;)
1338      break;      break;
1339    
1340    
1341      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a recursion, we
1342      recursion, we should restore the offsets appropriately and continue from      should restore the offsets appropriately, and if it's a top-level
1343      after the call. */      recursion, continue from after the call. */
1344    
1345      case OP_ACCEPT:      case OP_ACCEPT:
1346        case OP_ASSERT_ACCEPT:
1347      case OP_END:      case OP_END:
1348      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL)
1349        {        {
1350        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
       DPRINTF(("End of pattern in a (?0) recursion\n"));  
1351        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1352        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1353          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1354        offset_top = rec->save_offset_top;        offset_top = rec->save_offset_top;
1355        ecode = rec->after_call;        if (rec->group_num == 0)
1356        break;          {
1357            ecode = rec->after_call;
1358            break;
1359            }
1360        }        }
1361    
1362      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is      /* Otherwise, if we have matched an empty string, fail if not in an
1363      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of      assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1364      the subject. In both cases, backtracking will then try other alternatives,      is set and we have matched at the start of the subject. In both cases,
1365      if any. */      backtracking will then try other alternatives, if any. */
1366    
1367      if (eptr == mstart &&      else if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
1368          (md->notempty ||          (md->notempty ||
1369            (md->notempty_atstart &&            (md->notempty_atstart &&
1370              mstart == md->start_subject + md->start_offset)))              mstart == md->start_subject + md->start_offset)))
1371        MRRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1372    
1373      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
1374    
1375      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1376      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1377      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 1377  for (;;) Line 1413  for (;;)
1413        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1414            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1415          RRETURN(rrc);          RRETURN(rrc);
1416    
1417          /* See explanatory comment above under OP_CBRA. */
1418    
1419          if (md->end_offset_top > offset_top)
1420            {
1421            register int *iptr = md->offset_vector + offset_top;
1422            register int *iend = md->offset_vector + md->end_offset_top;
1423            while (iptr < iend) *iptr++ = -1;
1424            }
1425    
1426        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1427        }        }
1428      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1538  for (;;) Line 1584  for (;;)
1584        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1585              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1586        new_recursive.save_offset_top = offset_top;        new_recursive.save_offset_top = offset_top;
1587    
1588        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1589        restore the offset and recursion data. */        restore the offset and recursion data. */
1590    
# Line 1604  for (;;) Line 1650  for (;;)
1650        if (rrc != MATCH_NOMATCH &&        if (rrc != MATCH_NOMATCH &&
1651            (rrc != MATCH_THEN || md->start_match_ptr != ecode))            (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1652          RRETURN(rrc);          RRETURN(rrc);
1653    
1654          /* See explanatory comment above under OP_CBRA. */
1655    
1656          if (md->end_offset_top > offset_top)
1657            {
1658            register int *iptr = md->offset_vector + offset_top;
1659            register int *iend = md->offset_vector + md->end_offset_top;
1660            while (iptr < iend) *iptr++ = -1;
1661            }
1662    
1663        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1664        }        }
1665      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1612  for (;;) Line 1668  for (;;)
1668    
1669      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1670    
1671      /* Continue as from after the assertion, updating the offsets high water      /* Continue after the group, updating the offsets high water mark, since
1672      mark, since extracts may have been taken. */      extracts may have been taken. */
1673    
1674      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1675    
# Line 5701  switch (frame->Xwhere) Line 5757  switch (frame->Xwhere)
5757    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5758    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5759    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5760    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63)
5761  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5762    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5763    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
# Line 5911  utf8 = md->utf8 = (re->options & PCRE_UT Line 5967  utf8 = md->utf8 = (re->options & PCRE_UT
5967  md->use_ucp = (re->options & PCRE_UCP) != 0;  md->use_ucp = (re->options & PCRE_UCP) != 0;
5968  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5969    
5970    /* Some options are unpacked into BOOL variables in the hope that testing
5971    them will be faster than individual option bits. */
5972    
5973  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
5974  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
5975  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
5976  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
5977  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
5978                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;                ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
5979    
5980    
5981  md->hitend = FALSE;  md->hitend = FALSE;
5982  md->mark = NULL;                        /* In case never set */  md->mark = NULL;                        /* In case never set */
5983    
# Line 6304  for(;;) Line 6365  for(;;)
6365    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6366    md->match_call_count = 0;    md->match_call_count = 0;
6367    md->match_function_type = 0;    md->match_function_type = 0;
6368      md->end_offset_top = 0;
6369    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);    rc = match(start_match, md->start_code, start_match, NULL, 2, md, NULL, 0);
6370    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
6371    

Legend:
Removed from v.606  
changed lines
  Added in v.614

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12