/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 511 by ph10, Mon Mar 29 09:25:38 2010 UTC | revision 538 by ph10, Wed Jun 9 19:30:57 2010 UTC
# Line 255  enum { RM1=1, RM2, RM3, RM4, RM5, RM Line 255  enum { RM1=1, RM2, RM3, RM4, RM5, RM
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258         RM51,  RM52, RM53, RM54 };         RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
# Line 294  argument of match(), which never changes Line 295  argument of match(), which never changes
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301    newframe->Xecode = rb;\    newframe->Xecode = rb;\
# Line 314  argument of match(), which never changes Line 316  argument of match(), which never changes
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      rrc = ra;\      rrc = ra;\
# Line 487  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 685  for (;;) Line 688  for (;;)
688      case OP_MARK:      case OP_MARK:
689      markptr = ecode + 2;      markptr = ecode + 2;
690      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM55);
692    
693      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an      /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694      argument, and we must check whether that argument matches this MARK's      argument, and we must check whether that argument matches this MARK's
695      argument. It is passed back in md->start_match_ptr (an overloading of that      argument. It is passed back in md->start_match_ptr (an overloading of that
696      variable). If it does match, we reset that variable to the current subject      variable). If it does match, we reset that variable to the current subject
697      position and return MATCH_SKIP. Otherwise, pass back the return code      position and return MATCH_SKIP. Otherwise, pass back the return code
698      unaltered. */      unaltered. */
699    
700      if (rrc == MATCH_SKIP_ARG &&      if (rrc == MATCH_SKIP_ARG &&
701          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)          strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702        {        {
703        md->start_match_ptr = eptr;        md->start_match_ptr = eptr;
704        RRETURN(MATCH_SKIP);        RRETURN(MATCH_SKIP);
705        }        }
706    
707      if (md->mark == NULL) md->mark = markptr;      if (md->mark == NULL) md->mark = markptr;
708      RRETURN(rrc);      RRETURN(rrc);
709    
710      case OP_FAIL:      case OP_FAIL:
# Line 721  for (;;) Line 724  for (;;)
724    
725      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
726      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
727        ims, eptrb, flags, RM51);        ims, eptrb, flags, RM56);
728      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
729      md->mark = ecode + 2;      md->mark = ecode + 2;
730      RRETURN(MATCH_PRUNE);      RRETURN(MATCH_PRUNE);
# Line 735  for (;;) Line 738  for (;;)
738    
739      case OP_SKIP_ARG:      case OP_SKIP_ARG:
740      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
741        ims, eptrb, flags, RM53);        ims, eptrb, flags, RM57);
742      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
743    
744      /* Pass back the current skip name by overloading md->start_match_ptr and      /* Pass back the current skip name by overloading md->start_match_ptr and
745      returning the special MATCH_SKIP_ARG return code. This will either be      returning the special MATCH_SKIP_ARG return code. This will either be
746      caught by a matching MARK, or get to the top, where it is treated the same      caught by a matching MARK, or get to the top, where it is treated the same
747      as PRUNE. */      as PRUNE. */
748    
749      md->start_match_ptr = ecode + 2;      md->start_match_ptr = ecode + 2;
750      RRETURN(MATCH_SKIP_ARG);      RRETURN(MATCH_SKIP_ARG);
751    
752      case OP_THEN:      case OP_THEN:
753      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
754        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM54);
# Line 754  for (;;) Line 757  for (;;)
757    
758      case OP_THEN_ARG:      case OP_THEN_ARG:
759      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,      RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
760        ims, eptrb, flags, RM54);        ims, eptrb, flags, RM58);
761      if (rrc != MATCH_NOMATCH) RRETURN(rrc);      if (rrc != MATCH_NOMATCH) RRETURN(rrc);
762      md->mark = ecode + 2;      md->mark = ecode + 2;
763      RRETURN(MATCH_THEN);      RRETURN(MATCH_THEN);
# Line 793  for (;;) Line 796  for (;;)
796        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
797    
798        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
799        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
800            (int)(eptr - md->start_subject);
801    
802        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
803        do        do
# Line 852  for (;;) Line 856  for (;;)
856    
857          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
858            eptrb, flags, RM48);            eptrb, flags, RM48);
859          if (rrc == MATCH_NOMATCH) md->mark = markptr;          if (rrc == MATCH_NOMATCH) md->mark = markptr;
860          RRETURN(rrc);          RRETURN(rrc);
861          }          }
862    
863        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 888  for (;;) Line 892  for (;;)
892          cb.callout_number   = ecode[LINK_SIZE+2];          cb.callout_number   = ecode[LINK_SIZE+2];
893          cb.offset_vector    = md->offset_vector;          cb.offset_vector    = md->offset_vector;
894          cb.subject          = (PCRE_SPTR)md->start_subject;          cb.subject          = (PCRE_SPTR)md->start_subject;
895          cb.subject_length   = md->end_subject - md->start_subject;          cb.subject_length   = (int)(md->end_subject - md->start_subject);
896          cb.start_match      = mstart - md->start_subject;          cb.start_match      = (int)(mstart - md->start_subject);
897          cb.current_position = eptr - md->start_subject;          cb.current_position = (int)(eptr - md->start_subject);
898          cb.pattern_position = GET(ecode, LINK_SIZE + 3);          cb.pattern_position = GET(ecode, LINK_SIZE + 3);
899          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);          cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
900          cb.capture_top      = offset_top/2;          cb.capture_top      = offset_top/2;
# Line 1116  for (;;) Line 1120  for (;;)
1120        {        {
1121        md->offset_vector[offset] =        md->offset_vector[offset] =
1122          md->offset_vector[md->offset_end - number];          md->offset_vector[md->offset_end - number];
1123        md->offset_vector[offset+1] = eptr - md->start_subject;        md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1124        if (offset_top <= offset) offset_top = offset + 2;        if (offset_top <= offset) offset_top = offset + 2;
1125        }        }
1126      ecode += 3;      ecode += 3;
# Line 1158  for (;;) Line 1162  for (;;)
1162      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1163      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1164      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1165      MRRETURN(((op == OP_END)? MATCH_MATCH : MATCH_ACCEPT));  
1166        /* For some reason, the macros don't work properly if an expression is
1167        given as the argument to MRRETURN when the heap is in use. */
1168    
1169        rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1170        MRRETURN(rrc);
1171    
1172      /* Change option settings */      /* Change option settings */
1173    
# Line 1274  for (;;) Line 1283  for (;;)
1283        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1284        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1285        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1286        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1287        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1288        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1289        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1290        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1291        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
# Line 1553  for (;;) Line 1562  for (;;)
1562          {          {
1563          md->offset_vector[offset] =          md->offset_vector[offset] =
1564            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1565          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1566          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1567          }          }
1568    
# Line 1713  for (;;) Line 1722  for (;;)
1722  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1723        if (utf8)        if (utf8)
1724          {          {
1725            /* Get status of previous character */
1726    
1727          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1728            {            {
1729            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1730            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1731            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1732            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1733    #ifdef SUPPORT_UCP
1734              if (md->use_ucp)
1735                {
1736                if (c == '_') prev_is_word = TRUE; else
1737                  {
1738                  int cat = UCD_CATEGORY(c);
1739                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1740                  }
1741                }
1742              else
1743    #endif
1744            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1745            }            }
1746    
1747            /* Get status of next character */
1748    
1749          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1750            {            {
1751            SCHECK_PARTIAL();            SCHECK_PARTIAL();
# Line 1729  for (;;) Line 1754  for (;;)
1754          else          else
1755            {            {
1756            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1757    #ifdef SUPPORT_UCP
1758              if (md->use_ucp)
1759                {
1760                if (c == '_') cur_is_word = TRUE; else
1761                  {
1762                  int cat = UCD_CATEGORY(c);
1763                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1764                  }
1765                }
1766              else
1767    #endif
1768            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1769            }            }
1770          }          }
1771        else        else
1772  #endif  #endif
1773    
1774        /* Not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1775          consistency with the behaviour of \w we do use it in this case. */
1776    
1777          {          {
1778            /* Get status of previous character */
1779    
1780          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1781            {            {
1782            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1783    #ifdef SUPPORT_UCP
1784              if (md->use_ucp)
1785                {
1786                c = eptr[-1];
1787                if (c == '_') prev_is_word = TRUE; else
1788                  {
1789                  int cat = UCD_CATEGORY(c);
1790                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1791                  }
1792                }
1793              else
1794    #endif
1795            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1796            }            }
1797    
1798            /* Get status of next character */
1799    
1800          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1801            {            {
1802            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1803            cur_is_word = FALSE;            cur_is_word = FALSE;
1804            }            }
1805          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else
1806    #ifdef SUPPORT_UCP
1807            if (md->use_ucp)
1808              {
1809              c = *eptr;
1810              if (c == '_') cur_is_word = TRUE; else
1811                {
1812                int cat = UCD_CATEGORY(c);
1813                cur_is_word = (cat == ucp_L || cat == ucp_N);
1814                }
1815              }
1816            else
1817    #endif
1818            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1819          }          }
1820    
1821        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 2055  for (;;) Line 2122  for (;;)
2122               prop->chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2123               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2124            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2125           break;          break;
2126    
2127          case PT_GC:          case PT_GC:
2128          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
# Line 2072  for (;;) Line 2139  for (;;)
2139            MRRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2140          break;          break;
2141    
2142            /* These are specials */
2143    
2144            case PT_ALNUM:
2145            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2146                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2147              MRRETURN(MATCH_NOMATCH);
2148            break;
2149    
2150            case PT_SPACE:    /* Perl space */
2151            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2152                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2153                   == (op == OP_NOTPROP))
2154              MRRETURN(MATCH_NOMATCH);
2155            break;
2156    
2157            case PT_PXSPACE:  /* POSIX space */
2158            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2159                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2160                 c == CHAR_FF || c == CHAR_CR)
2161                   == (op == OP_NOTPROP))
2162              MRRETURN(MATCH_NOMATCH);
2163            break;
2164    
2165            case PT_WORD:
2166            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2167                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2168                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2169              MRRETURN(MATCH_NOMATCH);
2170            break;
2171    
2172            /* This should never occur */
2173    
2174          default:          default:
2175          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2176          }          }
# Line 2137  for (;;) Line 2236  for (;;)
2236        referenced subpattern. */        referenced subpattern. */
2237    
2238        if (offset >= offset_top || md->offset_vector[offset] < 0)        if (offset >= offset_top || md->offset_vector[offset] < 0)
2239          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2240        else        else
2241          length = md->offset_vector[offset+1] - md->offset_vector[offset];          length = md->offset_vector[offset+1] - md->offset_vector[offset];
2242    
# Line 3488  for (;;) Line 3587  for (;;)
3587              }              }
3588            break;            break;
3589    
3590              case PT_ALNUM:
3591              for (i = 1; i <= min; i++)
3592                {
3593                if (eptr >= md->end_subject)
3594                  {
3595                  SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_category = UCD_CATEGORY(c);
3600                if ((prop_category == ucp_L || prop_category == ucp_N)
3601                       == prop_fail_result)
3602                  MRRETURN(MATCH_NOMATCH);
3603                }
3604              break;
3605    
3606              case PT_SPACE:    /* Perl space */
3607              for (i = 1; i <= min; i++)
3608                {
3609                if (eptr >= md->end_subject)
3610                  {
3611                  SCHECK_PARTIAL();
3612                  MRRETURN(MATCH_NOMATCH);
3613                  }
3614                GETCHARINCTEST(c, eptr);
3615                prop_category = UCD_CATEGORY(c);
3616                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3617                     c == CHAR_FF || c == CHAR_CR)
3618                       == prop_fail_result)
3619                  MRRETURN(MATCH_NOMATCH);
3620                }
3621              break;
3622    
3623              case PT_PXSPACE:  /* POSIX space */
3624              for (i = 1; i <= min; i++)
3625                {
3626                if (eptr >= md->end_subject)
3627                  {
3628                  SCHECK_PARTIAL();
3629                  MRRETURN(MATCH_NOMATCH);
3630                  }
3631                GETCHARINCTEST(c, eptr);
3632                prop_category = UCD_CATEGORY(c);
3633                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3634                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3635                       == prop_fail_result)
3636                  MRRETURN(MATCH_NOMATCH);
3637                }
3638              break;
3639    
3640              case PT_WORD:
3641              for (i = 1; i <= min; i++)
3642                {
3643                if (eptr >= md->end_subject)
3644                  {
3645                  SCHECK_PARTIAL();
3646                  MRRETURN(MATCH_NOMATCH);
3647                  }
3648                GETCHARINCTEST(c, eptr);
3649                prop_category = UCD_CATEGORY(c);
3650                if ((prop_category == ucp_L || prop_category == ucp_N ||
3651                     c == CHAR_UNDERSCORE)
3652                       == prop_fail_result)
3653                  MRRETURN(MATCH_NOMATCH);
3654                }
3655              break;
3656    
3657              /* This should not occur */
3658    
3659            default:            default:
3660            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3661            }            }
# Line 4048  for (;;) Line 4216  for (;;)
4216                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4217                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4218                }                }
4219              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4220              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4221              }              }
4222            /* Control never gets here */            /* Control never gets here */
# Line 4064  for (;;) Line 4232  for (;;)
4232                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4233                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4234                }                }
4235              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4236              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4237              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4238                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4084  for (;;) Line 4252  for (;;)
4252                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4253                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4254                }                }
4255              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4256              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4257              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4258                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4102  for (;;) Line 4270  for (;;)
4270                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4271                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4272                }                }
4273              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4274              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4275              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4276                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
# Line 4120  for (;;) Line 4288  for (;;)
4288                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4289                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4290                }                }
4291              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
4292              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4293              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4294                MRRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4295              }              }
4296            /* Control never gets here */            /* Control never gets here */
4297    
4298              case PT_ALNUM:
4299              for (fi = min;; fi++)
4300                {
4301                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4302                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4303                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4304                if (eptr >= md->end_subject)
4305                  {
4306                  SCHECK_PARTIAL();
4307                  MRRETURN(MATCH_NOMATCH);
4308                  }
4309                GETCHARINCTEST(c, eptr);
4310                prop_category = UCD_CATEGORY(c);
4311                if ((prop_category == ucp_L || prop_category == ucp_N)
4312                       == prop_fail_result)
4313                  MRRETURN(MATCH_NOMATCH);
4314                }
4315              /* Control never gets here */
4316    
4317              case PT_SPACE:    /* Perl space */
4318              for (fi = min;; fi++)
4319                {
4320                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4321                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4322                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4323                if (eptr >= md->end_subject)
4324                  {
4325                  SCHECK_PARTIAL();
4326                  MRRETURN(MATCH_NOMATCH);
4327                  }
4328                GETCHARINCTEST(c, eptr);
4329                prop_category = UCD_CATEGORY(c);
4330                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4331                     c == CHAR_FF || c == CHAR_CR)
4332                       == prop_fail_result)
4333                  MRRETURN(MATCH_NOMATCH);
4334                }
4335              /* Control never gets here */
4336    
4337              case PT_PXSPACE:  /* POSIX space */
4338              for (fi = min;; fi++)
4339                {
4340                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4341                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4342                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4343                if (eptr >= md->end_subject)
4344                  {
4345                  SCHECK_PARTIAL();
4346                  MRRETURN(MATCH_NOMATCH);
4347                  }
4348                GETCHARINCTEST(c, eptr);
4349                prop_category = UCD_CATEGORY(c);
4350                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4351                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4352                       == prop_fail_result)
4353                  MRRETURN(MATCH_NOMATCH);
4354                }
4355              /* Control never gets here */
4356    
4357              case PT_WORD:
4358              for (fi = min;; fi++)
4359                {
4360                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4361                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4362                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4363                if (eptr >= md->end_subject)
4364                  {
4365                  SCHECK_PARTIAL();
4366                  MRRETURN(MATCH_NOMATCH);
4367                  }
4368                GETCHARINCTEST(c, eptr);
4369                prop_category = UCD_CATEGORY(c);
4370                if ((prop_category == ucp_L ||
4371                     prop_category == ucp_N ||
4372                     c == CHAR_UNDERSCORE)
4373                       == prop_fail_result)
4374                  MRRETURN(MATCH_NOMATCH);
4375                }
4376              /* Control never gets here */
4377    
4378              /* This should never occur */
4379    
4380            default:            default:
4381            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4382            }            }
# Line 4473  for (;;) Line 4723  for (;;)
4723                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4724                break;                break;
4725                }                }
4726              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4727              if (prop_fail_result) break;              if (prop_fail_result) break;
4728              eptr+= len;              eptr+= len;
4729              }              }
# Line 4488  for (;;) Line 4738  for (;;)
4738                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4739                break;                break;
4740                }                }
4741              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4742              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4743              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4744                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 4507  for (;;) Line 4757  for (;;)
4757                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4758                break;                break;
4759                }                }
4760              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4761              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
4762              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4763                break;                break;
# Line 4524  for (;;) Line 4774  for (;;)
4774                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4775                break;                break;
4776                }                }
4777              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4778              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
4779              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4780                break;                break;
# Line 4541  for (;;) Line 4791  for (;;)
4791                SCHECK_PARTIAL();                SCHECK_PARTIAL();
4792                break;                break;
4793                }                }
4794              GETCHARLEN(c, eptr, len);              GETCHARLENTEST(c, eptr, len);
4795              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
4796              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4797                break;                break;
4798              eptr+= len;              eptr+= len;
4799              }              }
4800            break;            break;
4801    
4802              case PT_ALNUM:
4803              for (i = min; i < max; i++)
4804                {
4805                int len = 1;
4806                if (eptr >= md->end_subject)
4807                  {
4808                  SCHECK_PARTIAL();
4809                  break;
4810                  }
4811                GETCHARLENTEST(c, eptr, len);
4812                prop_category = UCD_CATEGORY(c);
4813                if ((prop_category == ucp_L || prop_category == ucp_N)
4814                     == prop_fail_result)
4815                  break;
4816                eptr+= len;
4817                }
4818              break;
4819    
4820              case PT_SPACE:    /* Perl space */
4821              for (i = min; i < max; i++)
4822                {
4823                int len = 1;
4824                if (eptr >= md->end_subject)
4825                  {
4826                  SCHECK_PARTIAL();
4827                  break;
4828                  }
4829                GETCHARLENTEST(c, eptr, len);
4830                prop_category = UCD_CATEGORY(c);
4831                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4832                     c == CHAR_FF || c == CHAR_CR)
4833                     == prop_fail_result)
4834                  break;
4835                eptr+= len;
4836                }
4837              break;
4838    
4839              case PT_PXSPACE:  /* POSIX space */
4840              for (i = min; i < max; i++)
4841                {
4842                int len = 1;
4843                if (eptr >= md->end_subject)
4844                  {
4845                  SCHECK_PARTIAL();
4846                  break;
4847                  }
4848                GETCHARLENTEST(c, eptr, len);
4849                prop_category = UCD_CATEGORY(c);
4850                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4851                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4852                     == prop_fail_result)
4853                  break;
4854                eptr+= len;
4855                }
4856              break;
4857    
4858              case PT_WORD:
4859              for (i = min; i < max; i++)
4860                {
4861                int len = 1;
4862                if (eptr >= md->end_subject)
4863                  {
4864                  SCHECK_PARTIAL();
4865                  break;
4866                  }
4867                GETCHARLENTEST(c, eptr, len);
4868                prop_category = UCD_CATEGORY(c);
4869                if ((prop_category == ucp_L || prop_category == ucp_N ||
4870                     c == CHAR_UNDERSCORE) == prop_fail_result)
4871                  break;
4872                eptr+= len;
4873                }
4874              break;
4875    
4876              default:
4877              RRETURN(PCRE_ERROR_INTERNAL);
4878            }            }
4879    
4880          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
# Line 5133  switch (frame->Xwhere) Line 5460  switch (frame->Xwhere)
5460    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
5461    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
5462    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
5463    LBL(53) LBL(54)    LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58)
5464  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5465    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
5466    LBL(32) LBL(34) LBL(42) LBL(46)    LBL(32) LBL(34) LBL(42) LBL(46)
5467  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
5468    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)    LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
5469      LBL(59) LBL(60) LBL(61) LBL(62)
5470  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
5471  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
5472    default:    default:
# Line 5342  end_subject = md->end_subject; Line 5670  end_subject = md->end_subject;
5670    
5671  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
5672  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
5673    md->use_ucp = (re->options & PCRE_UCP) != 0;
5674  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
5675    
5676  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
# Line 5630  for(;;) Line 5959  for(;;)
5959        while (start_match < end_subject)        while (start_match < end_subject)
5960          {          {
5961          register unsigned int c = *start_match;          register unsigned int c = *start_match;
5962          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          if ((start_bits[c/8] & (1 << (c&7))) == 0)
5963            else break;            {
5964              start_match++;
5965    #ifdef SUPPORT_UTF8
5966              if (utf8)
5967                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5968                  start_match++;
5969    #endif
5970              }
5971            else break;
5972          }          }
5973        }        }
5974      }   /* Starting optimizations */      }   /* Starting optimizations */
# Line 5722  for(;;) Line 6059  for(;;)
6059    
6060    /* OK, we can now run the match. If "hitend" is set afterwards, remember the    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6061    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
6062    
6063    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
6064    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
6065    md->match_call_count = 0;    md->match_call_count = 0;
# Line 5732  for(;;) Line 6069  for(;;)
6069    
6070    switch(rc)    switch(rc)
6071      {      {
6072      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches      /* NOMATCH and PRUNE advance by one character. If MATCH_SKIP_ARG reaches
6073      this level it means that a MARK that matched the SKIP's arg was not found.      this level it means that a MARK that matched the SKIP's arg was not found.
6074      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */      We treat this as NOMATCH. THEN at this level acts exactly like PRUNE. */
6075    
6076      case MATCH_NOMATCH:      case MATCH_NOMATCH:
6077      case MATCH_PRUNE:      case MATCH_PRUNE:
6078      case MATCH_SKIP_ARG:      case MATCH_SKIP_ARG:
6079      case MATCH_THEN:      case MATCH_THEN:
6080      new_start_match = start_match + 1;      new_start_match = start_match + 1;
6081  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 5850  if (rc == MATCH_MATCH || rc == MATCH_ACC Line 6187  if (rc == MATCH_MATCH || rc == MATCH_ACC
6187    
6188    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
6189      {      {
6190      offsets[0] = md->start_match_ptr - md->start_subject;      offsets[0] = (int)(md->start_match_ptr - md->start_subject);
6191      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = (int)(md->end_match_ptr - md->start_subject);
6192      }      }
6193    
6194    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
# Line 5866  if (using_temporary_offsets) Line 6203  if (using_temporary_offsets)
6203    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
6204    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
6205    }    }
6206    
6207  /* For anything other than nomatch or partial match, just return the code. */  /* For anything other than nomatch or partial match, just return the code. */
6208    
6209  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
6210    {    {
6211    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
6212    return rc;    return rc;
6213    }    }
6214    
6215  /* Handle partial matches - disable any mark data */  /* Handle partial matches - disable any mark data */
6216    
6217  if (start_partial != NULL)  if (start_partial != NULL)
6218    {    {
6219    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
6220    md->mark = NULL;    md->mark = NULL;
6221    if (offsetcount > 1)    if (offsetcount > 1)
6222      {      {
6223      offsets[0] = start_partial - (USPTR)subject;      offsets[0] = (int)(start_partial - (USPTR)subject);
6224      offsets[1] = end_subject - (USPTR)subject;      offsets[1] = (int)(end_subject - (USPTR)subject);
6225      }      }
6226    rc = PCRE_ERROR_PARTIAL;    rc = PCRE_ERROR_PARTIAL;
6227    }    }
6228    
6229  /* This is the classic nomatch case */  /* This is the classic nomatch case */
6230    
6231  else  else
6232    {    {
6233    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));    DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
6234    rc = PCRE_ERROR_NOMATCH;    rc = PCRE_ERROR_NOMATCH;
6235    }    }
6236    
6237  /* Return the MARK data if it has been requested. */  /* Return the MARK data if it has been requested. */
6238    
6239  RETURN_MARK:  RETURN_MARK:
6240    
6241  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)  if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
6242    *(extra_data->mark) = (unsigned char *)(md->mark);    *(extra_data->mark) = (unsigned char *)(md->mark);
6243  return rc;  return rc;
6244  }  }
6245    
6246  /* End of pcre_exec.c */  /* End of pcre_exec.c */

Legend:
Removed from v.511  
changed lines
  Added in v.538

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12