/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 442 by ph10, Fri Sep 11 10:21:02 2009 UTC | revision 443 by ph10, Sun Sep 13 16:00:08 2009 UTC
# Line 663  for (;;) Line 663  for (;;)
663    {    {
664    minimize = possessive = FALSE;    minimize = possessive = FALSE;
665    op = *ecode;    op = *ecode;
666    
667    switch(op)    switch(op)
668      {      {
669      case OP_FAIL:      case OP_FAIL:
# Line 934  for (;;) Line 934  for (;;)
934      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
935      the subject. In both cases, backtracking will then try other alternatives,      the subject. In both cases, backtracking will then try other alternatives,
936      if any. */      if any. */
937    
938      if (eptr == mstart &&      if (eptr == mstart &&
939          (md->notempty ||          (md->notempty ||
940            (md->notempty_atstart &&            (md->notempty_atstart &&
941              mstart == md->start_subject + md->start_offset)))              mstart == md->start_subject + md->start_offset)))
942        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
943    
944      /* Otherwise, we have a match. */      /* Otherwise, we have a match. */
945    
946      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
# Line 1478  for (;;) Line 1478  for (;;)
1478    
1479        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1480        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1481        be "non-word" characters. Remember the earliest consulted character for        be "non-word" characters. Remember the earliest consulted character for
1482        partial matching. */        partial matching. */
1483    
1484  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1488  for (;;) Line 1488  for (;;)
1488            {            {
1489            USPTR lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1490            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1491            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;            if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1492            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1493            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1494            }            }
1495          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1496            {            {
1497            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1498            cur_is_word = FALSE;            cur_is_word = FALSE;
1499            }            }
1500          else          else
1501            {            {
# Line 1511  for (;;) Line 1511  for (;;)
1511          {          {
1512          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1513            {            {
1514            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1515            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1516            }            }
1517          if (eptr >= md->end_subject)          if (eptr >= md->end_subject)
1518            {            {
1519            SCHECK_PARTIAL();            SCHECK_PARTIAL();
1520            cur_is_word = FALSE;            cur_is_word = FALSE;
1521            }            }
1522          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);          else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1523          }          }
# Line 1537  for (;;) Line 1537  for (;;)
1537      /* Fall through */      /* Fall through */
1538    
1539      case OP_ALLANY:      case OP_ALLANY:
1540      if (eptr++ >= md->end_subject)      if (eptr++ >= md->end_subject)
1541        {        {
1542        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1543        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1544        }        }
1545      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1546      ecode++;      ecode++;
1547      break;      break;
# Line 1550  for (;;) Line 1550  for (;;)
1550      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1551    
1552      case OP_ANYBYTE:      case OP_ANYBYTE:
1553      if (eptr++ >= md->end_subject)      if (eptr++ >= md->end_subject)
1554        {        {
1555        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1556        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1557        }        }
1558      ecode++;      ecode++;
1559      break;      break;
1560    
1561      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1562      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1563        {        {
1564        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1565        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1566        }        }
1567      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1568      if (      if (
1569  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1576  for (;;) Line 1576  for (;;)
1576      break;      break;
1577    
1578      case OP_DIGIT:      case OP_DIGIT:
1579      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1580        {        {
1581        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1582        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1583        }        }
1584      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1585      if (      if (
1586  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1593  for (;;) Line 1593  for (;;)
1593      break;      break;
1594    
1595      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1596      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1597        {        {
1598        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1599        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1600        }        }
1601      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1602      if (      if (
1603  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1610  for (;;) Line 1610  for (;;)
1610      break;      break;
1611    
1612      case OP_WHITESPACE:      case OP_WHITESPACE:
1613      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1614        {        {
1615        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1616        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1617        }        }
1618      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1619      if (      if (
1620  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1627  for (;;) Line 1627  for (;;)
1627      break;      break;
1628    
1629      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1630      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1631        {        {
1632        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1633        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1634        }        }
1635      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1636      if (      if (
1637  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1644  for (;;) Line 1644  for (;;)
1644      break;      break;
1645    
1646      case OP_WORDCHAR:      case OP_WORDCHAR:
1647      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1648        {        {
1649        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1650        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1651        }        }
1652      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1653      if (      if (
1654  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1661  for (;;) Line 1661  for (;;)
1661      break;      break;
1662    
1663      case OP_ANYNL:      case OP_ANYNL:
1664      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1665        {        {
1666        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1667        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1668        }        }
1669      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1670      switch(c)      switch(c)
1671        {        {
# Line 1689  for (;;) Line 1689  for (;;)
1689      break;      break;
1690    
1691      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1692      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1693        {        {
1694        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1695        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1696        }        }
1697      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1698      switch(c)      switch(c)
1699        {        {
# Line 1723  for (;;) Line 1723  for (;;)
1723      break;      break;
1724    
1725      case OP_HSPACE:      case OP_HSPACE:
1726      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1727        {        {
1728        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1729        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1730        }        }
1731      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1732      switch(c)      switch(c)
1733        {        {
# Line 1757  for (;;) Line 1757  for (;;)
1757      break;      break;
1758    
1759      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1760      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1761        {        {
1762        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1763        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1764        }        }
1765      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1766      switch(c)      switch(c)
1767        {        {
# Line 1779  for (;;) Line 1779  for (;;)
1779      break;      break;
1780    
1781      case OP_VSPACE:      case OP_VSPACE:
1782      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1783        {        {
1784        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1785        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1786        }        }
1787      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1788      switch(c)      switch(c)
1789        {        {
# Line 1806  for (;;) Line 1806  for (;;)
1806    
1807      case OP_PROP:      case OP_PROP:
1808      case OP_NOTPROP:      case OP_NOTPROP:
1809      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1810        {        {
1811        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1812        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1813        }        }
1814      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1815        {        {
1816        const ucd_record *prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
# Line 1855  for (;;) Line 1855  for (;;)
1855      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
1856    
1857      case OP_EXTUNI:      case OP_EXTUNI:
1858      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
1859        {        {
1860        SCHECK_PARTIAL();        SCHECK_PARTIAL();
1861        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
1862        }        }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864        {        {
1865        int category = UCD_CATEGORY(c);        int category = UCD_CATEGORY(c);
# Line 1939  for (;;) Line 1939  for (;;)
1939          break;          break;
1940    
1941          default:               /* No repeat follows */          default:               /* No repeat follows */
1942          if (!match_ref(offset, eptr, length, md, ims))          if (!match_ref(offset, eptr, length, md, ims))
1943            {            {
1944            CHECK_PARTIAL();            CHECK_PARTIAL();
1945            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1946            }            }
1947          eptr += length;          eptr += length;
1948          continue;              /* With the main loop */          continue;              /* With the main loop */
1949          }          }
1950    
1951        /* If the length of the reference is zero, just continue with the        /* If the length of the reference is zero, just continue with the
1952        main loop. */        main loop. */
1953    
1954        if (length == 0) continue;        if (length == 0) continue;
1955    
1956        /* First, ensure the minimum number of matches are present. We get back        /* First, ensure the minimum number of matches are present. We get back
# Line 2326  for (;;) Line 2326  for (;;)
2326        length = 1;        length = 1;
2327        ecode++;        ecode++;
2328        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2329        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
2330          {          {
2331          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2332          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2333          }          }
2334        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2335        }        }
2336      else      else
# Line 2338  for (;;) Line 2338  for (;;)
2338    
2339      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2340        {        {
2341        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
2342          {          {
2343          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2344          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2345          }          }
2346        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2347        ecode += 2;        ecode += 2;
2348        }        }
# Line 2358  for (;;) Line 2358  for (;;)
2358        ecode++;        ecode++;
2359        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2360    
2361        if (length > md->end_subject - eptr)        if (length > md->end_subject - eptr)
2362          {          {
2363          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */          CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2364          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2365          }          }
2366    
2367        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2368        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2397  for (;;) Line 2397  for (;;)
2397    
2398      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2399        {        {
2400        if (md->end_subject - eptr < 1)        if (md->end_subject - eptr < 1)
2401          {          {
2402          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */          SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2403          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2404          }          }
2405        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2406        ecode += 2;        ecode += 2;
2407        }        }
# Line 2455  for (;;) Line 2455  for (;;)
2455      case OP_MINQUERY:      case OP_MINQUERY:
2456      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2457      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2458    
2459      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2460      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2461      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
# Line 2568  for (;;) Line 2568  for (;;)
2568      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2569    
2570      fc = *ecode++;      fc = *ecode++;
2571    
2572      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2573      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
2574      caseful cases, for speed, since matching characters is likely to be quite      caseful cases, for speed, since matching characters is likely to be quite
# Line 2645  for (;;) Line 2645  for (;;)
2645            }            }
2646          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2647          }          }
2648    
2649        if (min == max) continue;        if (min == max) continue;
2650    
2651        if (minimize)        if (minimize)
2652          {          {
2653          for (fi = min;; fi++)          for (fi = min;; fi++)
# Line 2673  for (;;) Line 2673  for (;;)
2673            eptr++;            eptr++;
2674            }            }
2675          if (possessive) continue;          if (possessive) continue;
2676    
2677          while (eptr >= pp)          while (eptr >= pp)
2678            {            {
2679            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2689  for (;;) Line 2689  for (;;)
2689      checking can be multibyte. */      checking can be multibyte. */
2690    
2691      case OP_NOT:      case OP_NOT:
2692      if (eptr >= md->end_subject)      if (eptr >= md->end_subject)
2693        {        {
2694        SCHECK_PARTIAL();        SCHECK_PARTIAL();
2695        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
2696        }        }
2697      ecode++;      ecode++;
2698      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2699      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 3539  for (;;) Line 3539  for (;;)
3539          break;          break;
3540    
3541          case OP_ALLANY:          case OP_ALLANY:
3542          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
3543            {            {
3544            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3545            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3546            }            }
3547          eptr += min;          eptr += min;
3548          break;          break;
3549    
3550          case OP_ANYBYTE:          case OP_ANYBYTE:
3551          if (eptr > md->end_subject - min)          if (eptr > md->end_subject - min)
3552            {            {
3553            SCHECK_PARTIAL();            SCHECK_PARTIAL();
3554            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
3555            }            }
3556          eptr += min;          eptr += min;
3557          break;          break;
3558    
# Line 5293  for(;;) Line 5293  for(;;)
5293    first starting point for which a partial match was found. */    first starting point for which a partial match was found. */
5294    
5295    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5296    md->start_used_ptr = start_match;    md->start_used_ptr = start_match;
5297    md->match_call_count = 0;    md->match_call_count = 0;
5298    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5299    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;    if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;

Legend:
- Removed from v.442
- Changed lines
- Added in v.443

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12