/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 366 by ph10, Mon Jul 14 15:45:32 2008 UTC revision 455 by ph10, Sat Sep 26 19:12:32 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 322  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327    const uschar *Xmstart;    USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 333  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 360  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 395  typedef struct heapframe { Line 398  typedef struct heapframe {
398    
399  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417    #define SCHECK_PARTIAL()\
418      if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
422        }
423    
424  Performance note: It might be tempting to extract commonly used fields from the  
425  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
426    the md structure (e.g. utf8, end_subject) into individual variables to improve
427  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
428  made performance worse.  made performance worse.
429    
# Line 425  Returns: MATCH_MATCH if matched Line 450  Returns: MATCH_MATCH if matched
450  */  */
451    
452  static int  static int
453  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
454    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
455    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
456  {  {
# Line 439  register unsigned int c; /* Character Line 464  register unsigned int c; /* Character
464  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
465    
466  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
467    int condcode;
468    
469  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
470  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 481  HEAP_RECURSE: Line 507  HEAP_RECURSE:
507  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
508  #endif  #endif
509  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
510    #define codelink           frame->Xcodelink
511  #define data               frame->Xdata  #define data               frame->Xdata
512  #define next               frame->Xnext  #define next               frame->Xnext
513  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 561  int oclength; Line 588  int oclength;
588  uschar occhars[8];  uschar occhars[8];
589  #endif  #endif
590    
591    int codelink;
592  int ctype;  int ctype;
593  int length;  int length;
594  int max;  int max;
# Line 636  for (;;) Line 664  for (;;)
664    minimize = possessive = FALSE;    minimize = possessive = FALSE;
665    op = *ecode;    op = *ecode;
666    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
667    switch(op)    switch(op)
668      {      {
669      case OP_FAIL:      case OP_FAIL:
# Line 787  for (;;) Line 807  for (;;)
807    
808      case OP_COND:      case OP_COND:
809      case OP_SCOND:      case OP_SCOND:
810      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
811    
812        /* Because of the way auto-callout works during compile, a callout item is
813        inserted between OP_COND and an assertion condition. */
814    
815        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
816          {
817          if (pcre_callout != NULL)
818            {
819            pcre_callout_block cb;
820            cb.version          = 1;   /* Version 1 of the callout block */
821            cb.callout_number   = ecode[LINK_SIZE+2];
822            cb.offset_vector    = md->offset_vector;
823            cb.subject          = (PCRE_SPTR)md->start_subject;
824            cb.subject_length   = md->end_subject - md->start_subject;
825            cb.start_match      = mstart - md->start_subject;
826            cb.current_position = eptr - md->start_subject;
827            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
828            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
829            cb.capture_top      = offset_top/2;
830            cb.capture_last     = md->capture_last;
831            cb.callout_data     = md->callout_data;
832            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
833            if (rrc < 0) RRETURN(rrc);
834            }
835          ecode += _pcre_OP_lengths[OP_CALLOUT];
836          }
837    
838        condcode = ecode[LINK_SIZE+1];
839    
840        /* Now see what the actual condition is */
841    
842        if (condcode == OP_RREF)         /* Recursion test */
843        {        {
844        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
845        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 795  for (;;) Line 847  for (;;)
847        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
848        }        }
849    
850      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
851        {        {
852        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
853        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
854        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
855        }        }
856    
857      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
858        {        {
859        condition = FALSE;        condition = FALSE;
860        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 829  for (;;) Line 881  for (;;)
881        else        else
882          {          {
883          condition = FALSE;          condition = FALSE;
884          ecode += GET(ecode, 1);          ecode += codelink;
885          }          }
886        }        }
887    
# Line 852  for (;;) Line 904  for (;;)
904          goto TAIL_RECURSE;          goto TAIL_RECURSE;
905          }          }
906        }        }
907      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
908        {        {
909        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
910        }        }
911      break;      break;
912    
913    
914        /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
915        to close any currently open capturing brackets. */
916    
917        case OP_CLOSE:
918        number = GET2(ecode, 1);
919        offset = number << 1;
920    
921    #ifdef DEBUG
922          printf("end bracket %d at *ACCEPT", number);
923          printf("\n");
924    #endif
925    
926        md->capture_last = number;
927        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
928          {
929          md->offset_vector[offset] =
930            md->offset_vector[md->offset_end - number];
931          md->offset_vector[offset+1] = eptr - md->start_subject;
932          if (offset_top <= offset) offset_top = offset + 2;
933          }
934        ecode += 3;
935        break;
936    
937    
938      /* End of the pattern, either real or forced. If we are in a top-level      /* End of the pattern, either real or forced. If we are in a top-level
# Line 872  for (;;) Line 948  for (;;)
948        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
949        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
950          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
951          offset_top = rec->offset_top;
952        mstart = rec->save_start;        mstart = rec->save_start;
953        ims = original_ims;        ims = original_ims;
954        ecode = rec->after_call;        ecode = rec->after_call;
955        break;        break;
956        }        }
957    
958      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
959      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
960        the subject. In both cases, backtracking will then try other alternatives,
961        if any. */
962    
963        if (eptr == mstart &&
964            (md->notempty ||
965              (md->notempty_atstart &&
966                mstart == md->start_subject + md->start_offset)))
967          RRETURN(MATCH_NOMATCH);
968    
969        /* Otherwise, we have a match. */
970    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
971      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
972      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
973      md->start_match_ptr = mstart;       /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
# Line 972  for (;;) Line 1058  for (;;)
1058        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1059        }        }
1060    
1061      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1062    
1063        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1064      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1065      break;      break;
1066    
# Line 1053  for (;;) Line 1140  for (;;)
1140        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1141              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1142        new_recursive.save_start = mstart;        new_recursive.save_start = mstart;
1143          new_recursive.offset_top = offset_top;
1144        mstart = eptr;        mstart = eptr;
1145    
1146        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
# Line 1075  for (;;) Line 1163  for (;;)
1163          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1164            {            {
1165            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1166              if (new_recursive.offset_save != stacksave)
1167                (pcre_free)(new_recursive.offset_save);
1168            RRETURN(rrc);            RRETURN(rrc);
1169            }            }
1170    
# Line 1249  for (;;) Line 1339  for (;;)
1339        {        {
1340        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1341        offset = number << 1;        offset = number << 1;
1342    
1343  #ifdef DEBUG  #ifdef DEBUG
1344        printf("end bracket %d", number);        printf("end bracket %d", number);
1345        printf("\n");        printf("\n");
# Line 1275  for (;;) Line 1365  for (;;)
1365          mstart = rec->save_start;          mstart = rec->save_start;
1366          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1367            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1368            offset_top = rec->offset_top;
1369          ecode = rec->after_call;          ecode = rec->after_call;
1370          ims = original_ims;          ims = original_ims;
1371          break;          break;
# Line 1414  for (;;) Line 1505  for (;;)
1505    
1506        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1507        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1508        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1509          partial matching. */
1510    
1511  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1512        if (utf8)        if (utf8)
1513          {          {
1514          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1515            {            {
1516            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1517            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1518              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1519            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1520            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1521            }            }
1522          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1523              {
1524              SCHECK_PARTIAL();
1525              cur_is_word = FALSE;
1526              }
1527            else
1528            {            {
1529            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1530            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1435  for (;;) Line 1533  for (;;)
1533        else        else
1534  #endif  #endif
1535    
1536        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1537    
1538          {          {
1539          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1540            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1541          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1542            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1543              }
1544            if (eptr >= md->end_subject)
1545              {
1546              SCHECK_PARTIAL();
1547              cur_is_word = FALSE;
1548              }
1549            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1550          }          }
1551    
1552        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
# Line 1459  for (;;) Line 1564  for (;;)
1564      /* Fall through */      /* Fall through */
1565    
1566      case OP_ALLANY:      case OP_ALLANY:
1567      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1568          {
1569          SCHECK_PARTIAL();
1570          RRETURN(MATCH_NOMATCH);
1571          }
1572      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1573      ecode++;      ecode++;
1574      break;      break;
# Line 1468  for (;;) Line 1577  for (;;)
1577      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1578    
1579      case OP_ANYBYTE:      case OP_ANYBYTE:
1580      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1581          {
1582          SCHECK_PARTIAL();
1583          RRETURN(MATCH_NOMATCH);
1584          }
1585      ecode++;      ecode++;
1586      break;      break;
1587    
1588      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1589      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1590          {
1591          SCHECK_PARTIAL();
1592          RRETURN(MATCH_NOMATCH);
1593          }
1594      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1595      if (      if (
1596  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1486  for (;;) Line 1603  for (;;)
1603      break;      break;
1604    
1605      case OP_DIGIT:      case OP_DIGIT:
1606      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1607          {
1608          SCHECK_PARTIAL();
1609          RRETURN(MATCH_NOMATCH);
1610          }
1611      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1612      if (      if (
1613  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1499  for (;;) Line 1620  for (;;)
1620      break;      break;
1621    
1622      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1623      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1624          {
1625          SCHECK_PARTIAL();
1626          RRETURN(MATCH_NOMATCH);
1627          }
1628      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1629      if (      if (
1630  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1512  for (;;) Line 1637  for (;;)
1637      break;      break;
1638    
1639      case OP_WHITESPACE:      case OP_WHITESPACE:
1640      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1641          {
1642          SCHECK_PARTIAL();
1643          RRETURN(MATCH_NOMATCH);
1644          }
1645      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1646      if (      if (
1647  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1525  for (;;) Line 1654  for (;;)
1654      break;      break;
1655    
1656      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1657      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1658          {
1659          SCHECK_PARTIAL();
1660          RRETURN(MATCH_NOMATCH);
1661          }
1662      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1663      if (      if (
1664  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1538  for (;;) Line 1671  for (;;)
1671      break;      break;
1672    
1673      case OP_WORDCHAR:      case OP_WORDCHAR:
1674      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1675          {
1676          SCHECK_PARTIAL();
1677          RRETURN(MATCH_NOMATCH);
1678          }
1679      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1680      if (      if (
1681  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1551  for (;;) Line 1688  for (;;)
1688      break;      break;
1689    
1690      case OP_ANYNL:      case OP_ANYNL:
1691      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1692          {
1693          SCHECK_PARTIAL();
1694          RRETURN(MATCH_NOMATCH);
1695          }
1696      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1697      switch(c)      switch(c)
1698        {        {
# Line 1575  for (;;) Line 1716  for (;;)
1716      break;      break;
1717    
1718      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
1719      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1720          {
1721          SCHECK_PARTIAL();
1722          RRETURN(MATCH_NOMATCH);
1723          }
1724      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1725      switch(c)      switch(c)
1726        {        {
# Line 1605  for (;;) Line 1750  for (;;)
1750      break;      break;
1751    
1752      case OP_HSPACE:      case OP_HSPACE:
1753      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1754          {
1755          SCHECK_PARTIAL();
1756          RRETURN(MATCH_NOMATCH);
1757          }
1758      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1759      switch(c)      switch(c)
1760        {        {
# Line 1635  for (;;) Line 1784  for (;;)
1784      break;      break;
1785    
1786      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
1787      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1788          {
1789          SCHECK_PARTIAL();
1790          RRETURN(MATCH_NOMATCH);
1791          }
1792      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1793      switch(c)      switch(c)
1794        {        {
# Line 1653  for (;;) Line 1806  for (;;)
1806      break;      break;
1807    
1808      case OP_VSPACE:      case OP_VSPACE:
1809      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1810          {
1811          SCHECK_PARTIAL();
1812          RRETURN(MATCH_NOMATCH);
1813          }
1814      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1815      switch(c)      switch(c)
1816        {        {
# Line 1676  for (;;) Line 1833  for (;;)
1833    
1834      case OP_PROP:      case OP_PROP:
1835      case OP_NOTPROP:      case OP_NOTPROP:
1836      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1837          {
1838          SCHECK_PARTIAL();
1839          RRETURN(MATCH_NOMATCH);
1840          }
1841      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1842        {        {
1843        const ucd_record * prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
1844    
1845        switch(ecode[1])        switch(ecode[1])
1846          {          {
# Line 1721  for (;;) Line 1882  for (;;)
1882      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
1883    
1884      case OP_EXTUNI:      case OP_EXTUNI:
1885      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1886          {
1887          SCHECK_PARTIAL();
1888          RRETURN(MATCH_NOMATCH);
1889          }
1890      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1891        {        {
1892        int category = UCD_CATEGORY(c);        int category = UCD_CATEGORY(c);
# Line 1801  for (;;) Line 1966  for (;;)
1966          break;          break;
1967    
1968          default:               /* No repeat follows */          default:               /* No repeat follows */
1969          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1970              {
1971              CHECK_PARTIAL();
1972              RRETURN(MATCH_NOMATCH);
1973              }
1974          eptr += length;          eptr += length;
1975          continue;              /* With the main loop */          continue;              /* With the main loop */
1976          }          }
# Line 1817  for (;;) Line 1986  for (;;)
1986    
1987        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1988          {          {
1989          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1990              {
1991              CHECK_PARTIAL();
1992              RRETURN(MATCH_NOMATCH);
1993              }
1994          eptr += length;          eptr += length;
1995          }          }
1996    
# Line 1834  for (;;) Line 2007  for (;;)
2007            {            {
2008            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2009            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2010            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) RRETURN(MATCH_NOMATCH);
2011              if (!match_ref(offset, eptr, length, md, ims))
2012                {
2013                CHECK_PARTIAL();
2014              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2015                }
2016            eptr += length;            eptr += length;
2017            }            }
2018          /* Control never gets here */          /* Control never gets here */
# Line 1862  for (;;) Line 2039  for (;;)
2039        }        }
2040      /* Control never gets here */      /* Control never gets here */
2041    
   
   
2042      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2043      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2044      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1918  for (;;) Line 2093  for (;;)
2093          {          {
2094          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2095            {            {
2096            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2097                {
2098                SCHECK_PARTIAL();
2099                RRETURN(MATCH_NOMATCH);
2100                }
2101            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2102            if (c > 255)            if (c > 255)
2103              {              {
# Line 1936  for (;;) Line 2115  for (;;)
2115          {          {
2116          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2117            {            {
2118            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2119                {
2120                SCHECK_PARTIAL();
2121                RRETURN(MATCH_NOMATCH);
2122                }
2123            c = *eptr++;            c = *eptr++;
2124            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2125            }            }
# Line 1960  for (;;) Line 2143  for (;;)
2143              {              {
2144              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2145              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2146              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2147                if (eptr >= md->end_subject)
2148                  {
2149                  SCHECK_PARTIAL();
2150                  RRETURN(MATCH_NOMATCH);
2151                  }
2152              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2153              if (c > 255)              if (c > 255)
2154                {                {
# Line 1980  for (;;) Line 2168  for (;;)
2168              {              {
2169              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2170              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2171              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2172                if (eptr >= md->end_subject)
2173                  {
2174                  SCHECK_PARTIAL();
2175                  RRETURN(MATCH_NOMATCH);
2176                  }
2177              c = *eptr++;              c = *eptr++;
2178              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2179              }              }
# Line 2047  for (;;) Line 2240  for (;;)
2240    
2241    
2242      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2243      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2244        mode, because Unicode properties are supported in non-UTF-8 mode. */
2245    
2246  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2247      case OP_XCLASS:      case OP_XCLASS:
# Line 2088  for (;;) Line 2282  for (;;)
2282    
2283        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2284          {          {
2285          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2286          GETCHARINC(c, eptr);            {
2287              SCHECK_PARTIAL();
2288              RRETURN(MATCH_NOMATCH);
2289              }
2290            GETCHARINCTEST(c, eptr);
2291          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2292          }          }
2293    
# Line 2107  for (;;) Line 2305  for (;;)
2305            {            {
2306            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2307            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2308            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
2309            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2310                {
2311                SCHECK_PARTIAL();
2312                RRETURN(MATCH_NOMATCH);
2313                }
2314              GETCHARINCTEST(c, eptr);
2315            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2316            }            }
2317          /* Control never gets here */          /* Control never gets here */
# Line 2123  for (;;) Line 2326  for (;;)
2326            {            {
2327            int len = 1;            int len = 1;
2328            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2329            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2330            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2331            eptr += len;            eptr += len;
2332            }            }
# Line 2150  for (;;) Line 2353  for (;;)
2353        length = 1;        length = 1;
2354        ecode++;        ecode++;
2355        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2356        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2357            {
2358            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2359            RRETURN(MATCH_NOMATCH);
2360            }
2361        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
2362        }        }
2363      else      else
# Line 2158  for (;;) Line 2365  for (;;)
2365    
2366      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2367        {        {
2368        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2369            {
2370            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2371            RRETURN(MATCH_NOMATCH);
2372            }
2373        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
2374        ecode += 2;        ecode += 2;
2375        }        }
# Line 2174  for (;;) Line 2385  for (;;)
2385        ecode++;        ecode++;
2386        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2387    
2388        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2389            {
2390            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2391            RRETURN(MATCH_NOMATCH);
2392            }
2393    
2394        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2395        can use the fast lookup table. */        can use the fast lookup table. */
# Line 2209  for (;;) Line 2424  for (;;)
2424    
2425      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2426        {        {
2427        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2428            {
2429            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2430            RRETURN(MATCH_NOMATCH);
2431            }
2432        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2433        ecode += 2;        ecode += 2;
2434        }        }
# Line 2263  for (;;) Line 2482  for (;;)
2482      case OP_MINQUERY:      case OP_MINQUERY:
2483      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2484      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2485    
2486      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2487      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2488      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2489    
2490      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2491    
2492      REPEATCHAR:      REPEATCHAR:
2493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2278  for (;;) Line 2496  for (;;)
2496        length = 1;        length = 1;
2497        charptr = ecode;        charptr = ecode;
2498        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2499        ecode += length;        ecode += length;
2500    
2501        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2296  for (;;) Line 2513  for (;;)
2513    
2514          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2515            {            {
2516            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2517                memcmp(eptr, charptr, length) == 0) eptr += length;
2518  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2519            /* Need braces because of following else */            else if (oclength > 0 &&
2520            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2521                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2522    #endif  /* SUPPORT_UCP */
2523            else            else
2524              {              {
2525              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2526              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2527              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2528            }            }
2529    
2530          if (min == max) continue;          if (min == max) continue;
# Line 2318  for (;;) Line 2535  for (;;)
2535              {              {
2536              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2537              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2538              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2539              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2540                  memcmp(eptr, charptr, length) == 0) eptr += length;
2541  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2542              /* Need braces because of following else */              else if (oclength > 0 &&
2543              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2544                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2545    #endif  /* SUPPORT_UCP */
2546              else              else
2547                {                {
2548                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2549                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2550                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2551              }              }
2552            /* Control never gets here */            /* Control never gets here */
2553            }            }
# Line 2340  for (;;) Line 2557  for (;;)
2557            pp = eptr;            pp = eptr;
2558            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2559              {              {
2560              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2561              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2562  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2563              else if (oclength == 0) break;              else if (oclength > 0 &&
2564              else                       eptr <= md->end_subject - oclength &&
2565                {                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
               if (memcmp(eptr, occhars, oclength) != 0) break;  
               eptr += oclength;  
               }  
 #else   /* without SUPPORT_UCP */  
             else break;  
2566  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2567                else break;
2568              }              }
2569    
2570            if (possessive) continue;            if (possessive) continue;
2571    
2572            for(;;)            for(;;)
2573             {              {
2574             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2575             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2576             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2577  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2578             eptr--;              eptr--;
2579             BACKCHAR(eptr);              BACKCHAR(eptr);
2580  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2581             eptr -= length;              eptr -= length;
2582  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2583             }              }
2584            }            }
2585          /* Control never gets here */          /* Control never gets here */
2586          }          }
# Line 2379  for (;;) Line 2593  for (;;)
2593  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2594    
2595      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2596        {  
2597        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2598    
2599      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2600      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2400  for (;;) Line 2612  for (;;)
2612        {        {
2613        fc = md->lcc[fc];        fc = md->lcc[fc];
2614        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2615            {
2616            if (eptr >= md->end_subject)
2617              {
2618              SCHECK_PARTIAL();
2619              RRETURN(MATCH_NOMATCH);
2620              }
2621          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2622            }
2623        if (min == max) continue;        if (min == max) continue;
2624        if (minimize)        if (minimize)
2625          {          {
# Line 2408  for (;;) Line 2627  for (;;)
2627            {            {
2628            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2629            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2630            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
2631                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2632                {
2633                SCHECK_PARTIAL();
2634              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2635                }
2636              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2637            }            }
2638          /* Control never gets here */          /* Control never gets here */
2639          }          }
# Line 2422  for (;;) Line 2645  for (;;)
2645            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2646            eptr++;            eptr++;
2647            }            }
2648    
2649          if (possessive) continue;          if (possessive) continue;
2650    
2651          while (eptr >= pp)          while (eptr >= pp)
2652            {            {
2653            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
# Line 2438  for (;;) Line 2663  for (;;)
2663    
2664      else      else
2665        {        {
2666        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2667            {
2668            if (eptr >= md->end_subject)
2669              {
2670              SCHECK_PARTIAL();
2671              RRETURN(MATCH_NOMATCH);
2672              }
2673            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2674            }
2675    
2676        if (min == max) continue;        if (min == max) continue;
2677    
2678        if (minimize)        if (minimize)
2679          {          {
2680          for (fi = min;; fi++)          for (fi = min;; fi++)
2681            {            {
2682            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2683            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) RRETURN(MATCH_NOMATCH);
2685              if (eptr >= md->end_subject)
2686                {
2687                SCHECK_PARTIAL();
2688              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2689                }
2690              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2691            }            }
2692          /* Control never gets here */          /* Control never gets here */
2693          }          }
# Line 2460  for (;;) Line 2700  for (;;)
2700            eptr++;            eptr++;
2701            }            }
2702          if (possessive) continue;          if (possessive) continue;
2703    
2704          while (eptr >= pp)          while (eptr >= pp)
2705            {            {
2706            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
# Line 2475  for (;;) Line 2716  for (;;)
2716      checking can be multibyte. */      checking can be multibyte. */
2717    
2718      case OP_NOT:      case OP_NOT:
2719      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2720          {
2721          SCHECK_PARTIAL();
2722          RRETURN(MATCH_NOMATCH);
2723          }
2724      ecode++;      ecode++;
2725      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2726      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2552  for (;;) Line 2797  for (;;)
2797      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2798      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2799    
2800      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2801    
2802      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2803      fc = *ecode++;      fc = *ecode++;
2804    
2805      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2582  for (;;) Line 2824  for (;;)
2824          register unsigned int d;          register unsigned int d;
2825          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2826            {            {
2827              if (eptr >= md->end_subject)
2828                {
2829                SCHECK_PARTIAL();
2830                RRETURN(MATCH_NOMATCH);
2831                }
2832            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2833            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2834            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2593  for (;;) Line 2840  for (;;)
2840        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2841          {          {
2842          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2843              {
2844              if (eptr >= md->end_subject)
2845                {
2846                SCHECK_PARTIAL();
2847                RRETURN(MATCH_NOMATCH);
2848                }
2849            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2850              }
2851          }          }
2852    
2853        if (min == max) continue;        if (min == max) continue;
# Line 2609  for (;;) Line 2863  for (;;)
2863              {              {
2864              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2865              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2866              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2867                if (eptr >= md->end_subject)
2868                  {
2869                  SCHECK_PARTIAL();
2870                  RRETURN(MATCH_NOMATCH);
2871                  }
2872              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2873              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2874              if (fc == d) RRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
   
2875              }              }
2876            }            }
2877          else          else
# Line 2624  for (;;) Line 2882  for (;;)
2882              {              {
2883              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) RRETURN(MATCH_NOMATCH);
2886                if (eptr >= md->end_subject)
2887                  {
2888                  SCHECK_PARTIAL();
2889                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2890                  }
2891                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2892              }              }
2893            }            }
2894          /* Control never gets here */          /* Control never gets here */
# Line 2694  for (;;) Line 2957  for (;;)
2957          register unsigned int d;          register unsigned int d;
2958          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2959            {            {
2960              if (eptr >= md->end_subject)
2961                {
2962                SCHECK_PARTIAL();
2963                RRETURN(MATCH_NOMATCH);
2964                }
2965            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2966            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2967            }            }
# Line 2703  for (;;) Line 2971  for (;;)
2971        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2972          {          {
2973          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2974              {
2975              if (eptr >= md->end_subject)
2976                {
2977                SCHECK_PARTIAL();
2978                RRETURN(MATCH_NOMATCH);
2979                }
2980            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2981              }
2982          }          }
2983    
2984        if (min == max) continue;        if (min == max) continue;
# Line 2719  for (;;) Line 2994  for (;;)
2994              {              {
2995              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2996              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
2998                if (eptr >= md->end_subject)
2999                  {
3000                  SCHECK_PARTIAL();
3001                  RRETURN(MATCH_NOMATCH);
3002                  }
3003              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3004              if (fc == d) RRETURN(MATCH_NOMATCH);              if (fc == d) RRETURN(MATCH_NOMATCH);
3005              }              }
3006            }            }
3007          else          else
# Line 2732  for (;;) Line 3012  for (;;)
3012              {              {
3013              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3014              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3015              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) RRETURN(MATCH_NOMATCH);
3016                if (eptr >= md->end_subject)
3017                  {
3018                  SCHECK_PARTIAL();
3019                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3020                  }
3021                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3022              }              }
3023            }            }
3024          /* Control never gets here */          /* Control never gets here */
# Line 2867  for (;;) Line 3152  for (;;)
3152    
3153      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3154      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3155      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3156      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3157      and single-bytes. */      and single-bytes. */
3158    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3159      if (min > 0)      if (min > 0)
3160        {        {
3161  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2885  for (;;) Line 3167  for (;;)
3167            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3168            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3169              {              {
3170              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3171                  {
3172                  SCHECK_PARTIAL();
3173                  RRETURN(MATCH_NOMATCH);
3174                  }
3175              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3176              }              }
3177            break;            break;
# Line 2893  for (;;) Line 3179  for (;;)
3179            case PT_LAMP:            case PT_LAMP:
3180            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3181              {              {
3182              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3183                  {
3184                  SCHECK_PARTIAL();
3185                  RRETURN(MATCH_NOMATCH);
3186                  }
3187              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3188              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3189              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 2906  for (;;) Line 3196  for (;;)
3196            case PT_GC:            case PT_GC:
3197            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3198              {              {
3199              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3200                  {
3201                  SCHECK_PARTIAL();
3202                  RRETURN(MATCH_NOMATCH);
3203                  }
3204              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3205              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3206              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 2917  for (;;) Line 3211  for (;;)
3211            case PT_PC:            case PT_PC:
3212            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3213              {              {
3214              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3215                  {
3216                  SCHECK_PARTIAL();
3217                  RRETURN(MATCH_NOMATCH);
3218                  }
3219              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3220              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3221              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 2928  for (;;) Line 3226  for (;;)
3226            case PT_SC:            case PT_SC:
3227            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3228              {              {
3229              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3230                  {
3231                  SCHECK_PARTIAL();
3232                  RRETURN(MATCH_NOMATCH);
3233                  }
3234              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3235              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3236              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 2948  for (;;) Line 3250  for (;;)
3250          {          {
3251          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3252            {            {
3253              if (eptr >= md->end_subject)
3254                {
3255                SCHECK_PARTIAL();
3256                RRETURN(MATCH_NOMATCH);
3257                }
3258            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3259            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3260            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3261            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3262              {              {
3263              int len = 1;              int len = 1;
3264              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3265                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3266              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3267              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3268              eptr += len;              eptr += len;
# Line 2976  for (;;) Line 3281  for (;;)
3281          case OP_ANY:          case OP_ANY:
3282          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3283            {            {
3284            if (eptr >= md->end_subject || IS_NEWLINE(eptr))            if (eptr >= md->end_subject)
3285                {
3286                SCHECK_PARTIAL();
3287              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3288                }
3289              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3290            eptr++;            eptr++;
3291            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3292            }            }
# Line 2986  for (;;) Line 3295  for (;;)
3295          case OP_ALLANY:          case OP_ALLANY:
3296          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3297            {            {
3298            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3299                {
3300                SCHECK_PARTIAL();
3301                RRETURN(MATCH_NOMATCH);
3302                }
3303            eptr++;            eptr++;
3304            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3305            }            }
3306          break;          break;
3307    
3308          case OP_ANYBYTE:          case OP_ANYBYTE:
3309            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3310          eptr += min;          eptr += min;
3311          break;          break;
3312    
3313          case OP_ANYNL:          case OP_ANYNL:
3314          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3315            {            {
3316            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3317                {
3318                SCHECK_PARTIAL();
3319                RRETURN(MATCH_NOMATCH);
3320                }
3321            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3322            switch(c)            switch(c)
3323              {              {
# Line 3025  for (;;) Line 3343  for (;;)
3343          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3344          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3345            {            {
3346            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3347                {
3348                SCHECK_PARTIAL();
3349                RRETURN(MATCH_NOMATCH);
3350                }
3351            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3352            switch(c)            switch(c)
3353              {              {
# Line 3057  for (;;) Line 3379  for (;;)
3379          case OP_HSPACE:          case OP_HSPACE:
3380          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3381            {            {
3382            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3383                {
3384                SCHECK_PARTIAL();
3385                RRETURN(MATCH_NOMATCH);
3386                }
3387            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3388            switch(c)            switch(c)
3389              {              {
# Line 3089  for (;;) Line 3415  for (;;)
3415          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3416          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3417            {            {
3418            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3419                {
3420                SCHECK_PARTIAL();
3421                RRETURN(MATCH_NOMATCH);
3422                }
3423            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3424            switch(c)            switch(c)
3425              {              {
# Line 3109  for (;;) Line 3439  for (;;)
3439          case OP_VSPACE:          case OP_VSPACE:
3440          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3441            {            {
3442            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3443                {
3444                SCHECK_PARTIAL();
3445                RRETURN(MATCH_NOMATCH);
3446                }
3447            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3448            switch(c)            switch(c)
3449              {              {
# Line 3129  for (;;) Line 3463  for (;;)
3463          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3464          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3465            {            {
3466            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3467                {
3468                SCHECK_PARTIAL();
3469                RRETURN(MATCH_NOMATCH);
3470                }
3471            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3472            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3473              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 3139  for (;;) Line 3477  for (;;)
3477          case OP_DIGIT:          case OP_DIGIT:
3478          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3479            {            {
3480            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3481               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3482                SCHECK_PARTIAL();
3483                RRETURN(MATCH_NOMATCH);
3484                }
3485              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3486              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3487            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3488            }            }
# Line 3149  for (;;) Line 3491  for (;;)
3491          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3492          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3493            {            {
3494            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3495               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))              {
3496                SCHECK_PARTIAL();
3497                RRETURN(MATCH_NOMATCH);
3498                }
3499              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3500              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3501            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3502            }            }
# Line 3159  for (;;) Line 3505  for (;;)
3505          case OP_WHITESPACE:          case OP_WHITESPACE:
3506          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3507            {            {
3508            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3509               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3510                SCHECK_PARTIAL();
3511                RRETURN(MATCH_NOMATCH);
3512                }
3513              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3514              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3515            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3516            }            }
# Line 3179  for (;;) Line 3529  for (;;)
3529          case OP_WORDCHAR:          case OP_WORDCHAR:
3530          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3531            {            {
3532            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3533               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3534                SCHECK_PARTIAL();
3535                RRETURN(MATCH_NOMATCH);
3536                }
3537              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3538              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3539            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3540            }            }
# Line 3194  for (;;) Line 3548  for (;;)
3548  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3549    
3550        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3551        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3552    
3553        switch(ctype)        switch(ctype)
3554          {          {
3555          case OP_ANY:          case OP_ANY:
3556          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3557            {            {
3558              if (eptr >= md->end_subject)
3559                {
3560                SCHECK_PARTIAL();
3561                RRETURN(MATCH_NOMATCH);
3562                }
3563            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3564            eptr++;            eptr++;
3565            }            }
3566          break;          break;
3567    
3568          case OP_ALLANY:          case OP_ALLANY:
3569            if (eptr > md->end_subject - min)
3570              {
3571              SCHECK_PARTIAL();
3572              RRETURN(MATCH_NOMATCH);
3573              }
3574          eptr += min;          eptr += min;
3575          break;          break;
3576    
3577          case OP_ANYBYTE:          case OP_ANYBYTE:
3578            if (eptr > md->end_subject - min)
3579              {
3580              SCHECK_PARTIAL();
3581              RRETURN(MATCH_NOMATCH);
3582              }
3583          eptr += min;          eptr += min;
3584          break;          break;
3585    
         /* Because of the CRLF case, we can't assume the minimum number of  
         bytes are present in this case. */  
   
3586          case OP_ANYNL:          case OP_ANYNL:
3587          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3588            {            {
3589            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3590                {
3591                SCHECK_PARTIAL();
3592                RRETURN(MATCH_NOMATCH);
3593                }
3594            switch(*eptr++)            switch(*eptr++)
3595              {              {
3596              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3243  for (;;) Line 3612  for (;;)
3612          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3613          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3614            {            {
3615            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3616                {
3617                SCHECK_PARTIAL();
3618                RRETURN(MATCH_NOMATCH);
3619                }
3620            switch(*eptr++)            switch(*eptr++)
3621              {              {
3622              default: break;              default: break;
# Line 3258  for (;;) Line 3631  for (;;)
3631          case OP_HSPACE:          case OP_HSPACE:
3632          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3633            {            {
3634            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3635                {
3636                SCHECK_PARTIAL();
3637                RRETURN(MATCH_NOMATCH);
3638                }
3639            switch(*eptr++)            switch(*eptr++)
3640              {              {
3641              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3273  for (;;) Line 3650  for (;;)
3650          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3651          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3652            {            {
3653            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3654                {
3655                SCHECK_PARTIAL();
3656                RRETURN(MATCH_NOMATCH);
3657                }
3658            switch(*eptr++)            switch(*eptr++)
3659              {              {
3660              default: break;              default: break;
# Line 3290  for (;;) Line 3671  for (;;)
3671          case OP_VSPACE:          case OP_VSPACE:
3672          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3673            {            {
3674            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3675                {
3676                SCHECK_PARTIAL();
3677                RRETURN(MATCH_NOMATCH);
3678                }
3679            switch(*eptr++)            switch(*eptr++)
3680              {              {
3681              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3306  for (;;) Line 3691  for (;;)
3691    
3692          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3693          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3694              {
3695              if (eptr >= md->end_subject)
3696                {
3697                SCHECK_PARTIAL();
3698                RRETURN(MATCH_NOMATCH);
3699                }
3700            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3701              }
3702          break;          break;
3703    
3704          case OP_DIGIT:          case OP_DIGIT:
3705          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3706              {
3707              if (eptr >= md->end_subject)
3708                {
3709                SCHECK_PARTIAL();
3710                RRETURN(MATCH_NOMATCH);
3711                }
3712            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3713              }
3714          break;          break;
3715    
3716          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3717          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3718              {
3719              if (eptr >= md->end_subject)
3720                {
3721                SCHECK_PARTIAL();
3722                RRETURN(MATCH_NOMATCH);
3723                }
3724            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3725              }
3726          break;          break;
3727    
3728          case OP_WHITESPACE:          case OP_WHITESPACE:
3729          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3730              {
3731              if (eptr >= md->end_subject)
3732                {
3733                SCHECK_PARTIAL();
3734                RRETURN(MATCH_NOMATCH);
3735                }
3736            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3737              }
3738          break;          break;
3739    
3740          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3742              {
3743              if (eptr >= md->end_subject)
3744                {
3745                SCHECK_PARTIAL();
3746                RRETURN(MATCH_NOMATCH);
3747                }
3748            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3749              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3750              }
3751          break;          break;
3752    
3753          case OP_WORDCHAR:          case OP_WORDCHAR:
3754          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3755              {
3756              if (eptr >= md->end_subject)
3757                {
3758                SCHECK_PARTIAL();
3759                RRETURN(MATCH_NOMATCH);
3760                }
3761            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3762              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3763              }
3764          break;          break;
3765    
3766          default:          default:
# Line 3361  for (;;) Line 3788  for (;;)
3788              {              {
3789              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3792                if (eptr >= md->end_subject)
3793                  {
3794                  SCHECK_PARTIAL();
3795                  RRETURN(MATCH_NOMATCH);
3796                  }
3797              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3798              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3799              }              }
# Line 3372  for (;;) Line 3804  for (;;)
3804              {              {
3805              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3806              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3807              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3808                if (eptr >= md->end_subject)
3809                  {
3810                  SCHECK_PARTIAL();
3811                  RRETURN(MATCH_NOMATCH);
3812                  }
3813              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3814              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3815              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 3387  for (;;) Line 3824  for (;;)
3824              {              {
3825              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3826              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3827              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3828                if (eptr >= md->end_subject)
3829                  {
3830                  SCHECK_PARTIAL();
3831                  RRETURN(MATCH_NOMATCH);
3832                  }
3833              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3834              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3835              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 3400  for (;;) Line 3842  for (;;)
3842              {              {
3843              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3844              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3845              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3846                if (eptr >= md->end_subject)
3847                  {
3848                  SCHECK_PARTIAL();
3849                  RRETURN(MATCH_NOMATCH);
3850                  }
3851              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3852              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3853              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 3413  for (;;) Line 3860  for (;;)
3860              {              {
3861              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3862              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3863              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) RRETURN(MATCH_NOMATCH);
3864                if (eptr >= md->end_subject)
3865                  {
3866                  SCHECK_PARTIAL();
3867                  RRETURN(MATCH_NOMATCH);
3868                  }
3869              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3870              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3871              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 3435  for (;;) Line 3887  for (;;)
3887            {            {
3888            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3889            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3890            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) RRETURN(MATCH_NOMATCH);
3891              if (eptr >= md->end_subject)
3892                {
3893                SCHECK_PARTIAL();
3894                RRETURN(MATCH_NOMATCH);
3895                }
3896            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3897            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3898            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3899            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3900              {              {
3901              int len = 1;              int len = 1;
3902              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3903                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3904              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3905              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3906              eptr += len;              eptr += len;
# Line 3464  for (;;) Line 3919  for (;;)
3919            {            {
3920            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3921            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3922            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
3923                 (ctype == OP_ANY && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
3924                {
3925                SCHECK_PARTIAL();
3926                RRETURN(MATCH_NOMATCH);
3927                }
3928              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3929              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3930            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3931            switch(ctype)            switch(ctype)
3932              {              {
# Line 3623  for (;;) Line 4082  for (;;)
4082            {            {
4083            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4084            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4085            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) RRETURN(MATCH_NOMATCH);
4086                 (ctype == OP_ANY && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4087                {
4088                SCHECK_PARTIAL();
4089                RRETURN(MATCH_NOMATCH);
4090                }
4091              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4092              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4093            c = *eptr++;            c = *eptr++;
4094            switch(ctype)            switch(ctype)
4095              {              {
# Line 4409  const uschar *tables; Line 4872  const uschar *tables;
4872  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4873  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4874  USPTR end_subject;  USPTR end_subject;
4875    USPTR start_partial = NULL;
4876  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4877    
4878  pcre_study_data internal_study;  pcre_study_data internal_study;
# Line 4492  md->jscript_compat = (re->options & PCRE Line 4956  md->jscript_compat = (re->options & PCRE
4956  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4957  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4958  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4959  md->partial = (options & PCRE_PARTIAL) != 0;  md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
4960    md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4961                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4962  md->hitend = FALSE;  md->hitend = FALSE;
4963    
4964  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
# Line 4533  switch ((((options & PCRE_NEWLINE_BITS) Line 4999  switch ((((options & PCRE_NEWLINE_BITS)
4999          (pcre_uint32)options) & PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
5000    {    {
5001    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
5002    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
5003    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
5004    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5005         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
5006    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5007    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5008    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4566  else Line 5032  else
5032      }      }
5033    }    }
5034    
5035  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
5036  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
5037    defined (though never set). So there's no harm in leaving this code. */
5038    
5039  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
5040    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
# Line 4578  back the character offset. */ Line 5045  back the character offset. */
5045  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5046  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
5047    {    {
5048    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
5049      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
5050    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
5051      {      {
5052      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5053      if (tb > 127)      if (tb > 127)
5054        {        {
5055        tb &= 0xc0;        tb &= 0xc0;
# Line 4654  if (!anchored) Line 5121  if (!anchored)
5121      }      }
5122    else    else
5123      if (!startline && study != NULL &&      if (!startline && study != NULL &&
5124        (study->options & PCRE_STUDY_MAPPED) != 0)        (study->flags & PCRE_STUDY_MAPPED) != 0)
5125          start_bits = study->start_bits;          start_bits = study->start_bits;
5126    }    }
5127    
# Line 4688  for(;;) Line 5155  for(;;)
5155      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5156      }      }
5157    
5158    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5159    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5160    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5161    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5162    the match fails at the newline, later code breaks this loop. */    this loop. */
5163    
5164    if (firstline)    if (firstline)
5165      {      {
5166      USPTR t = start_match;      USPTR t = start_match;
5167  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
5168      if (utf8)      if (utf8)
5169        {        {
5170        while (t < md->end_subject && !IS_NEWLINE(t))        while (t < md->end_subject && !IS_NEWLINE(t))
5171          {          {
5172          t++;          t++;
5173          while (t < end_subject && (*t & 0xc0) == 0x80) t++;          while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5174          }          }
5175        }        }
5176      else      else
5177  #endif  #endif
5178      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5179      end_subject = t;      end_subject = t;
5180      }      }
5181    
5182    /* Now advance to a unique first byte if there is one. */    /* There are some optimizations that avoid running the match if a known
5183      starting point is not found, or if a known later character is not present.
5184      However, there is an option that disables these, for testing and for ensuring
5185      that all callouts do actually occur. */
5186    
5187    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5188      {      {
5189      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
5190        while (start_match < end_subject && md->lcc[*start_match] != first_byte)  
5191          start_match++;      if (first_byte >= 0)
5192      else        {
5193        while (start_match < end_subject && *start_match != first_byte)        if (first_byte_caseless)
5194          start_match++;          while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5195      }            start_match++;
5196          else
5197            while (start_match < end_subject && *start_match != first_byte)
5198              start_match++;
5199          }
5200    
5201    /* Or to just after a linebreak for a multiline match */      /* Or to just after a linebreak for a multiline match */
5202    
5203    else if (startline)      else if (startline)
     {  
     if (start_match > md->start_subject + start_offset)  
5204        {        {
5205  #ifdef SUPPORT_UTF8        if (start_match > md->start_subject + start_offset)
       if (utf8)  
5206          {          {
5207          while (start_match < end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
5208            if (utf8)
5209            {            {
5210            start_match++;            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5211            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              {
5212              start_match++;              start_match++;
5213            }              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5214                  start_match++;
5215                }
5216              }
5217            else
5218    #endif
5219            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5220              start_match++;
5221    
5222            /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5223            and we are now at a LF, advance the match position by one more character.
5224            */
5225    
5226            if (start_match[-1] == CHAR_CR &&
5227                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5228                 start_match < end_subject &&
5229                 *start_match == CHAR_NL)
5230              start_match++;
5231          }          }
       else  
 #endif  
       while (start_match < end_subject && !WAS_NEWLINE(start_match))  
         start_match++;  
   
       /* If we have just passed a CR and the newline option is ANY or ANYCRLF,  
       and we are now at a LF, advance the match position by one more character.  
       */  
   
       if (start_match[-1] == '\r' &&  
            (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&  
            start_match < end_subject &&  
            *start_match == '\n')  
         start_match++;  
5232        }        }
     }  
5233    
5234    /* Or to a non-unique first byte after study */      /* Or to a non-unique first byte after study */
5235    
5236    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
5237        {        {
5238        register unsigned int c = *start_match;        while (start_match < end_subject)
5239        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          {
5240          else break;          register unsigned int c = *start_match;
5241            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5242              else break;
5243            }
5244        }        }
5245      }      }   /* Starting optimizations */
5246    
5247    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5248    
5249    end_subject = save_end_subject;    end_subject = save_end_subject;
5250    
5251  #ifdef DEBUG  /* Sigh. Some compilers never learn. */    /* The following two optimizations are disabled for partial matching or if
5252    printf(">>>> Match against: ");    disabling is explicitly requested. */
5253    pchars(start_match, end_subject - start_match, TRUE, md);  
5254    printf("\n");    if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
5255  #endif      {
5256        /* If the pattern was studied, a minimum subject length may be set. This is
5257    /* If req_byte is set, we know that that character must appear in the subject      a lower bound; there may be no string of exactly that length that matches the
5258    for the match to succeed. If the first character is set, req_byte must be      pattern. Although the value is, strictly, in characters, we treat it as
5259    later in the subject; otherwise the test starts at the match point. This      bytes to avoid spending too much time in this optimization. */
5260    optimization can save a huge amount of backtracking in patterns with nested  
5261    unlimited repeats that aren't going to match. Writing separate code for      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
5262    cased/caseless versions makes it go faster, as does using an autoincrement          end_subject - start_match < study->minlength)
   and backing off on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end can  
   take a long time, and give bad performance on quite ordinary patterns. This  
   showed up when somebody was matching something like /^\d+C/ on a 32-megabyte  
   string... so we don't do this when the string is sufficiently long.  
   
   ALSO: this processing is disabled when partial matching is requested.  
   */  
   
   if (req_byte >= 0 &&  
       end_subject - start_match < REQ_BYTE_MAX &&  
       !md->partial)  
     {  
     register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);  
   
     /* We don't need to repeat the search if we haven't yet reached the  
     place we found it at last time. */  
   
     if (p > req_byte_ptr)  
5263        {        {
5264        if (req_byte_caseless)        rc = MATCH_NOMATCH;
5265          break;
5266          }
5267    
5268        /* If req_byte is set, we know that that character must appear in the
5269        subject for the match to succeed. If the first character is set, req_byte
5270        must be later in the subject; otherwise the test starts at the match point.
5271        This optimization can save a huge amount of backtracking in patterns with
5272        nested unlimited repeats that aren't going to match. Writing separate code
5273        for cased/caseless versions makes it go faster, as does using an
5274        autoincrement and backing off on a match.
5275    
5276        HOWEVER: when the subject string is very, very long, searching to its end
5277        can take a long time, and give bad performance on quite ordinary patterns.
5278        This showed up when somebody was matching something like /^\d+C/ on a
5279        32-megabyte string... so we don't do this when the string is sufficiently
5280        long. */
5281    
5282        if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
5283          {
5284          register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
5285    
5286          /* We don't need to repeat the search if we haven't yet reached the
5287          place we found it at last time. */
5288    
5289          if (p > req_byte_ptr)
5290          {          {
5291          while (p < end_subject)          if (req_byte_caseless)
5292            {            {
5293            register int pp = *p++;            while (p < end_subject)
5294            if (pp == req_byte || pp == req_byte2) { p--; break; }              {
5295                register int pp = *p++;
5296                if (pp == req_byte || pp == req_byte2) { p--; break; }
5297                }
5298            }            }
5299          }          else
       else  
         {  
         while (p < end_subject)  
5300            {            {
5301            if (*p++ == req_byte) { p--; break; }            while (p < end_subject)
5302                {
5303                if (*p++ == req_byte) { p--; break; }
5304                }
5305            }            }
5306    
5307            /* If we can't find the required character, break the matching loop,
5308            forcing a match failure. */
5309    
5310            if (p >= end_subject)
5311              {
5312              rc = MATCH_NOMATCH;
5313              break;
5314              }
5315    
5316            /* If we have found the required character, save the point where we
5317            found it, so that we don't search again next time round the loop if
5318            the start hasn't passed this character yet. */
5319    
5320            req_byte_ptr = p;
5321          }          }
5322          }
5323        }
5324    
5325        /* If we can't find the required character, break the matching loop,  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
5326        forcing a match failure. */    printf(">>>> Match against: ");
5327      pchars(start_match, end_subject - start_match, TRUE, md);
5328        if (p >= end_subject)    printf("\n");
5329          {  #endif
         rc = MATCH_NOMATCH;  
         break;  
         }  
   
       /* If we have found the required character, save the point where we  
       found it, so that we don't search again next time round the loop if  
       the start hasn't passed this character yet. */  
5330    
5331        req_byte_ptr = p;    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5332        }    first starting point for which a partial match was found. */
     }  
5333    
   /* OK, we can now run the match. */  
   
5334    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5335      md->start_used_ptr = start_match;
5336    md->match_call_count = 0;    md->match_call_count = 0;
5337    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5338      if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
5339    
5340    switch(rc)    switch(rc)
5341      {      {
# Line 4873  for(;;) Line 5365  for(;;)
5365      rc = MATCH_NOMATCH;      rc = MATCH_NOMATCH;
5366      goto ENDLOOP;      goto ENDLOOP;
5367    
5368      /* Any other return is some kind of error. */      /* Any other return is either a match, or some kind of error. */
5369    
5370      default:      default:
5371      goto ENDLOOP;      goto ENDLOOP;
# Line 4903  for(;;) Line 5395  for(;;)
5395    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
5396    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
5397    
5398    if (start_match[-1] == '\r' &&    if (start_match[-1] == CHAR_CR &&
5399        start_match < end_subject &&        start_match < end_subject &&
5400        *start_match == '\n' &&        *start_match == CHAR_NL &&
5401        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
5402          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
5403           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
# Line 4955  if (rc == MATCH_MATCH) Line 5447  if (rc == MATCH_MATCH)
5447    too many to fit into the vector. */    too many to fit into the vector. */
5448    
5449    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
5450    
5451    /* If there is space, set up the whole thing as substring 0. The value of    /* If there is space, set up the whole thing as substring 0. The value of
5452    md->start_match_ptr might be modified if \K was encountered on the success    md->start_match_ptr might be modified if \K was encountered on the success
5453    matching path. */    matching path. */
# Line 4979  if (using_temporary_offsets) Line 5471  if (using_temporary_offsets)
5471    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
5472    }    }
5473    
5474  if (rc != MATCH_NOMATCH)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5475    {    {
5476    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
5477    return rc;    return rc;
5478    }    }
5479  else if (md->partial && md->hitend)  else if (start_partial != NULL)
5480    {    {
5481    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5482      if (offsetcount > 1)
5483        {
5484        offsets[0] = start_partial - (USPTR)subject;
5485        offsets[1] = end_subject - (USPTR)subject;
5486        }
5487    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;
5488    }    }
5489  else  else

Legend:
Removed from v.366  
changed lines
  Added in v.455

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12