/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 359 by ph10, Wed Jul 9 16:20:19 2008 UTC revision 427 by ph10, Fri Aug 28 09:55:54 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2008 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 322  typedef struct heapframe { Line 322  typedef struct heapframe {
322    
323    /* Function arguments that may change */    /* Function arguments that may change */
324    
325    const uschar *Xeptr;    USPTR Xeptr;
326    const uschar *Xecode;    const uschar *Xecode;
327    const uschar *Xmstart;    USPTR Xmstart;
328    int Xoffset_top;    int Xoffset_top;
329    long int Xims;    long int Xims;
330    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 333  typedef struct heapframe { Line 333  typedef struct heapframe {
333    
334    /* Function local variables */    /* Function local variables */
335    
336    const uschar *Xcallpat;    USPTR Xcallpat;
337    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
338    const uschar *Xdata;    USPTR Xcharptr;
339    const uschar *Xnext;  #endif
340    const uschar *Xpp;    USPTR Xdata;
341    const uschar *Xprev;    USPTR Xnext;
342    const uschar *Xsaved_eptr;    USPTR Xpp;
343      USPTR Xprev;
344      USPTR Xsaved_eptr;
345    
346    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
347    
# Line 360  typedef struct heapframe { Line 362  typedef struct heapframe {
362    uschar Xocchars[8];    uschar Xocchars[8];
363  #endif  #endif
364    
365      int Xcodelink;
366    int Xctype;    int Xctype;
367    unsigned int Xfc;    unsigned int Xfc;
368    int Xfi;    int Xfi;
# Line 395  typedef struct heapframe { Line 398  typedef struct heapframe {
398    
399  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
400  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
401  same response.  same response. */
402    
403    /* These macros pack up tests that are used for partial matching, and which
404    appear several times in the code. We set the "hit end" flag if the pointer is
405    at the end of the subject and also past the start of the subject (i.e.
406    something has been matched). For hard partial matching, we then return
407    immediately. The second one is used when we already know we are past the end of
408    the subject. */
409    
410    #define CHECK_PARTIAL()\
411      if (md->partial && eptr >= md->end_subject && eptr > mstart)\
412        {\
413        md->hitend = TRUE;\
414        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
415        }
416    
417  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
418  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial && eptr > mstart)\
419        {\
420        md->hitend = TRUE;\
421        md->hitend = TRUE;\
422        if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
423        }
424    
425    
426    /* Performance note: It might be tempting to extract commonly used fields from
427    the md structure (e.g. utf8, end_subject) into individual variables to improve
428  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
429  made performance worse.  made performance worse.
430    
# Line 425  Returns: MATCH_MATCH if matched Line 451  Returns: MATCH_MATCH if matched
451  */  */
452    
453  static int  static int
454  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
455    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
456    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
457  {  {
# Line 439  register unsigned int c; /* Character Line 465  register unsigned int c; /* Character
465  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
466    
467  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
468    int condcode;
469    
470  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
471  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 481  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
509  #endif  #endif
510  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
511    #define codelink           frame->Xcodelink
512  #define data               frame->Xdata  #define data               frame->Xdata
513  #define next               frame->Xnext  #define next               frame->Xnext
514  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 561  int oclength; Line 589  int oclength;
589  uschar occhars[8];  uschar occhars[8];
590  #endif  #endif
591    
592    int codelink;
593  int ctype;  int ctype;
594  int length;  int length;
595  int max;  int max;
# Line 637  for (;;) Line 666  for (;;)
666    op = *ecode;    op = *ecode;
667    
668    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
669    matching at least one subject character. */    matching at least one subject character. This code is now wrapped in a macro
670      because it appears several times below. */
671    
672    if (md->partial &&    CHECK_PARTIAL();
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
673    
674    switch(op)    switch(op)
675      {      {
# Line 787  for (;;) Line 814  for (;;)
814    
815      case OP_COND:      case OP_COND:
816      case OP_SCOND:      case OP_SCOND:
817      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
818    
819        /* Because of the way auto-callout works during compile, a callout item is
820        inserted between OP_COND and an assertion condition. */
821    
822        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
823          {
824          if (pcre_callout != NULL)
825            {
826            pcre_callout_block cb;
827            cb.version          = 1;   /* Version 1 of the callout block */
828            cb.callout_number   = ecode[LINK_SIZE+2];
829            cb.offset_vector    = md->offset_vector;
830            cb.subject          = (PCRE_SPTR)md->start_subject;
831            cb.subject_length   = md->end_subject - md->start_subject;
832            cb.start_match      = mstart - md->start_subject;
833            cb.current_position = eptr - md->start_subject;
834            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
835            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
836            cb.capture_top      = offset_top/2;
837            cb.capture_last     = md->capture_last;
838            cb.callout_data     = md->callout_data;
839            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
840            if (rrc < 0) RRETURN(rrc);
841            }
842          ecode += _pcre_OP_lengths[OP_CALLOUT];
843          }
844    
845        condcode = ecode[LINK_SIZE+1];
846    
847        /* Now see what the actual condition is */
848    
849        if (condcode == OP_RREF)         /* Recursion test */
850        {        {
851        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
852        condition = md->recursive != NULL &&        condition = md->recursive != NULL &&
# Line 795  for (;;) Line 854  for (;;)
854        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
855        }        }
856    
857      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF)    /* Group used test */
858        {        {
859        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
860        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
861        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
862        }        }
863    
864      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
865        {        {
866        condition = FALSE;        condition = FALSE;
867        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 829  for (;;) Line 888  for (;;)
888        else        else
889          {          {
890          condition = FALSE;          condition = FALSE;
891          ecode += GET(ecode, 1);          ecode += codelink;
892          }          }
893        }        }
894    
# Line 852  for (;;) Line 911  for (;;)
911          goto TAIL_RECURSE;          goto TAIL_RECURSE;
912          }          }
913        }        }
914      else                         /* Condition false & no 2nd alternative */      else                         /* Condition false & no alternative */
915        {        {
916        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
917        }        }
# Line 1075  for (;;) Line 1134  for (;;)
1134          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1135            {            {
1136            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1137              if (new_recursive.offset_save != stacksave)
1138                (pcre_free)(new_recursive.offset_save);
1139            RRETURN(rrc);            RRETURN(rrc);
1140            }            }
1141    
# Line 1421  for (;;) Line 1482  for (;;)
1482          {          {
1483          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1484            {            {
1485            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1486            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1487            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1488            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1679  for (;;) Line 1740  for (;;)
1740      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1741      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1742        {        {
1743        const ucd_record * prop = GET_UCD(c);        const ucd_record *prop = GET_UCD(c);
1744    
1745        switch(ecode[1])        switch(ecode[1])
1746          {          {
# Line 1817  for (;;) Line 1878  for (;;)
1878    
1879        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
1880          {          {
1881          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
1882              {
1883              CHECK_PARTIAL();
1884              RRETURN(MATCH_NOMATCH);
1885              }
1886          eptr += length;          eptr += length;
1887          }          }
1888    
# Line 1835  for (;;) Line 1900  for (;;)
1900            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1901            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1902            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1903                {
1904                CHECK_PARTIAL();
1905              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1906                }
1907            eptr += length;            eptr += length;
1908            }            }
1909          /* Control never gets here */          /* Control never gets here */
# Line 1851  for (;;) Line 1919  for (;;)
1919            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
1920            eptr += length;            eptr += length;
1921            }            }
1922            CHECK_PARTIAL();
1923          while (eptr >= pp)          while (eptr >= pp)
1924            {            {
1925            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
# Line 1918  for (;;) Line 1987  for (;;)
1987          {          {
1988          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
1989            {            {
1990            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
1991                {
1992                CHECK_PARTIAL();
1993                RRETURN(MATCH_NOMATCH);
1994                }
1995            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
1996            if (c > 255)            if (c > 255)
1997              {              {
# Line 1936  for (;;) Line 2009  for (;;)
2009          {          {
2010          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2011            {            {
2012            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2013                {
2014                CHECK_PARTIAL();
2015                RRETURN(MATCH_NOMATCH);
2016                }
2017            c = *eptr++;            c = *eptr++;
2018            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2019            }            }
# Line 1960  for (;;) Line 2037  for (;;)
2037              {              {
2038              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2039              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2040              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2041                  {
2042                  CHECK_PARTIAL();
2043                  RRETURN(MATCH_NOMATCH);
2044                  }
2045                if (eptr >= md->end_subject)
2046                  {
2047                  SCHECK_PARTIAL();
2048                  RRETURN(MATCH_NOMATCH);
2049                  }
2050              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2051              if (c > 255)              if (c > 255)
2052                {                {
# Line 1980  for (;;) Line 2066  for (;;)
2066              {              {
2067              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2068              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2069              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2070                  {
2071                  CHECK_PARTIAL();
2072                  RRETURN(MATCH_NOMATCH);
2073                  }
2074                if (eptr >= md->end_subject)
2075                  {
2076                  SCHECK_PARTIAL();
2077                  RRETURN(MATCH_NOMATCH);
2078                  }
2079              c = *eptr++;              c = *eptr++;
2080              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2081              }              }
# Line 2013  for (;;) Line 2108  for (;;)
2108                }                }
2109              eptr += len;              eptr += len;
2110              }              }
2111              CHECK_PARTIAL();
2112            for (;;)            for (;;)
2113              {              {
2114              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
# Line 2032  for (;;) Line 2128  for (;;)
2128              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2129              eptr++;              eptr++;
2130              }              }
2131              CHECK_PARTIAL();
2132            while (eptr >= pp)            while (eptr >= pp)
2133              {              {
2134              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
# Line 2047  for (;;) Line 2144  for (;;)
2144    
2145    
2146      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2147      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2148        mode, because Unicode properties are supported in non-UTF-8 mode. */
2149    
2150  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2151      case OP_XCLASS:      case OP_XCLASS:
# Line 2088  for (;;) Line 2186  for (;;)
2186    
2187        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2188          {          {
2189          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2190          GETCHARINC(c, eptr);            {
2191              SCHECK_PARTIAL();
2192              RRETURN(MATCH_NOMATCH);
2193              }
2194            GETCHARINCTEST(c, eptr);
2195          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2196          }          }
2197    
# Line 2107  for (;;) Line 2209  for (;;)
2209            {            {
2210            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2211            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2212            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
2213            GETCHARINC(c, eptr);              {
2214                CHECK_PARTIAL();
2215                RRETURN(MATCH_NOMATCH);
2216                }
2217              if (eptr >= md->end_subject)
2218                {
2219                SCHECK_PARTIAL();
2220                RRETURN(MATCH_NOMATCH);
2221                }
2222              GETCHARINCTEST(c, eptr);
2223            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
2224            }            }
2225          /* Control never gets here */          /* Control never gets here */
# Line 2123  for (;;) Line 2234  for (;;)
2234            {            {
2235            int len = 1;            int len = 1;
2236            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2237            GETCHARLEN(c, eptr, len);            GETCHARLENTEST(c, eptr, len);
2238            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2239            eptr += len;            eptr += len;
2240            }            }
2241            CHECK_PARTIAL();
2242          for(;;)          for(;;)
2243            {            {
2244            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
# Line 2267  for (;;) Line 2379  for (;;)
2379      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2380      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2381    
2382      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2383    
2384      REPEATCHAR:      REPEATCHAR:
2385  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2278  for (;;) Line 2388  for (;;)
2388        length = 1;        length = 1;
2389        charptr = ecode;        charptr = ecode;
2390        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2391        ecode += length;        ecode += length;
2392    
2393        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2296  for (;;) Line 2405  for (;;)
2405    
2406          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2407            {            {
2408            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2409                memcmp(eptr, charptr, length) == 0) eptr += length;
2410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2411            /* Need braces because of following else */            else if (oclength > 0 &&
2412            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2413                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2414    #endif  /* SUPPORT_UCP */
2415            else            else
2416              {              {
2417              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2418              eptr += oclength;              RRETURN(MATCH_NOMATCH);
2419              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2420            }            }
2421    
2422          if (min == max) continue;          if (min == max) continue;
# Line 2318  for (;;) Line 2427  for (;;)
2427              {              {
2428              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2429              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2430              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
2431              if (memcmp(eptr, charptr, length) == 0) eptr += length;                {
2432                  CHECK_PARTIAL();
2433                  RRETURN(MATCH_NOMATCH);
2434                  }
2435                if (eptr <= md->end_subject - length &&
2436                  memcmp(eptr, charptr, length) == 0) eptr += length;
2437  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2438              /* Need braces because of following else */              else if (oclength > 0 &&
2439              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2440                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2441    #endif  /* SUPPORT_UCP */
2442              else              else
2443                {                {
2444                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2445                eptr += oclength;                RRETURN(MATCH_NOMATCH);
2446                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2447              }              }
2448            /* Control never gets here */            /* Control never gets here */
2449            }            }
# Line 2340  for (;;) Line 2453  for (;;)
2453            pp = eptr;            pp = eptr;
2454            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2455              {              {
2456              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2457              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2458  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2459              else if (oclength == 0) break;              else if (oclength > 0 &&
2460              else                       eptr <= md->end_subject - oclength &&
2461                {                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
               if (memcmp(eptr, occhars, oclength) != 0) break;  
               eptr += oclength;  
               }  
 #else   /* without SUPPORT_UCP */  
             else break;  
2462  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2463                else break;
2464              }              }
2465    
2466              CHECK_PARTIAL();
2467            if (possessive) continue;            if (possessive) continue;
2468    
2469            for(;;)            for(;;)
2470             {              {
2471             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2472             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2473             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
2474  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2475             eptr--;              eptr--;
2476             BACKCHAR(eptr);              BACKCHAR(eptr);
2477  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2478             eptr -= length;              eptr -= length;
2479  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2480             }              }
2481            }            }
2482          /* Control never gets here */          /* Control never gets here */
2483          }          }
# Line 2379  for (;;) Line 2490  for (;;)
2490  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2491    
2492      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2493        {  
2494        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2495    
2496      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2497      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2400  for (;;) Line 2509  for (;;)
2509        {        {
2510        fc = md->lcc[fc];        fc = md->lcc[fc];
2511        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2512            {
2513            if (eptr >= md->end_subject)
2514              {
2515              SCHECK_PARTIAL();
2516              RRETURN(MATCH_NOMATCH);
2517              }
2518          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2519            }
2520        if (min == max) continue;        if (min == max) continue;
2521        if (minimize)        if (minimize)
2522          {          {
# Line 2408  for (;;) Line 2524  for (;;)
2524            {            {
2525            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
2528                fc != md->lcc[*eptr++])              {
2529                CHECK_PARTIAL();
2530              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2531                }
2532              if (eptr >= md->end_subject)
2533                {
2534                SCHECK_PARTIAL();
2535                RRETURN(MATCH_NOMATCH);
2536                }
2537              if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2538            }            }
2539          /* Control never gets here */          /* Control never gets here */
2540          }          }
# Line 2422  for (;;) Line 2546  for (;;)
2546            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2547            eptr++;            eptr++;
2548            }            }
2549    
2550            CHECK_PARTIAL();
2551          if (possessive) continue;          if (possessive) continue;
2552    
2553          while (eptr >= pp)          while (eptr >= pp)
2554            {            {
2555            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
# Line 2438  for (;;) Line 2565  for (;;)
2565    
2566      else      else
2567        {        {
2568        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2569            {
2570            if (eptr >= md->end_subject)
2571              {
2572              SCHECK_PARTIAL();
2573              RRETURN(MATCH_NOMATCH);
2574              }
2575            if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2576            }
2577        if (min == max) continue;        if (min == max) continue;
2578        if (minimize)        if (minimize)
2579          {          {
# Line 2446  for (;;) Line 2581  for (;;)
2581            {            {
2582            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2583            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2584            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max)
2585                {
2586                CHECK_PARTIAL();
2587              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2588                }
2589              if (eptr >= md->end_subject)
2590                {
2591                SCHECK_PARTIAL();
2592                RRETURN(MATCH_NOMATCH);
2593                }
2594              if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
2595            }            }
2596          /* Control never gets here */          /* Control never gets here */
2597          }          }
# Line 2459  for (;;) Line 2603  for (;;)
2603            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2604            eptr++;            eptr++;
2605            }            }
2606            CHECK_PARTIAL();
2607          if (possessive) continue;          if (possessive) continue;
2608          while (eptr >= pp)          while (eptr >= pp)
2609            {            {
# Line 2552  for (;;) Line 2697  for (;;)
2697      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2698      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2699    
2700      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
2701    
2702      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2703      fc = *ecode++;      fc = *ecode++;
2704    
2705      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2582  for (;;) Line 2724  for (;;)
2724          register unsigned int d;          register unsigned int d;
2725          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2726            {            {
2727              if (eptr >= md->end_subject)
2728                {
2729                SCHECK_PARTIAL();
2730                RRETURN(MATCH_NOMATCH);
2731                }
2732            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2733            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
2734            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
# Line 2593  for (;;) Line 2740  for (;;)
2740        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2741          {          {
2742          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2743              {
2744              if (eptr >= md->end_subject)
2745                {
2746                SCHECK_PARTIAL();
2747                RRETURN(MATCH_NOMATCH);
2748                }
2749            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2750              }
2751          }          }
2752    
2753        if (min == max) continue;        if (min == max) continue;
# Line 2609  for (;;) Line 2763  for (;;)
2763              {              {
2764              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2765              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2766                if (fi >= max)
2767                  {
2768                  CHECK_PARTIAL();
2769                  RRETURN(MATCH_NOMATCH);
2770                  }
2771                if (eptr >= md->end_subject)
2772                  {
2773                  SCHECK_PARTIAL();
2774                  RRETURN(MATCH_NOMATCH);
2775                  }
2776              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2777              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2778              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2779              }              }
2780            }            }
2781          else          else
# Line 2623  for (;;) Line 2786  for (;;)
2786              {              {
2787              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2788              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2789              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max)
2790                  {
2791                  CHECK_PARTIAL();
2792                  RRETURN(MATCH_NOMATCH);
2793                  }
2794                if (eptr >= md->end_subject)
2795                  {
2796                  SCHECK_PARTIAL();
2797                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2798                  }
2799                if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
2800              }              }
2801            }            }
2802          /* Control never gets here */          /* Control never gets here */
# Line 2650  for (;;) Line 2822  for (;;)
2822              if (fc == d) break;              if (fc == d) break;
2823              eptr += len;              eptr += len;
2824              }              }
2825            CHECK_PARTIAL();
2826          if (possessive) continue;          if (possessive) continue;
2827          for(;;)          for(;;)
2828              {              {
# Line 2668  for (;;) Line 2841  for (;;)
2841              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2842              eptr++;              eptr++;
2843              }              }
2844              CHECK_PARTIAL();
2845            if (possessive) continue;            if (possessive) continue;
2846            while (eptr >= pp)            while (eptr >= pp)
2847              {              {
# Line 2693  for (;;) Line 2867  for (;;)
2867          register unsigned int d;          register unsigned int d;
2868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2869            {            {
2870              if (eptr >= md->end_subject)
2871                {
2872                SCHECK_PARTIAL();
2873                RRETURN(MATCH_NOMATCH);
2874                }
2875            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
2876            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) RRETURN(MATCH_NOMATCH);
2877            }            }
# Line 2702  for (;;) Line 2881  for (;;)
2881        /* Not UTF-8 mode */        /* Not UTF-8 mode */
2882          {          {
2883          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2884              {
2885              if (eptr >= md->end_subject)
2886                {
2887                SCHECK_PARTIAL();
2888                RRETURN(MATCH_NOMATCH);
2889                }
2890            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2891              }
2892          }          }
2893    
2894        if (min == max) continue;        if (min == max) continue;
# Line 2718  for (;;) Line 2904  for (;;)
2904              {              {
2905              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2906              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2907              GETCHARINC(d, eptr);              if (fi >= max)
2908              if (fi >= max || eptr >= md->end_subject || fc == d)                {
2909                  CHECK_PARTIAL();
2910                  RRETURN(MATCH_NOMATCH);
2911                  }
2912                if (eptr >= md->end_subject)
2913                  {
2914                  SCHECK_PARTIAL();
2915                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2916                  }
2917                GETCHARINC(d, eptr);
2918                if (fc == d) RRETURN(MATCH_NOMATCH);
2919              }              }
2920            }            }
2921          else          else
# Line 2731  for (;;) Line 2926  for (;;)
2926              {              {
2927              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2928              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2929              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max)
2930                  {
2931                  CHECK_PARTIAL();
2932                  RRETURN(MATCH_NOMATCH);
2933                  }
2934                if (eptr >= md->end_subject)
2935                  {
2936                  SCHECK_PARTIAL();
2937                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2938                  }
2939                if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
2940              }              }
2941            }            }
2942          /* Control never gets here */          /* Control never gets here */
# Line 2757  for (;;) Line 2961  for (;;)
2961              if (fc == d) break;              if (fc == d) break;
2962              eptr += len;              eptr += len;
2963              }              }
2964              CHECK_PARTIAL();
2965            if (possessive) continue;            if (possessive) continue;
2966            for(;;)            for(;;)
2967              {              {
# Line 2775  for (;;) Line 2980  for (;;)
2980              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2981              eptr++;              eptr++;
2982              }              }
2983              CHECK_PARTIAL();
2984            if (possessive) continue;            if (possessive) continue;
2985            while (eptr >= pp)            while (eptr >= pp)
2986              {              {
# Line 2866  for (;;) Line 3072  for (;;)
3072    
3073      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3074      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3075      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3076      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3077      and single-bytes. */      and single-bytes. */
3078    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3079      if (min > 0)      if (min > 0)
3080        {        {
3081  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2884  for (;;) Line 3087  for (;;)
3087            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3088            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3089              {              {
3090              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3091                  {
3092                  SCHECK_PARTIAL();
3093                  RRETURN(MATCH_NOMATCH);
3094                  }
3095              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3096              }              }
3097            break;            break;
# Line 2892  for (;;) Line 3099  for (;;)
3099            case PT_LAMP:            case PT_LAMP:
3100            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3101              {              {
3102              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3103                  {
3104                  SCHECK_PARTIAL();
3105                  RRETURN(MATCH_NOMATCH);
3106                  }
3107              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3108              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3109              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 2905  for (;;) Line 3116  for (;;)
3116            case PT_GC:            case PT_GC:
3117            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3118              {              {
3119              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3120                  {
3121                  SCHECK_PARTIAL();
3122                  RRETURN(MATCH_NOMATCH);
3123                  }
3124              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3125              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3126              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 2916  for (;;) Line 3131  for (;;)
3131            case PT_PC:            case PT_PC:
3132            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3133              {              {
3134              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3135                  {
3136                  SCHECK_PARTIAL();
3137                  RRETURN(MATCH_NOMATCH);
3138                  }
3139              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3140              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3141              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 2927  for (;;) Line 3146  for (;;)
3146            case PT_SC:            case PT_SC:
3147            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3148              {              {
3149              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3150                  {
3151                  SCHECK_PARTIAL();
3152                  RRETURN(MATCH_NOMATCH);
3153                  }
3154              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3155              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3156              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 2947  for (;;) Line 3170  for (;;)
3170          {          {
3171          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3172            {            {
3173              if (eptr >= md->end_subject)
3174                {
3175                SCHECK_PARTIAL();
3176                RRETURN(MATCH_NOMATCH);
3177                }
3178            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3179            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3180            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3181            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3182              {              {
3183              int len = 1;              int len = 1;
3184              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3185                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3186              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3187              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3188              eptr += len;              eptr += len;
# Line 2975  for (;;) Line 3201  for (;;)
3201          case OP_ANY:          case OP_ANY:
3202          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3203            {            {
3204            if (eptr >= md->end_subject || IS_NEWLINE(eptr))            if (eptr >= md->end_subject)
3205                {
3206                SCHECK_PARTIAL();
3207              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3208                }
3209              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3210            eptr++;            eptr++;
3211            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3212            }            }
# Line 2985  for (;;) Line 3215  for (;;)
3215          case OP_ALLANY:          case OP_ALLANY:
3216          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3217            {            {
3218            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3219                {
3220                SCHECK_PARTIAL();
3221                RRETURN(MATCH_NOMATCH);
3222                }
3223            eptr++;            eptr++;
3224            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3225            }            }
3226          break;          break;
3227    
3228          case OP_ANYBYTE:          case OP_ANYBYTE:
3229            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3230          eptr += min;          eptr += min;
3231          break;          break;
3232    
3233          case OP_ANYNL:          case OP_ANYNL:
3234          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3235            {            {
3236            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3237                {
3238                SCHECK_PARTIAL();
3239                RRETURN(MATCH_NOMATCH);
3240                }
3241            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3242            switch(c)            switch(c)
3243              {              {
# Line 3024  for (;;) Line 3263  for (;;)
3263          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3264          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3265            {            {
3266            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3267                {
3268                SCHECK_PARTIAL();
3269                RRETURN(MATCH_NOMATCH);
3270                }
3271            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3272            switch(c)            switch(c)
3273              {              {
# Line 3056  for (;;) Line 3299  for (;;)
3299          case OP_HSPACE:          case OP_HSPACE:
3300          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3301            {            {
3302            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3303                {
3304                SCHECK_PARTIAL();
3305                RRETURN(MATCH_NOMATCH);
3306                }
3307            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3308            switch(c)            switch(c)
3309              {              {
# Line 3088  for (;;) Line 3335  for (;;)
3335          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3336          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3337            {            {
3338            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3339                {
3340                SCHECK_PARTIAL();
3341                RRETURN(MATCH_NOMATCH);
3342                }
3343            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3344            switch(c)            switch(c)
3345              {              {
# Line 3108  for (;;) Line 3359  for (;;)
3359          case OP_VSPACE:          case OP_VSPACE:
3360          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3361            {            {
3362            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3363                {
3364                SCHECK_PARTIAL();
3365                RRETURN(MATCH_NOMATCH);
3366                }
3367            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3368            switch(c)            switch(c)
3369              {              {
# Line 3128  for (;;) Line 3383  for (;;)
3383          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3384          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3385            {            {
3386            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3387                {
3388                SCHECK_PARTIAL();
3389                RRETURN(MATCH_NOMATCH);
3390                }
3391            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3392            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3393              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 3138  for (;;) Line 3397  for (;;)
3397          case OP_DIGIT:          case OP_DIGIT:
3398          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3399            {            {
3400            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3401               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3402                SCHECK_PARTIAL();
3403                RRETURN(MATCH_NOMATCH);
3404                }
3405              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3407            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3408            }            }
# Line 3148  for (;;) Line 3411  for (;;)
3411          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3412          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3413            {            {
3414            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3415               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))              {
3416                SCHECK_PARTIAL();
3417                RRETURN(MATCH_NOMATCH);
3418                }
3419              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3420              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3421            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3422            }            }
# Line 3158  for (;;) Line 3425  for (;;)
3425          case OP_WHITESPACE:          case OP_WHITESPACE:
3426          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3427            {            {
3428            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3429               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3430                SCHECK_PARTIAL();
3431                RRETURN(MATCH_NOMATCH);
3432                }
3433              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3434              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3435            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3436            }            }
# Line 3178  for (;;) Line 3449  for (;;)
3449          case OP_WORDCHAR:          case OP_WORDCHAR:
3450          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3451            {            {
3452            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3453               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3454                SCHECK_PARTIAL();
3455                RRETURN(MATCH_NOMATCH);
3456                }
3457              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3458              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3459            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3460            }            }
# Line 3193  for (;;) Line 3468  for (;;)
3468  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3469    
3470        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3471        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3472    
3473        switch(ctype)        switch(ctype)
3474          {          {
3475          case OP_ANY:          case OP_ANY:
3476          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3477            {            {
3478              if (eptr >= md->end_subject)
3479                {
3480                SCHECK_PARTIAL();
3481                RRETURN(MATCH_NOMATCH);
3482                }
3483            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3484            eptr++;            eptr++;
3485            }            }
3486          break;          break;
3487    
3488          case OP_ALLANY:          case OP_ALLANY:
3489            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3490          eptr += min;          eptr += min;
3491          break;          break;
3492    
3493          case OP_ANYBYTE:          case OP_ANYBYTE:
3494            if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
3495          eptr += min;          eptr += min;
3496          break;          break;
3497    
         /* Because of the CRLF case, we can't assume the minimum number of  
         bytes are present in this case. */  
   
3498          case OP_ANYNL:          case OP_ANYNL:
3499          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3500            {            {
3501            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3502                {
3503                SCHECK_PARTIAL();
3504                RRETURN(MATCH_NOMATCH);
3505                }
3506            switch(*eptr++)            switch(*eptr++)
3507              {              {
3508              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3242  for (;;) Line 3524  for (;;)
3524          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3525          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3526            {            {
3527            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3528                {
3529                SCHECK_PARTIAL();
3530                RRETURN(MATCH_NOMATCH);
3531                }
3532            switch(*eptr++)            switch(*eptr++)
3533              {              {
3534              default: break;              default: break;
# Line 3257  for (;;) Line 3543  for (;;)
3543          case OP_HSPACE:          case OP_HSPACE:
3544          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3545            {            {
3546            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3547                {
3548                SCHECK_PARTIAL();
3549                RRETURN(MATCH_NOMATCH);
3550                }
3551            switch(*eptr++)            switch(*eptr++)
3552              {              {
3553              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3272  for (;;) Line 3562  for (;;)
3562          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3563          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3564            {            {
3565            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                RRETURN(MATCH_NOMATCH);
3569                }
3570            switch(*eptr++)            switch(*eptr++)
3571              {              {
3572              default: break;              default: break;
# Line 3289  for (;;) Line 3583  for (;;)
3583          case OP_VSPACE:          case OP_VSPACE:
3584          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3585            {            {
3586            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3587                {
3588                SCHECK_PARTIAL();
3589                RRETURN(MATCH_NOMATCH);
3590                }
3591            switch(*eptr++)            switch(*eptr++)
3592              {              {
3593              default: RRETURN(MATCH_NOMATCH);              default: RRETURN(MATCH_NOMATCH);
# Line 3305  for (;;) Line 3603  for (;;)
3603    
3604          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3605          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3606              {
3607              if (eptr >= md->end_subject)
3608                {
3609                SCHECK_PARTIAL();
3610                RRETURN(MATCH_NOMATCH);
3611                }
3612            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3613              }
3614          break;          break;
3615    
3616          case OP_DIGIT:          case OP_DIGIT:
3617          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3618              {
3619              if (eptr >= md->end_subject)
3620                {
3621                SCHECK_PARTIAL();
3622                RRETURN(MATCH_NOMATCH);
3623                }
3624            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
3625              }
3626          break;          break;
3627    
3628          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3629          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                RRETURN(MATCH_NOMATCH);
3635                }
3636            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
3637              }
3638          break;          break;
3639    
3640          case OP_WHITESPACE:          case OP_WHITESPACE:
3641          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3642              {
3643              if (eptr >= md->end_subject)
3644                {
3645                SCHECK_PARTIAL();
3646                RRETURN(MATCH_NOMATCH);
3647                }
3648            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
3649              }
3650          break;          break;
3651    
3652          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3653          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3654              {
3655              if (eptr >= md->end_subject)
3656                {
3657                SCHECK_PARTIAL();
3658                RRETURN(MATCH_NOMATCH);
3659                }
3660            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
3661              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3662              }
3663          break;          break;
3664    
3665          case OP_WORDCHAR:          case OP_WORDCHAR:
3666          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3667              {
3668              if (eptr >= md->end_subject)
3669                {
3670                SCHECK_PARTIAL();
3671                RRETURN(MATCH_NOMATCH);
3672                }
3673            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
3674              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3675              }
3676          break;          break;
3677    
3678          default:          default:
# Line 3360  for (;;) Line 3700  for (;;)
3700              {              {
3701              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3702              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3703              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3704                  {
3705                  CHECK_PARTIAL();
3706                  RRETURN(MATCH_NOMATCH);
3707                  }
3708                if (eptr >= md->end_subject)
3709                  {
3710                  SCHECK_PARTIAL();
3711                  RRETURN(MATCH_NOMATCH);
3712                  }
3713              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3714              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) RRETURN(MATCH_NOMATCH);
3715              }              }
# Line 3371  for (;;) Line 3720  for (;;)
3720              {              {
3721              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3722              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3723              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3724                  {
3725                  CHECK_PARTIAL();
3726                  RRETURN(MATCH_NOMATCH);
3727                  }
3728                if (eptr >= md->end_subject)
3729                  {
3730                  SCHECK_PARTIAL();
3731                  RRETURN(MATCH_NOMATCH);
3732                  }
3733              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3734              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3735              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
# Line 3386  for (;;) Line 3744  for (;;)
3744              {              {
3745              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3746              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3747              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3748                  {
3749                  CHECK_PARTIAL();
3750                  RRETURN(MATCH_NOMATCH);
3751                  }
3752                if (eptr >= md->end_subject)
3753                  {
3754                  SCHECK_PARTIAL();
3755                  RRETURN(MATCH_NOMATCH);
3756                  }
3757              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3758              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3759              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
# Line 3399  for (;;) Line 3766  for (;;)
3766              {              {
3767              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3768              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3769              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3770                  {
3771                  CHECK_PARTIAL();
3772                  RRETURN(MATCH_NOMATCH);
3773                  }
3774                if (eptr >= md->end_subject)
3775                  {
3776                  SCHECK_PARTIAL();
3777                  RRETURN(MATCH_NOMATCH);
3778                  }
3779              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3780              prop_chartype = UCD_CHARTYPE(c);              prop_chartype = UCD_CHARTYPE(c);
3781              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
# Line 3412  for (;;) Line 3788  for (;;)
3788              {              {
3789              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3790              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3791              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max)
3792                  {
3793                  CHECK_PARTIAL();
3794                  RRETURN(MATCH_NOMATCH);
3795                  }
3796                if (eptr >= md->end_subject)
3797                  {
3798                  SCHECK_PARTIAL();
3799                  RRETURN(MATCH_NOMATCH);
3800                  }
3801              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3802              prop_script = UCD_SCRIPT(c);              prop_script = UCD_SCRIPT(c);
3803              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
# Line 3434  for (;;) Line 3819  for (;;)
3819            {            {
3820            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3821            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3822            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max)
3823                {
3824                CHECK_PARTIAL();
3825                RRETURN(MATCH_NOMATCH);
3826                }
3827              if (eptr >= md->end_subject)
3828                {
3829                SCHECK_PARTIAL();
3830                RRETURN(MATCH_NOMATCH);
3831                }
3832            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3833            prop_category = UCD_CATEGORY(c);            prop_category = UCD_CATEGORY(c);
3834            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3835            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3836              {              {
3837              int len = 1;              int len = 1;
3838              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3839                {                else { GETCHARLEN(c, eptr, len); }
               GETCHARLEN(c, eptr, len);  
               }  
3840              prop_category = UCD_CATEGORY(c);              prop_category = UCD_CATEGORY(c);
3841              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3842              eptr += len;              eptr += len;
# Line 3463  for (;;) Line 3855  for (;;)
3855            {            {
3856            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3857            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3858            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
3859                 (ctype == OP_ANY && IS_NEWLINE(eptr)))              {
3860                CHECK_PARTIAL();
3861                RRETURN(MATCH_NOMATCH);
3862                }
3863              if (eptr >= md->end_subject)
3864                {
3865                SCHECK_PARTIAL();
3866                RRETURN(MATCH_NOMATCH);
3867                }
3868              if (ctype == OP_ANY && IS_NEWLINE(eptr))
3869              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
3870            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3871            switch(ctype)            switch(ctype)
3872              {              {
# Line 3622  for (;;) Line 4022  for (;;)
4022            {            {
4023            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4024            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4025            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max)
4026                 (ctype == OP_ANY && IS_NEWLINE(eptr)))              {
4027                CHECK_PARTIAL();
4028                RRETURN(MATCH_NOMATCH);
4029                }
4030              if (eptr >= md->end_subject)
4031                {
4032                SCHECK_PARTIAL();
4033                RRETURN(MATCH_NOMATCH);
4034                }
4035              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4036              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
   
4037            c = *eptr++;            c = *eptr++;
4038            switch(ctype)            switch(ctype)
4039              {              {
# Line 3814  for (;;) Line 4222  for (;;)
4222    
4223          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4224    
4225            CHECK_PARTIAL();
4226          if (possessive) continue;          if (possessive) continue;
4227          for(;;)          for(;;)
4228            {            {
# Line 3850  for (;;) Line 4259  for (;;)
4259    
4260          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4261    
4262            CHECK_PARTIAL();
4263          if (possessive) continue;          if (possessive) continue;
4264          for(;;)          for(;;)
4265            {            {
# Line 4086  for (;;) Line 4496  for (;;)
4496    
4497          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4498    
4499            CHECK_PARTIAL();
4500          if (possessive) continue;          if (possessive) continue;
4501          for(;;)          for(;;)
4502            {            {
# Line 4241  for (;;) Line 4652  for (;;)
4652    
4653          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4654    
4655            CHECK_PARTIAL();
4656          if (possessive) continue;          if (possessive) continue;
4657          while (eptr >= pp)          while (eptr >= pp)
4658            {            {
# Line 4408  const uschar *tables; Line 4820  const uschar *tables;
4820  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4821  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4822  USPTR end_subject;  USPTR end_subject;
4823    USPTR start_partial = NULL;
4824  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
4825    
4826  pcre_study_data internal_study;  pcre_study_data internal_study;
# Line 4491  md->jscript_compat = (re->options & PCRE Line 4904  md->jscript_compat = (re->options & PCRE
4904  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4905  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
4906  md->notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
4907  md->partial = (options & PCRE_PARTIAL) != 0;  md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
4908                  ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
4909  md->hitend = FALSE;  md->hitend = FALSE;
4910    
4911  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
# Line 4532  switch ((((options & PCRE_NEWLINE_BITS) Line 4946  switch ((((options & PCRE_NEWLINE_BITS)
4946          (pcre_uint32)options) & PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4947    {    {
4948    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4949    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
4950    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
4951    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4952         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
4953    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4954    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4955    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 4565  else Line 4979  else
4979      }      }
4980    }    }
4981    
4982  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching was originally supported only for a restricted set of
4983  moment. */  regexes; from release 8.00 there are no restrictions, but the bits are still
4984    defined (though never set). So there's no harm in leaving this code. */
4985    
4986  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4987    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
# Line 4577  back the character offset. */ Line 4992  back the character offset. */
4992  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4993  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
4994    {    {
4995    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
4996      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
4997    if (start_offset > 0 && start_offset < length)    if (start_offset > 0 && start_offset < length)
4998      {      {
4999      int tb = ((uschar *)subject)[start_offset];      int tb = ((USPTR)subject)[start_offset];
5000      if (tb > 127)      if (tb > 127)
5001        {        {
5002        tb &= 0xc0;        tb &= 0xc0;
# Line 4687  for(;;) Line 5102  for(;;)
5102      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
5103      }      }
5104    
5105    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
5106    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
5107    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
5108    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
5109    the match fails at the newline, later code breaks this loop. */    this loop. */
5110    
5111    if (firstline)    if (firstline)
5112      {      {
5113      USPTR t = start_match;      USPTR t = start_match;
5114    #ifdef SUPPORT_UTF8
5115        if (utf8)
5116          {
5117          while (t < md->end_subject && !IS_NEWLINE(t))
5118            {
5119            t++;
5120            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
5121            }
5122          }
5123        else
5124    #endif
5125      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
5126      end_subject = t;      end_subject = t;
5127      }      }
5128    
5129    /* Now test for a unique first byte */    /* There are some optimizations that avoid running the match if a known
5130      starting point is not found, or if a known later character is not present.
5131      However, there is an option that disables these, for testing and for ensuring
5132      that all callouts do actually occur. */
5133    
5134    if (first_byte >= 0)    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
5135      {      {
5136      if (first_byte_caseless)      /* Advance to a unique first byte if there is one. */
       while (start_match < end_subject &&  
              md->lcc[*start_match] != first_byte)  
         { NEXTCHAR(start_match); }  
     else  
       while (start_match < end_subject && *start_match != first_byte)  
         { NEXTCHAR(start_match); }  
     }  
5137    
5138    /* Or to just after a linebreak for a multiline match if possible */      if (first_byte >= 0)
5139          {
5140          if (first_byte_caseless)
5141            while (start_match < end_subject && md->lcc[*start_match] != first_byte)
5142              start_match++;
5143          else
5144            while (start_match < end_subject && *start_match != first_byte)
5145              start_match++;
5146          }
5147    
5148    else if (startline)      /* Or to just after a linebreak for a multiline match */
5149      {  
5150      if (start_match > md->start_subject + start_offset)      else if (startline)
5151        {        {
5152        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        if (start_match > md->start_subject + start_offset)
5153          { NEXTCHAR(start_match); }          {
5154    #ifdef SUPPORT_UTF8
5155            if (utf8)
5156              {
5157              while (start_match < end_subject && !WAS_NEWLINE(start_match))
5158                {
5159                start_match++;
5160                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
5161                  start_match++;
5162                }
5163              }
5164            else
5165    #endif
5166            while (start_match < end_subject && !WAS_NEWLINE(start_match))
5167              start_match++;
5168    
5169            /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
5170            and we are now at a LF, advance the match position by one more character.
5171            */
5172    
5173        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          if (start_match[-1] == CHAR_CR &&
5174        and we are now at a LF, advance the match position by one more character.               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
5175        */               start_match < end_subject &&
5176                 *start_match == CHAR_NL)
5177        if (start_match[-1] == '\r' &&            start_match++;
5178             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&          }
            start_match < end_subject &&  
            *start_match == '\n')  
         start_match++;  
5179        }        }
     }  
5180    
5181    /* Or to a non-unique first char after study */      /* Or to a non-unique first byte after study */
5182    
5183    else if (start_bits != NULL)      else if (start_bits != NULL)
     {  
     while (start_match < end_subject)  
5184        {        {
5185        register unsigned int c = *start_match;        while (start_match < end_subject)
5186        if ((start_bits[c/8] & (1 << (c&7))) == 0)          {
5187          { NEXTCHAR(start_match); }          register unsigned int c = *start_match;
5188        else break;          if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
5189              else break;
5190            }
5191        }        }
5192      }      }   /* Starting optimizations */
5193    
5194    /* Restore fudged end_subject */    /* Restore fudged end_subject */
5195    
# Line 4757  for(;;) Line 5201  for(;;)
5201    printf("\n");    printf("\n");
5202  #endif  #endif
5203    
5204    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
5205    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
5206    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
5207    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
5208    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
5209    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
5210    and backing off on a match.    autoincrement and backing off on a match.
5211    
5212    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
5213    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
5214    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
5215    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
5216      long.
5217    
5218    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
5219    */    disabling is explicitly requested. */
5220    
5221    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
5222          req_byte >= 0 &&
5223        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
5224        !md->partial)        !md->partial)
5225      {      {
# Line 4817  for(;;) Line 5263  for(;;)
5263        }        }
5264      }      }
5265    
5266    /* OK, we can now run the match. */    /* OK, we can now run the match. If "hitend" is set afterwards, remember the
5267      first starting point for which a partial match was found. */
5268    
5269    md->start_match_ptr = start_match;    md->start_match_ptr = start_match;
5270    md->match_call_count = 0;    md->match_call_count = 0;
5271    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
5272      if (md->hitend && start_partial == NULL) start_partial = start_match;
5273    
5274    switch(rc)    switch(rc)
5275      {      {
# Line 4881  for(;;) Line 5329  for(;;)
5329    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
5330    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
5331    
5332    if (start_match[-1] == '\r' &&    if (start_match[-1] == CHAR_CR &&
5333        start_match < end_subject &&        start_match < end_subject &&
5334        *start_match == '\n' &&        *start_match == CHAR_NL &&
5335        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
5336          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
5337           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
# Line 4957  if (using_temporary_offsets) Line 5405  if (using_temporary_offsets)
5405    (pcre_free)(md->offset_vector);    (pcre_free)(md->offset_vector);
5406    }    }
5407    
5408  if (rc != MATCH_NOMATCH)  if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
5409    {    {
5410    DPRINTF((">>>> error: returning %d\n", rc));    DPRINTF((">>>> error: returning %d\n", rc));
5411    return rc;    return rc;
5412    }    }
5413  else if (md->partial && md->hitend)  else if (start_partial != NULL)
5414    {    {
5415    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
5416      if (offsetcount > 1)
5417        {
5418        offsets[0] = start_partial - (USPTR)subject;
5419        offsets[1] = end_subject - (USPTR)subject;
5420        }
5421    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;
5422    }    }
5423  else  else

Legend:
Removed from v.359  
changed lines
  Added in v.427

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12