/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 165 by ph10, Wed May 9 10:50:57 2007 UTC revision 510 by ph10, Sat Mar 27 17:45:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_SKIP_ARG     (-996)
78    #define MATCH_THEN         (-995)
79    
80    /* This is a convenience macro for code that occurs many times. */
81    
82    #define MRRETURN(ra) \
83      { \
84      md->mark = markptr; \
85      RRETURN(ra); \
86      }
87    
88  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
89  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
90  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 98  static const char rep_max[] = { 0, 0, 0,
98    
99    
100    
101  #ifdef DEBUG  #ifdef PCRE_DEBUG
102  /*************************************************  /*************************************************
103  *        Debugging function to print chars       *  *        Debugging function to print chars       *
104  *************************************************/  *************************************************/
# Line 135  match_ref(int offset, register USPTR ept Line 150  match_ref(int offset, register USPTR ept
150  {  {
151  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
152    
153  #ifdef DEBUG  #ifdef PCRE_DEBUG
154  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
155    printf("matching subject <null>");    printf("matching subject <null>");
156  else  else
# Line 152  printf("\n"); Line 167  printf("\n");
167    
168  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
169    
170  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
171    properly if Unicode properties are supported. Otherwise, we can check only
172    ASCII characters. */
173    
174  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
175    {    {
176    #ifdef SUPPORT_UTF8
177    #ifdef SUPPORT_UCP
178      if (md->utf8)
179        {
180        USPTR endptr = eptr + length;
181        while (eptr < endptr)
182          {
183          int c, d;
184          GETCHARINC(c, eptr);
185          GETCHARINC(d, p);
186          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
187          }
188        }
189      else
190    #endif
191    #endif
192    
193      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
194      is no UCP support. */
195    
196    while (length-- > 0)    while (length-- > 0)
197      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
198    }    }
199    
200    /* In the caseful case, we can just compare the bytes, whether or not we
201    are in UTF-8 mode. */
202    
203  else  else
204    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
205    
# Line 205  variable instead of being passed in the Line 246  variable instead of being passed in the
246  ****************************************************************************  ****************************************************************************
247  ***************************************************************************/  ***************************************************************************/
248    
249    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
250  /* Numbers for RMATCH calls */  below must be updated in sync.  */
251    
252  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
253         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
254         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
255         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
256         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
257           RM51,  RM52, RM53, RM54 };
258    
259  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
260  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
261  actuall used in this definition. */  actually used in this definition. */
262    
263  #ifndef NO_RECURSE  #ifndef NO_RECURSE
264  #define REGISTER register  #define REGISTER register
265    
266  #ifdef DEBUG  #ifdef PCRE_DEBUG
267  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
268    { \    { \
269    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
270    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
271    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
272    }    }
273  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actuall used in this definition. */ Line 277  actuall used in this definition. */
277    }    }
278  #else  #else
279  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
280    rrc = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
281  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
282  #endif  #endif
283    
# Line 255  argument of match(), which never changes Line 296  argument of match(), which never changes
296    frame->Xwhere = rw; \    frame->Xwhere = rw; \
297    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
298    newframe->Xecode = rb;\    newframe->Xecode = rb;\
299      newframe->Xmstart = mstart;\
300      newframe->Xmarkptr = markptr;\
301    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
302    newframe->Xims = re;\    newframe->Xims = re;\
303    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 289  typedef struct heapframe { Line 332  typedef struct heapframe {
332    
333    /* Function arguments that may change */    /* Function arguments that may change */
334    
335    const uschar *Xeptr;    USPTR Xeptr;
336    const uschar *Xecode;    const uschar *Xecode;
337      USPTR Xmstart;
338      USPTR Xmarkptr;
339    int Xoffset_top;    int Xoffset_top;
340    long int Xims;    long int Xims;
341    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 299  typedef struct heapframe { Line 344  typedef struct heapframe {
344    
345    /* Function local variables */    /* Function local variables */
346    
347    const uschar *Xcallpat;    USPTR Xcallpat;
348    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
349    const uschar *Xdata;    USPTR Xcharptr;
350    const uschar *Xnext;  #endif
351    const uschar *Xpp;    USPTR Xdata;
352    const uschar *Xprev;    USPTR Xnext;
353    const uschar *Xsaved_eptr;    USPTR Xpp;
354      USPTR Xprev;
355      USPTR Xsaved_eptr;
356    
357    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
358    
# Line 326  typedef struct heapframe { Line 373  typedef struct heapframe {
373    uschar Xocchars[8];    uschar Xocchars[8];
374  #endif  #endif
375    
376      int Xcodelink;
377    int Xctype;    int Xctype;
378    unsigned int Xfc;    unsigned int Xfc;
379    int Xfi;    int Xfi;
# Line 361  typedef struct heapframe { Line 409  typedef struct heapframe {
409    
410  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
411  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
412  same response.  same response. */
413    
414    /* These macros pack up tests that are used for partial matching, and which
415    appears several times in the code. We set the "hit end" flag if the pointer is
416    at the end of the subject and also past the start of the subject (i.e.
417    something has been matched). For hard partial matching, we then return
418    immediately. The second one is used when we already know we are past the end of
419    the subject. */
420    
421    #define CHECK_PARTIAL()\
422      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
423        {\
424        md->hitend = TRUE;\
425        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
426        }
427    
428    #define SCHECK_PARTIAL()\
429      if (md->partial != 0 && eptr > mstart)\
430        {\
431        md->hitend = TRUE;\
432        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
433        }
434    
435  Performance note: It might be tempting to extract commonly used fields from the  
436  md structure (e.g. utf8, end_subject) into individual variables to improve  /* Performance note: It might be tempting to extract commonly used fields from
437    the md structure (e.g. utf8, end_subject) into individual variables to improve
438  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
439  made performance worse.  made performance worse.
440    
441  Arguments:  Arguments:
442     eptr        pointer to current character in subject     eptr        pointer to current character in subject
443     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
444       mstart      pointer to the current match start position (can be modified
445                     by encountering \K)
446       markptr     pointer to the most recent MARK name, or NULL
447     offset_top  current top pointer     offset_top  current top pointer
448     md          pointer to "static" info for the match     md          pointer to "static" info for the match
449     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 380  Arguments: Line 453  Arguments:
453                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
454                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
455                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
456     rdepth      the recursion depth     rdepth      the recursion depth
457    
458  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
459                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
460                   a negative MATCH_xxx value for PRUNE, SKIP, etc
461                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
462                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
463  */  */
464    
465  static int  static int
466  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
467    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
468    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
469  {  {
470  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
471  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 404  register unsigned int c; /* Character Line 477  register unsigned int c; /* Character
477  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
478    
479  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
480    int condcode;
481    
482  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
483  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 418  frame->Xprevframe = NULL; /* Line 492  frame->Xprevframe = NULL; /*
492    
493  frame->Xeptr = eptr;  frame->Xeptr = eptr;
494  frame->Xecode = ecode;  frame->Xecode = ecode;
495    frame->Xmstart = mstart;
496    frame->Xmarkptr = markptr;
497  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
498  frame->Xims = ims;  frame->Xims = ims;
499  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 432  HEAP_RECURSE: Line 508  HEAP_RECURSE:
508    
509  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
510  #define ecode              frame->Xecode  #define ecode              frame->Xecode
511    #define mstart             frame->Xmstart
512    #define markptr            frame->Xmarkptr
513  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
514  #define ims                frame->Xims  #define ims                frame->Xims
515  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 444  HEAP_RECURSE: Line 522  HEAP_RECURSE:
522  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
523  #endif  #endif
524  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
525    #define codelink           frame->Xcodelink
526  #define data               frame->Xdata  #define data               frame->Xdata
527  #define next               frame->Xnext  #define next               frame->Xnext
528  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 524  int oclength; Line 603  int oclength;
603  uschar occhars[8];  uschar occhars[8];
604  #endif  #endif
605    
606    int codelink;
607  int ctype;  int ctype;
608  int length;  int length;
609  int max;  int max;
# Line 557  TAIL_RECURSE: Line 637  TAIL_RECURSE:
637  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
638  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
639  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
640  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
641  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
642  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
643  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 580  original_ims = ims; /* Save for reset Line 660  original_ims = ims; /* Save for reset
660  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
661  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
662  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
663  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
664  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
665  already used. */  block that is used is on the stack, so a new one may be required for each
666    match(). */
667    
668  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
669    {    {
670    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
671    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
672      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
673    }    }
674    
675  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 605  for (;;) Line 679  for (;;)
679    minimize = possessive = FALSE;    minimize = possessive = FALSE;
680    op = *ecode;    op = *ecode;
681    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
682    switch(op)    switch(op)
683      {      {
684        case OP_MARK:
685        markptr = ecode + 2;
686        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
687          ims, eptrb, flags, RM51);
688    
689        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
690        argument, and we must check whether that argument matches this MARK's
691        argument. It is passed back in md->start_match_ptr (an overloading of that
692        variable). If it does match, we reset that variable to the current subject
693        position and return MATCH_SKIP. Otherwise, pass back the return code
694        unaltered. */
695    
696        if (rrc == MATCH_SKIP_ARG &&
697            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
698          {
699          md->start_match_ptr = eptr;
700          RRETURN(MATCH_SKIP);
701          }
702    
703        if (md->mark == NULL) md->mark = markptr;
704        RRETURN(rrc);
705    
706        case OP_FAIL:
707        MRRETURN(MATCH_NOMATCH);
708    
709        case OP_COMMIT:
710        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
711          ims, eptrb, flags, RM52);
712        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
713        MRRETURN(MATCH_COMMIT);
714    
715        case OP_PRUNE:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM51);
718        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
719        MRRETURN(MATCH_PRUNE);
720    
721        case OP_PRUNE_ARG:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
723          ims, eptrb, flags, RM51);
724        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
725        md->mark = ecode + 2;
726        RRETURN(MATCH_PRUNE);
727    
728        case OP_SKIP:
729        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
730          ims, eptrb, flags, RM53);
731        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
732        md->start_match_ptr = eptr;   /* Pass back current position */
733        MRRETURN(MATCH_SKIP);
734    
735        case OP_SKIP_ARG:
736        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
737          ims, eptrb, flags, RM53);
738        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
739    
740        /* Pass back the current skip name by overloading md->start_match_ptr and
741        returning the special MATCH_SKIP_ARG return code. This will either be
742        caught by a matching MARK, or get to the top, where it is treated the same
743        as PRUNE. */
744    
745        md->start_match_ptr = ecode + 2;
746        RRETURN(MATCH_SKIP_ARG);
747    
748        case OP_THEN:
749        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
750          ims, eptrb, flags, RM54);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
752        MRRETURN(MATCH_THEN);
753    
754        case OP_THEN_ARG:
755        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
756          ims, eptrb, flags, RM54);
757        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
758        md->mark = ecode + 2;
759        RRETURN(MATCH_THEN);
760    
761      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
762      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
763      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 634  for (;;) Line 777  for (;;)
777      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
778      offset = number << 1;      offset = number << 1;
779    
780  #ifdef DEBUG  #ifdef PCRE_DEBUG
781      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
782      printf("subject=");      printf("subject=");
783      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 656  for (;;) Line 799  for (;;)
799          {          {
800          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
801            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
802          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
803          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
804          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
805          }          }
# Line 668  for (;;) Line 811  for (;;)
811        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
812        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
813    
814          if (rrc != MATCH_THEN) md->mark = markptr;
815        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
816        }        }
817    
818      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
819      bracket. */      as a non-capturing bracket. */
820    
821        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
822        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
823    
824      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
825    
826        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
830      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
831      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
832      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
833        is set. */
834    
835      case OP_BRA:      case OP_BRA:
836      case OP_SBRA:      case OP_SBRA:
# Line 687  for (;;) Line 838  for (;;)
838      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
839      for (;;)      for (;;)
840        {        {
841        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
842          {          {
843          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
844          flags |= match_tail_recursed;            {
845          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
846          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
847              goto TAIL_RECURSE;
848              }
849    
850            /* Possibly empty group; can't use tail recursion. */
851    
852            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
853              eptrb, flags, RM48);
854            if (rrc == MATCH_NOMATCH) md->mark = markptr;
855            RRETURN(rrc);
856          }          }
857    
858        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 700  for (;;) Line 860  for (;;)
860    
861        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
862          eptrb, flags, RM2);          eptrb, flags, RM2);
863        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
864        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
865        }        }
866      /* Control never reaches here. */      /* Control never reaches here. */
# Line 713  for (;;) Line 873  for (;;)
873    
874      case OP_COND:      case OP_COND:
875      case OP_SCOND:      case OP_SCOND:
876      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
877    
878        /* Because of the way auto-callout works during compile, a callout item is
879        inserted between OP_COND and an assertion condition. */
880    
881        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
882        {        {
883        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
884        condition = md->recursive != NULL &&          {
885          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
886        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
887            cb.callout_number   = ecode[LINK_SIZE+2];
888            cb.offset_vector    = md->offset_vector;
889            cb.subject          = (PCRE_SPTR)md->start_subject;
890            cb.subject_length   = md->end_subject - md->start_subject;
891            cb.start_match      = mstart - md->start_subject;
892            cb.current_position = eptr - md->start_subject;
893            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
894            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
895            cb.capture_top      = offset_top/2;
896            cb.capture_last     = md->capture_last;
897            cb.callout_data     = md->callout_data;
898            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
899            if (rrc < 0) RRETURN(rrc);
900            }
901          ecode += _pcre_OP_lengths[OP_CALLOUT];
902          }
903    
904        condcode = ecode[LINK_SIZE+1];
905    
906        /* Now see what the actual condition is */
907    
908        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
909          {
910          if (md->recursive == NULL)                /* Not recursing => FALSE */
911            {
912            condition = FALSE;
913            ecode += GET(ecode, 1);
914            }
915          else
916            {
917            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
918            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
919    
920            /* If the test is for recursion into a specific subpattern, and it is
921            false, but the test was set up by name, scan the table to see if the
922            name refers to any other numbers, and test them. The condition is true
923            if any one is set. */
924    
925            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
926              {
927              uschar *slotA = md->name_table;
928              for (i = 0; i < md->name_count; i++)
929                {
930                if (GET2(slotA, 0) == recno) break;
931                slotA += md->name_entry_size;
932                }
933    
934              /* Found a name for the number - there can be only one; duplicate
935              names for different numbers are allowed, but not vice versa. First
936              scan down for duplicates. */
937    
938              if (i < md->name_count)
939                {
940                uschar *slotB = slotA;
941                while (slotB > md->name_table)
942                  {
943                  slotB -= md->name_entry_size;
944                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
945                    {
946                    condition = GET2(slotB, 0) == md->recursive->group_num;
947                    if (condition) break;
948                    }
949                  else break;
950                  }
951    
952                /* Scan up for duplicates */
953    
954                if (!condition)
955                  {
956                  slotB = slotA;
957                  for (i++; i < md->name_count; i++)
958                    {
959                    slotB += md->name_entry_size;
960                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
961                      {
962                      condition = GET2(slotB, 0) == md->recursive->group_num;
963                      if (condition) break;
964                      }
965                    else break;
966                    }
967                  }
968                }
969              }
970    
971            /* Choose branch according to the condition */
972    
973            ecode += condition? 3 : GET(ecode, 1);
974            }
975        }        }
976    
977      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
978        {        {
979        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
980        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
981    
982          /* If the numbered capture is unset, but the reference was by name,
983          scan the table to see if the name refers to any other numbers, and test
984          them. The condition is true if any one is set. This is tediously similar
985          to the code above, but not close enough to try to amalgamate. */
986    
987          if (!condition && condcode == OP_NCREF)
988            {
989            int refno = offset >> 1;
990            uschar *slotA = md->name_table;
991    
992            for (i = 0; i < md->name_count; i++)
993              {
994              if (GET2(slotA, 0) == refno) break;
995              slotA += md->name_entry_size;
996              }
997    
998            /* Found a name for the number - there can be only one; duplicate names
999            for different numbers are allowed, but not vice versa. First scan down
1000            for duplicates. */
1001    
1002            if (i < md->name_count)
1003              {
1004              uschar *slotB = slotA;
1005              while (slotB > md->name_table)
1006                {
1007                slotB -= md->name_entry_size;
1008                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1009                  {
1010                  offset = GET2(slotB, 0) << 1;
1011                  condition = offset < offset_top &&
1012                    md->offset_vector[offset] >= 0;
1013                  if (condition) break;
1014                  }
1015                else break;
1016                }
1017    
1018              /* Scan up for duplicates */
1019    
1020              if (!condition)
1021                {
1022                slotB = slotA;
1023                for (i++; i < md->name_count; i++)
1024                  {
1025                  slotB += md->name_entry_size;
1026                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1027                    {
1028                    offset = GET2(slotB, 0) << 1;
1029                    condition = offset < offset_top &&
1030                      md->offset_vector[offset] >= 0;
1031                    if (condition) break;
1032                    }
1033                  else break;
1034                  }
1035                }
1036              }
1037            }
1038    
1039          /* Choose branch according to the condition */
1040    
1041        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1042        }        }
1043    
1044      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1045        {        {
1046        condition = FALSE;        condition = FALSE;
1047        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 748  for (;;) Line 1061  for (;;)
1061          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1062          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1063          }          }
1064        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1065          {          {
1066          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1067          }          }
1068        else        else
1069          {          {
1070          condition = FALSE;          condition = FALSE;
1071          ecode += GET(ecode, 1);          ecode += codelink;
1072          }          }
1073        }        }
1074    
1075      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1076      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1077      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1078        group. If the second alternative doesn't exist, we can just plough on. */
1079    
1080      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1081        {        {
1082        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1083        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1084        goto TAIL_RECURSE;          {
1085            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1086            RRETURN(rrc);
1087            }
1088          else                       /* Group must match something */
1089            {
1090            flags = 0;
1091            goto TAIL_RECURSE;
1092            }
1093        }        }
1094      else      else                         /* Condition false & no alternative */
1095        {        {
1096        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1097        }        }
1098      break;      break;
1099    
1100    
1101      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1102      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1103    
1104        case OP_CLOSE:
1105        number = GET2(ecode, 1);
1106        offset = number << 1;
1107    
1108    #ifdef PCRE_DEBUG
1109          printf("end bracket %d at *ACCEPT", number);
1110          printf("\n");
1111    #endif
1112    
1113        md->capture_last = number;
1114        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1115          {
1116          md->offset_vector[offset] =
1117            md->offset_vector[md->offset_end - number];
1118          md->offset_vector[offset+1] = eptr - md->start_subject;
1119          if (offset_top <= offset) offset_top = offset + 2;
1120          }
1121        ecode += 3;
1122        break;
1123    
1124    
1125        /* End of the pattern, either real or forced. If we are in a top-level
1126        recursion, we should restore the offsets appropriately and continue from
1127        after the call. */
1128    
1129        case OP_ACCEPT:
1130      case OP_END:      case OP_END:
1131      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1132        {        {
# Line 787  for (;;) Line 1135  for (;;)
1135        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1136        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1137          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1138        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1139        ims = original_ims;        ims = original_ims;
1140        ecode = rec->after_call;        ecode = rec->after_call;
1141        break;        break;
1142        }        }
1143    
1144      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1145      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1146        the subject. In both cases, backtracking will then try other alternatives,
1147      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if any. */
1148      md->end_match_ptr = eptr;          /* Record where we ended */  
1149      md->end_offset_top = offset_top;   /* and how many extracts were taken */      if (eptr == mstart &&
1150      RRETURN(MATCH_MATCH);          (md->notempty ||
1151              (md->notempty_atstart &&
1152                mstart == md->start_subject + md->start_offset)))
1153          MRRETURN(MATCH_NOMATCH);
1154    
1155        /* Otherwise, we have a match. */
1156    
1157        md->end_match_ptr = eptr;           /* Record where we ended */
1158        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1159        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1160        MRRETURN(MATCH_MATCH);
1161    
1162      /* Change option settings */      /* Change option settings */
1163    
# Line 821  for (;;) Line 1179  for (;;)
1179        {        {
1180        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1181          RM4);          RM4);
1182        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH)
1183        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1184            mstart = md->start_match_ptr;   /* In case \K reset it */
1185            break;
1186            }
1187          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1188        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1189        }        }
1190      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1191      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1192    
1193      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1194    
# Line 840  for (;;) Line 1202  for (;;)
1202      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1203      continue;      continue;
1204    
1205      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1206        PRUNE, or COMMIT means we must assume failure without checking subsequent
1207        branches. */
1208    
1209      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1210      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 848  for (;;) Line 1212  for (;;)
1212        {        {
1213        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214          RM5);          RM5);
1215        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) MRRETURN(MATCH_NOMATCH);
1216        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1217            {
1218            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1219            break;
1220            }
1221          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1222        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1223        }        }
1224      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 872  for (;;) Line 1241  for (;;)
1241        while (i-- > 0)        while (i-- > 0)
1242          {          {
1243          eptr--;          eptr--;
1244          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1245          BACKCHAR(eptr)          BACKCHAR(eptr);
1246          }          }
1247        }        }
1248      else      else
# Line 883  for (;;) Line 1252  for (;;)
1252    
1253        {        {
1254        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1255        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1256        }        }
1257    
1258      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1259    
1260        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1261      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1262      break;      break;
1263    
# Line 904  for (;;) Line 1274  for (;;)
1274        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1275        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1276        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
1277        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
1278        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
1279        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1280        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1281        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1282        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1283        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1284        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1285        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1286        }        }
1287      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 966  for (;;) Line 1336  for (;;)
1336    
1337        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1338              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1339        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1340    
1341        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1342        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 984  for (;;) Line 1353  for (;;)
1353            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1354            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1355              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1356            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1357            }            }
1358          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1359            {            {
1360            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1361              if (new_recursive.offset_save != stacksave)
1362                (pcre_free)(new_recursive.offset_save);
1363            RRETURN(rrc);            RRETURN(rrc);
1364            }            }
1365    
# Line 1003  for (;;) Line 1374  for (;;)
1374        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1375        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1376          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1377        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1378        }        }
1379      /* Control never reaches here */      /* Control never reaches here */
1380    
# Line 1012  for (;;) Line 1383  for (;;)
1383      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1384      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1385      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1386      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1387        the start-of-match value in case it was changed by \K. */
1388    
1389      case OP_ONCE:      case OP_ONCE:
1390      prev = ecode;      prev = ecode;
# Line 1020  for (;;) Line 1392  for (;;)
1392    
1393      do      do
1394        {        {
1395        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1396          eptrb, 0, RM7);        if (rrc == MATCH_MATCH)
1397        if (rrc == MATCH_MATCH) break;          {
1398        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1399            break;
1400            }
1401          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1402        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1403        }        }
1404      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1066  for (;;) Line 1441  for (;;)
1441    
1442      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1443        {        {
1444        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1445        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1446        ecode = prev;        ecode = prev;
1447        flags = match_tail_recursed;        flags = 0;
1448        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1449        }        }
1450      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1078  for (;;) Line 1452  for (;;)
1452        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1453        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1454        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1455        flags = match_tail_recursed;        flags = 0;
1456        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1457        }        }
1458      /* Control never gets here */      /* Control never gets here */
# Line 1090  for (;;) Line 1464  for (;;)
1464      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1465      break;      break;
1466    
1467      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1468      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1469      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1470      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1471      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1472    
1473      case OP_BRAZERO:      case OP_BRAZERO:
1474        {        {
# Line 1116  for (;;) Line 1490  for (;;)
1490        }        }
1491      break;      break;
1492    
1493        case OP_SKIPZERO:
1494          {
1495          next = ecode+1;
1496          do next += GET(next,1); while (*next == OP_ALT);
1497          ecode = next + 1 + LINK_SIZE;
1498          }
1499        break;
1500    
1501      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1502    
1503      case OP_KET:      case OP_KET:
# Line 1134  for (;;) Line 1516  for (;;)
1516        }        }
1517      else saved_eptr = NULL;      else saved_eptr = NULL;
1518    
1519      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1520      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1521      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1522        it was changed by \K. */
1523    
1524      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1525          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1144  for (;;) Line 1527  for (;;)
1527        {        {
1528        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1529        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1530        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1531          MRRETURN(MATCH_MATCH);
1532        }        }
1533    
1534      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1158  for (;;) Line 1542  for (;;)
1542        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1543        offset = number << 1;        offset = number << 1;
1544    
1545  #ifdef DEBUG  #ifdef PCRE_DEBUG
1546        printf("end bracket %d", number);        printf("end bracket %d", number);
1547        printf("\n");        printf("\n");
1548  #endif  #endif
# Line 1180  for (;;) Line 1564  for (;;)
1564          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1565          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1566          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1567          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1568            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1569            offset_top = rec->save_offset_top;
1570          ecode = rec->after_call;          ecode = rec->after_call;
1571          ims = original_ims;          ims = original_ims;
1572          break;          break;
# Line 1209  for (;;) Line 1593  for (;;)
1593    
1594      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1595      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1596      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1597        unlimited repeat of a group that can match an empty string. */
1598    
1599      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1600    
1601      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1602        {        {
1603        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1604        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1605          if (flags != 0)    /* Could match an empty string */
1606            {
1607            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1608            RRETURN(rrc);
1609            }
1610        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1611        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1612        }        }
1613      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1227  for (;;) Line 1615  for (;;)
1615        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1616        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1617        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1618        flags = match_tail_recursed;        flags = 0;
1619        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1620        }        }
1621      /* Control never gets here */      /* Control never gets here */
# Line 1235  for (;;) Line 1623  for (;;)
1623      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1624    
1625      case OP_CIRC:      case OP_CIRC:
1626      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1627      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1628        {        {
1629        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1630            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1631          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1632        ecode++;        ecode++;
1633        break;        break;
1634        }        }
# Line 1249  for (;;) Line 1637  for (;;)
1637      /* Start of subject assertion */      /* Start of subject assertion */
1638    
1639      case OP_SOD:      case OP_SOD:
1640      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1641      ecode++;      ecode++;
1642      break;      break;
1643    
1644      /* Start of match assertion */      /* Start of match assertion */
1645    
1646      case OP_SOM:      case OP_SOM:
1647      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1648        ecode++;
1649        break;
1650    
1651        /* Reset the start of match point */
1652    
1653        case OP_SET_SOM:
1654        mstart = eptr;
1655      ecode++;      ecode++;
1656      break;      break;
1657    
# Line 1267  for (;;) Line 1662  for (;;)
1662      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1663        {        {
1664        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1665          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1666        else        else
1667          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1668        ecode++;        ecode++;
1669        break;        break;
1670        }        }
1671      else      else
1672        {        {
1673        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1674        if (!md->endonly)        if (!md->endonly)
1675          {          {
1676          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1677              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1678            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1679          ecode++;          ecode++;
1680          break;          break;
1681          }          }
# Line 1290  for (;;) Line 1685  for (;;)
1685      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1686    
1687      case OP_EOD:      case OP_EOD:
1688      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1689      ecode++;      ecode++;
1690      break;      break;
1691    
# Line 1299  for (;;) Line 1694  for (;;)
1694      case OP_EODN:      case OP_EODN:
1695      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1696          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1697        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1698      ecode++;      ecode++;
1699      break;      break;
1700    
# Line 1311  for (;;) Line 1706  for (;;)
1706    
1707        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1708        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1709        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1710          partial matching. */
1711    
1712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1713        if (utf8)        if (utf8)
1714          {          {
1715          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1716            {            {
1717            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1718            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1719              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1720            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1721            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1722            }            }
1723          if (eptr >= md->end_subject) cur_is_word = FALSE; else          if (eptr >= md->end_subject)
1724              {
1725              SCHECK_PARTIAL();
1726              cur_is_word = FALSE;
1727              }
1728            else
1729            {            {
1730            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1731            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
# Line 1332  for (;;) Line 1734  for (;;)
1734        else        else
1735  #endif  #endif
1736    
1737        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode */
1738    
1739          {          {
1740          prev_is_word = (eptr != md->start_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1741            ((md->ctypes[eptr[-1]] & ctype_word) != 0);            {
1742          cur_is_word = (eptr < md->end_subject) &&            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1743            ((md->ctypes[*eptr] & ctype_word) != 0);            prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1744              }
1745            if (eptr >= md->end_subject)
1746              {
1747              SCHECK_PARTIAL();
1748              cur_is_word = FALSE;
1749              }
1750            else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1751          }          }
1752    
1753        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1754    
1755        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1756             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1757          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1758        }        }
1759      break;      break;
1760    
1761      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1762    
1763      case OP_ANY:      case OP_ANY:
1764      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1765        /* Fall through */
1766    
1767        case OP_ALLANY:
1768        if (eptr++ >= md->end_subject)
1769        {        {
1770        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1771          MRRETURN(MATCH_NOMATCH);
1772        }        }
1773      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1774      ecode++;      ecode++;
1775      break;      break;
1776    
# Line 1366  for (;;) Line 1778  for (;;)
1778      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1779    
1780      case OP_ANYBYTE:      case OP_ANYBYTE:
1781      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1782          {
1783          SCHECK_PARTIAL();
1784          MRRETURN(MATCH_NOMATCH);
1785          }
1786      ecode++;      ecode++;
1787      break;      break;
1788    
1789      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1790      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1791          {
1792          SCHECK_PARTIAL();
1793          MRRETURN(MATCH_NOMATCH);
1794          }
1795      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1796      if (      if (
1797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1379  for (;;) Line 1799  for (;;)
1799  #endif  #endif
1800         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1801         )         )
1802        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1803      ecode++;      ecode++;
1804      break;      break;
1805    
1806      case OP_DIGIT:      case OP_DIGIT:
1807      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1808          {
1809          SCHECK_PARTIAL();
1810          MRRETURN(MATCH_NOMATCH);
1811          }
1812      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1813      if (      if (
1814  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1392  for (;;) Line 1816  for (;;)
1816  #endif  #endif
1817         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1818         )         )
1819        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1820      ecode++;      ecode++;
1821      break;      break;
1822    
1823      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1824      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1825          {
1826          SCHECK_PARTIAL();
1827          MRRETURN(MATCH_NOMATCH);
1828          }
1829      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1830      if (      if (
1831  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1405  for (;;) Line 1833  for (;;)
1833  #endif  #endif
1834         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1835         )         )
1836        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1837      ecode++;      ecode++;
1838      break;      break;
1839    
1840      case OP_WHITESPACE:      case OP_WHITESPACE:
1841      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1842          {
1843          SCHECK_PARTIAL();
1844          MRRETURN(MATCH_NOMATCH);
1845          }
1846      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1847      if (      if (
1848  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1418  for (;;) Line 1850  for (;;)
1850  #endif  #endif
1851         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1852         )         )
1853        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1431  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_WORDCHAR:      case OP_WORDCHAR:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1444  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888      ecode++;      ecode++;
1889      break;      break;
1890    
1891      case OP_ANYNL:      case OP_ANYNL:
1892      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1898      switch(c)      switch(c)
1899        {        {
1900        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1901        case 0x000d:        case 0x000d:
1902        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1903        break;        break;
1904    
1905        case 0x000a:        case 0x000a:
1906          break;
1907    
1908        case 0x000b:        case 0x000b:
1909        case 0x000c:        case 0x000c:
1910        case 0x0085:        case 0x0085:
1911        case 0x2028:        case 0x2028:
1912        case 0x2029:        case 0x2029:
1913          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1914          break;
1915          }
1916        ecode++;
1917        break;
1918    
1919        case OP_NOT_HSPACE:
1920        if (eptr >= md->end_subject)
1921          {
1922          SCHECK_PARTIAL();
1923          MRRETURN(MATCH_NOMATCH);
1924          }
1925        GETCHARINCTEST(c, eptr);
1926        switch(c)
1927          {
1928          default: break;
1929          case 0x09:      /* HT */
1930          case 0x20:      /* SPACE */
1931          case 0xa0:      /* NBSP */
1932          case 0x1680:    /* OGHAM SPACE MARK */
1933          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1934          case 0x2000:    /* EN QUAD */
1935          case 0x2001:    /* EM QUAD */
1936          case 0x2002:    /* EN SPACE */
1937          case 0x2003:    /* EM SPACE */
1938          case 0x2004:    /* THREE-PER-EM SPACE */
1939          case 0x2005:    /* FOUR-PER-EM SPACE */
1940          case 0x2006:    /* SIX-PER-EM SPACE */
1941          case 0x2007:    /* FIGURE SPACE */
1942          case 0x2008:    /* PUNCTUATION SPACE */
1943          case 0x2009:    /* THIN SPACE */
1944          case 0x200A:    /* HAIR SPACE */
1945          case 0x202f:    /* NARROW NO-BREAK SPACE */
1946          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1947          case 0x3000:    /* IDEOGRAPHIC SPACE */
1948          MRRETURN(MATCH_NOMATCH);
1949          }
1950        ecode++;
1951        break;
1952    
1953        case OP_HSPACE:
1954        if (eptr >= md->end_subject)
1955          {
1956          SCHECK_PARTIAL();
1957          MRRETURN(MATCH_NOMATCH);
1958          }
1959        GETCHARINCTEST(c, eptr);
1960        switch(c)
1961          {
1962          default: MRRETURN(MATCH_NOMATCH);
1963          case 0x09:      /* HT */
1964          case 0x20:      /* SPACE */
1965          case 0xa0:      /* NBSP */
1966          case 0x1680:    /* OGHAM SPACE MARK */
1967          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1968          case 0x2000:    /* EN QUAD */
1969          case 0x2001:    /* EM QUAD */
1970          case 0x2002:    /* EN SPACE */
1971          case 0x2003:    /* EM SPACE */
1972          case 0x2004:    /* THREE-PER-EM SPACE */
1973          case 0x2005:    /* FOUR-PER-EM SPACE */
1974          case 0x2006:    /* SIX-PER-EM SPACE */
1975          case 0x2007:    /* FIGURE SPACE */
1976          case 0x2008:    /* PUNCTUATION SPACE */
1977          case 0x2009:    /* THIN SPACE */
1978          case 0x200A:    /* HAIR SPACE */
1979          case 0x202f:    /* NARROW NO-BREAK SPACE */
1980          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1981          case 0x3000:    /* IDEOGRAPHIC SPACE */
1982          break;
1983          }
1984        ecode++;
1985        break;
1986    
1987        case OP_NOT_VSPACE:
1988        if (eptr >= md->end_subject)
1989          {
1990          SCHECK_PARTIAL();
1991          MRRETURN(MATCH_NOMATCH);
1992          }
1993        GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x0a:      /* LF */
1998          case 0x0b:      /* VT */
1999          case 0x0c:      /* FF */
2000          case 0x0d:      /* CR */
2001          case 0x85:      /* NEL */
2002          case 0x2028:    /* LINE SEPARATOR */
2003          case 0x2029:    /* PARAGRAPH SEPARATOR */
2004          MRRETURN(MATCH_NOMATCH);
2005          }
2006        ecode++;
2007        break;
2008    
2009        case OP_VSPACE:
2010        if (eptr >= md->end_subject)
2011          {
2012          SCHECK_PARTIAL();
2013          MRRETURN(MATCH_NOMATCH);
2014          }
2015        GETCHARINCTEST(c, eptr);
2016        switch(c)
2017          {
2018          default: MRRETURN(MATCH_NOMATCH);
2019          case 0x0a:      /* LF */
2020          case 0x0b:      /* VT */
2021          case 0x0c:      /* FF */
2022          case 0x0d:      /* CR */
2023          case 0x85:      /* NEL */
2024          case 0x2028:    /* LINE SEPARATOR */
2025          case 0x2029:    /* PARAGRAPH SEPARATOR */
2026        break;        break;
2027        }        }
2028      ecode++;      ecode++;
# Line 1474  for (;;) Line 2034  for (;;)
2034    
2035      case OP_PROP:      case OP_PROP:
2036      case OP_NOTPROP:      case OP_NOTPROP:
2037      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2038          {
2039          SCHECK_PARTIAL();
2040          MRRETURN(MATCH_NOMATCH);
2041          }
2042      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2043        {        {
2044        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2045    
2046        switch(ecode[1])        switch(ecode[1])
2047          {          {
2048          case PT_ANY:          case PT_ANY:
2049          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2050          break;          break;
2051    
2052          case PT_LAMP:          case PT_LAMP:
2053          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2054               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2055               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2056            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2057           break;           break;
2058    
2059          case PT_GC:          case PT_GC:
2060          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2061            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2062          break;          break;
2063    
2064          case PT_PC:          case PT_PC:
2065          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2066            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2067          break;          break;
2068    
2069          case PT_SC:          case PT_SC:
2070          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2071            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2072          break;          break;
2073    
2074          default:          default:
# Line 1520  for (;;) Line 2083  for (;;)
2083      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2084    
2085      case OP_EXTUNI:      case OP_EXTUNI:
2086      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2092        {        {
2093        int chartype, script;        int category = UCD_CATEGORY(c);
2094        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2095        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2096          {          {
2097          int len = 1;          int len = 1;
# Line 1533  for (;;) Line 2099  for (;;)
2099            {            {
2100            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2101            }            }
2102          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2103          if (category != ucp_M) break;          if (category != ucp_M) break;
2104          eptr += len;          eptr += len;
2105          }          }
# Line 1554  for (;;) Line 2120  for (;;)
2120      case OP_REF:      case OP_REF:
2121        {        {
2122        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2123        ecode += 3;                                 /* Advance past item */        ecode += 3;
2124    
2125          /* If the reference is unset, there are two possibilities:
2126    
2127          (a) In the default, Perl-compatible state, set the length to be longer
2128          than the amount of subject left; this ensures that every attempt at a
2129          match fails. We can't just fail here, because of the possibility of
2130          quantifiers with zero minima.
2131    
2132          (b) If the JavaScript compatibility flag is set, set the length to zero
2133          so that the back reference matches an empty string.
2134    
2135          Otherwise, set the length to the length of what was matched by the
2136          referenced subpattern. */
2137    
2138        /* If the reference is unset, set the length to be longer than the amount        if (offset >= offset_top || md->offset_vector[offset] < 0)
2139        of subject left; this ensures that every attempt at a match fails. We          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
2140        can't just fail here, because of the possibility of quantifiers with zero        else
2141        minima. */          length = md->offset_vector[offset+1] - md->offset_vector[offset];
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2142    
2143        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2144    
# Line 1592  for (;;) Line 2167  for (;;)
2167          break;          break;
2168    
2169          default:               /* No repeat follows */          default:               /* No repeat follows */
2170          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2171              {
2172              CHECK_PARTIAL();
2173              MRRETURN(MATCH_NOMATCH);
2174              }
2175          eptr += length;          eptr += length;
2176          continue;              /* With the main loop */          continue;              /* With the main loop */
2177          }          }
# Line 1608  for (;;) Line 2187  for (;;)
2187    
2188        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2189          {          {
2190          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2191              {
2192              CHECK_PARTIAL();
2193              MRRETURN(MATCH_NOMATCH);
2194              }
2195          eptr += length;          eptr += length;
2196          }          }
2197    
# Line 1625  for (;;) Line 2208  for (;;)
2208            {            {
2209            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2210            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2211            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2212              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2213                {
2214                CHECK_PARTIAL();
2215                MRRETURN(MATCH_NOMATCH);
2216                }
2217            eptr += length;            eptr += length;
2218            }            }
2219          /* Control never gets here */          /* Control never gets here */
# Line 1639  for (;;) Line 2226  for (;;)
2226          pp = eptr;          pp = eptr;
2227          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2228            {            {
2229            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2230            eptr += length;              {
2231                CHECK_PARTIAL();
2232                break;
2233                }
2234              eptr += length;
2235            }            }
2236          while (eptr >= pp)          while (eptr >= pp)
2237            {            {
# Line 1648  for (;;) Line 2239  for (;;)
2239            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2240            eptr -= length;            eptr -= length;
2241            }            }
2242          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2243          }          }
2244        }        }
2245      /* Control never gets here */      /* Control never gets here */
2246    
   
   
2247      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2248      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2249      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1709  for (;;) Line 2298  for (;;)
2298          {          {
2299          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2300            {            {
2301            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2302                {
2303                SCHECK_PARTIAL();
2304                MRRETURN(MATCH_NOMATCH);
2305                }
2306            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2307            if (c > 255)            if (c > 255)
2308              {              {
2309              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2310              }              }
2311            else            else
2312              {              {
2313              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2314              }              }
2315            }            }
2316          }          }
# Line 1727  for (;;) Line 2320  for (;;)
2320          {          {
2321          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2322            {            {
2323            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2324                {
2325                SCHECK_PARTIAL();
2326                MRRETURN(MATCH_NOMATCH);
2327                }
2328            c = *eptr++;            c = *eptr++;
2329            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2330            }            }
2331          }          }
2332    
# Line 1751  for (;;) Line 2348  for (;;)
2348              {              {
2349              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2350              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2351              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2352                if (eptr >= md->end_subject)
2353                  {
2354                  SCHECK_PARTIAL();
2355                  MRRETURN(MATCH_NOMATCH);
2356                  }
2357              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2358              if (c > 255)              if (c > 255)
2359                {                {
2360                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2361                }                }
2362              else              else
2363                {                {
2364                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2365                }                }
2366              }              }
2367            }            }
# Line 1771  for (;;) Line 2373  for (;;)
2373              {              {
2374              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2377                if (eptr >= md->end_subject)
2378                  {
2379                  SCHECK_PARTIAL();
2380                  MRRETURN(MATCH_NOMATCH);
2381                  }
2382              c = *eptr++;              c = *eptr++;
2383              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2384              }              }
2385            }            }
2386          /* Control never gets here */          /* Control never gets here */
# Line 1792  for (;;) Line 2399  for (;;)
2399            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2400              {              {
2401              int len = 1;              int len = 1;
2402              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2403                  {
2404                  SCHECK_PARTIAL();
2405                  break;
2406                  }
2407              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2408              if (c > 255)              if (c > 255)
2409                {                {
# Line 1818  for (;;) Line 2429  for (;;)
2429            {            {
2430            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2431              {              {
2432              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2433                  {
2434                  SCHECK_PARTIAL();
2435                  break;
2436                  }
2437              c = *eptr;              c = *eptr;
2438              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2439              eptr++;              eptr++;
# Line 1831  for (;;) Line 2446  for (;;)
2446              }              }
2447            }            }
2448    
2449          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2450          }          }
2451        }        }
2452      /* Control never gets here */      /* Control never gets here */
2453    
2454    
2455      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2456      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2457        mode, because Unicode properties are supported in non-UTF-8 mode. */
2458    
2459  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2460      case OP_XCLASS:      case OP_XCLASS:
# Line 1879  for (;;) Line 2495  for (;;)
2495    
2496        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2497          {          {
2498          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2499          GETCHARINC(c, eptr);            {
2500          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2501              MRRETURN(MATCH_NOMATCH);
2502              }
2503            GETCHARINCTEST(c, eptr);
2504            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2505          }          }
2506    
2507        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1898  for (;;) Line 2518  for (;;)
2518            {            {
2519            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2520            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2521            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2522            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2523            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2524                SCHECK_PARTIAL();
2525                MRRETURN(MATCH_NOMATCH);
2526                }
2527              GETCHARINCTEST(c, eptr);
2528              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2529            }            }
2530          /* Control never gets here */          /* Control never gets here */
2531          }          }
# Line 1913  for (;;) Line 2538  for (;;)
2538          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2539            {            {
2540            int len = 1;            int len = 1;
2541            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2542            GETCHARLEN(c, eptr, len);              {
2543                SCHECK_PARTIAL();
2544                break;
2545                }
2546              GETCHARLENTEST(c, eptr, len);
2547            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2548            eptr += len;            eptr += len;
2549            }            }
# Line 1923  for (;;) Line 2552  for (;;)
2552            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2553            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2554            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2555            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2556            }            }
2557          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2558          }          }
2559    
2560        /* Control never gets here */        /* Control never gets here */
# Line 1941  for (;;) Line 2570  for (;;)
2570        length = 1;        length = 1;
2571        ecode++;        ecode++;
2572        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2573        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2574        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2575            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2576            MRRETURN(MATCH_NOMATCH);
2577            }
2578          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2579        }        }
2580      else      else
2581  #endif  #endif
2582    
2583      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2584        {        {
2585        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2586        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2587            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2588            MRRETURN(MATCH_NOMATCH);
2589            }
2590          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2591        ecode += 2;        ecode += 2;
2592        }        }
2593      break;      break;
# Line 1965  for (;;) Line 2602  for (;;)
2602        ecode++;        ecode++;
2603        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2604    
2605        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2606            {
2607            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2608            MRRETURN(MATCH_NOMATCH);
2609            }
2610    
2611        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2612        can use the fast lookup table. */        can use the fast lookup table. */
2613    
2614        if (fc < 128)        if (fc < 128)
2615          {          {
2616          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2617          }          }
2618    
2619        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1989  for (;;) Line 2630  for (;;)
2630          if (fc != dc)          if (fc != dc)
2631            {            {
2632  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2633            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2634  #endif  #endif
2635              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2636            }            }
2637          }          }
2638        }        }
# Line 2000  for (;;) Line 2641  for (;;)
2641    
2642      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2643        {        {
2644        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2645        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2646            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2647            MRRETURN(MATCH_NOMATCH);
2648            }
2649          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2650        ecode += 2;        ecode += 2;
2651        }        }
2652      break;      break;
# Line 2054  for (;;) Line 2699  for (;;)
2699      case OP_MINQUERY:      case OP_MINQUERY:
2700      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2701      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2702    
2703      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2704      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2705      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2706    
2707      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2708    
2709      REPEATCHAR:      REPEATCHAR:
2710  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2069  for (;;) Line 2713  for (;;)
2713        length = 1;        length = 1;
2714        charptr = ecode;        charptr = ecode;
2715        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2716        ecode += length;        ecode += length;
2717    
2718        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2080  for (;;) Line 2723  for (;;)
2723  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2724          unsigned int othercase;          unsigned int othercase;
2725          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2726              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2727            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2728          else oclength = 0;          else oclength = 0;
2729  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2730    
2731          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2732            {            {
2733            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2734                memcmp(eptr, charptr, length) == 0) eptr += length;
2735  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2736            /* Need braces because of following else */            else if (oclength > 0 &&
2737            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2738                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2739    #endif  /* SUPPORT_UCP */
2740            else            else
2741              {              {
2742              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2743              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2744              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2745            }            }
2746    
2747          if (min == max) continue;          if (min == max) continue;
# Line 2109  for (;;) Line 2752  for (;;)
2752              {              {
2753              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2754              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2755              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2756              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2757                  memcmp(eptr, charptr, length) == 0) eptr += length;
2758  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2759              /* Need braces because of following else */              else if (oclength > 0 &&
2760              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2761                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2762    #endif  /* SUPPORT_UCP */
2763              else              else
2764                {                {
2765                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2766                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2767                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2768              }              }
2769            /* Control never gets here */            /* Control never gets here */
2770            }            }
# Line 2131  for (;;) Line 2774  for (;;)
2774            pp = eptr;            pp = eptr;
2775            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2776              {              {
2777              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2778              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2779  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2780              else if (oclength == 0) break;              else if (oclength > 0 &&
2781                         eptr <= md->end_subject - oclength &&
2782                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2783    #endif  /* SUPPORT_UCP */
2784              else              else
2785                {                {
2786                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2787                eptr += oclength;                break;
2788                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2789              }              }
2790    
2791            if (possessive) continue;            if (possessive) continue;
2792    
2793            for(;;)            for(;;)
2794             {              {
2795             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2796             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2797             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2798  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2799             eptr--;              eptr--;
2800             BACKCHAR(eptr);              BACKCHAR(eptr);
2801  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2802             eptr -= length;              eptr -= length;
2803  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2804             }              }
2805            }            }
2806          /* Control never gets here */          /* Control never gets here */
2807          }          }
# Line 2170  for (;;) Line 2814  for (;;)
2814  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2815    
2816      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2817        {  
2818        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2819    
2820      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2821      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2191  for (;;) Line 2833  for (;;)
2833        {        {
2834        fc = md->lcc[fc];        fc = md->lcc[fc];
2835        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2836          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2837            if (eptr >= md->end_subject)
2838              {
2839              SCHECK_PARTIAL();
2840              MRRETURN(MATCH_NOMATCH);
2841              }
2842            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2843            }
2844        if (min == max) continue;        if (min == max) continue;
2845        if (minimize)        if (minimize)
2846          {          {
# Line 2199  for (;;) Line 2848  for (;;)
2848            {            {
2849            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2850            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2851            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2852                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2853              RRETURN(MATCH_NOMATCH);              {
2854                SCHECK_PARTIAL();
2855                MRRETURN(MATCH_NOMATCH);
2856                }
2857              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2858            }            }
2859          /* Control never gets here */          /* Control never gets here */
2860          }          }
# Line 2210  for (;;) Line 2863  for (;;)
2863          pp = eptr;          pp = eptr;
2864          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2865            {            {
2866            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2867                {
2868                SCHECK_PARTIAL();
2869                break;
2870                }
2871              if (fc != md->lcc[*eptr]) break;
2872            eptr++;            eptr++;
2873            }            }
2874    
2875          if (possessive) continue;          if (possessive) continue;
2876    
2877          while (eptr >= pp)          while (eptr >= pp)
2878            {            {
2879            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2880            eptr--;            eptr--;
2881            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2882            }            }
2883          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2884          }          }
2885        /* Control never gets here */        /* Control never gets here */
2886        }        }
# Line 2229  for (;;) Line 2889  for (;;)
2889    
2890      else      else
2891        {        {
2892        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2893            {
2894            if (eptr >= md->end_subject)
2895              {
2896              SCHECK_PARTIAL();
2897              MRRETURN(MATCH_NOMATCH);
2898              }
2899            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2900            }
2901    
2902        if (min == max) continue;        if (min == max) continue;
2903    
2904        if (minimize)        if (minimize)
2905          {          {
2906          for (fi = min;; fi++)          for (fi = min;; fi++)
2907            {            {
2908            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2909            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2910            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2911              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2912                {
2913                SCHECK_PARTIAL();
2914                MRRETURN(MATCH_NOMATCH);
2915                }
2916              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
2917            }            }
2918          /* Control never gets here */          /* Control never gets here */
2919          }          }
# Line 2247  for (;;) Line 2922  for (;;)
2922          pp = eptr;          pp = eptr;
2923          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2924            {            {
2925            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
2926                {
2927                SCHECK_PARTIAL();
2928                break;
2929                }
2930              if (fc != *eptr) break;
2931            eptr++;            eptr++;
2932            }            }
2933          if (possessive) continue;          if (possessive) continue;
2934    
2935          while (eptr >= pp)          while (eptr >= pp)
2936            {            {
2937            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2938            eptr--;            eptr--;
2939            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2940            }            }
2941          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2942          }          }
2943        }        }
2944      /* Control never gets here */      /* Control never gets here */
# Line 2266  for (;;) Line 2947  for (;;)
2947      checking can be multibyte. */      checking can be multibyte. */
2948    
2949      case OP_NOT:      case OP_NOT:
2950      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2951          {
2952          SCHECK_PARTIAL();
2953          MRRETURN(MATCH_NOMATCH);
2954          }
2955      ecode++;      ecode++;
2956      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2957      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2275  for (;;) Line 2960  for (;;)
2960        if (c < 256)        if (c < 256)
2961  #endif  #endif
2962        c = md->lcc[c];        c = md->lcc[c];
2963        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
2964        }        }
2965      else      else
2966        {        {
2967        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
2968        }        }
2969      break;      break;
2970    
# Line 2343  for (;;) Line 3028  for (;;)
3028      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3029      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3030    
3031      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3032    
3033      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3034      fc = *ecode++;      fc = *ecode++;
3035    
3036      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2373  for (;;) Line 3055  for (;;)
3055          register unsigned int d;          register unsigned int d;
3056          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3057            {            {
3058              if (eptr >= md->end_subject)
3059                {
3060                SCHECK_PARTIAL();
3061                MRRETURN(MATCH_NOMATCH);
3062                }
3063            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3064            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3065            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3066            }            }
3067          }          }
3068        else        else
# Line 2384  for (;;) Line 3071  for (;;)
3071        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3072          {          {
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3075              if (eptr >= md->end_subject)
3076                {
3077                SCHECK_PARTIAL();
3078                MRRETURN(MATCH_NOMATCH);
3079                }
3080              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3081              }
3082          }          }
3083    
3084        if (min == max) continue;        if (min == max) continue;
# Line 2400  for (;;) Line 3094  for (;;)
3094              {              {
3095              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3096              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3097                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3098                if (eptr >= md->end_subject)
3099                  {
3100                  SCHECK_PARTIAL();
3101                  MRRETURN(MATCH_NOMATCH);
3102                  }
3103              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3104              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3105              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3106              }              }
3107            }            }
3108          else          else
# Line 2414  for (;;) Line 3113  for (;;)
3113              {              {
3114              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3116              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3117                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3118                  {
3119                  SCHECK_PARTIAL();
3120                  MRRETURN(MATCH_NOMATCH);
3121                  }
3122                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3123              }              }
3124            }            }
3125          /* Control never gets here */          /* Control never gets here */
# Line 2435  for (;;) Line 3139  for (;;)
3139            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3140              {              {
3141              int len = 1;              int len = 1;
3142              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3143                  {
3144                  SCHECK_PARTIAL();
3145                  break;
3146                  }
3147              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3148              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3149              if (fc == d) break;              if (fc == d) break;
# Line 2456  for (;;) Line 3164  for (;;)
3164            {            {
3165            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3166              {              {
3167              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3168                  {
3169                  SCHECK_PARTIAL();
3170                  break;
3171                  }
3172                if (fc == md->lcc[*eptr]) break;
3173              eptr++;              eptr++;
3174              }              }
3175            if (possessive) continue;            if (possessive) continue;
# Line 2468  for (;;) Line 3181  for (;;)
3181              }              }
3182            }            }
3183    
3184          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3185          }          }
3186        /* Control never gets here */        /* Control never gets here */
3187        }        }
# Line 2484  for (;;) Line 3197  for (;;)
3197          register unsigned int d;          register unsigned int d;
3198          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3199            {            {
3200              if (eptr >= md->end_subject)
3201                {
3202                SCHECK_PARTIAL();
3203                MRRETURN(MATCH_NOMATCH);
3204                }
3205            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3206            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3207            }            }
3208          }          }
3209        else        else
# Line 2493  for (;;) Line 3211  for (;;)
3211        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3212          {          {
3213          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3214            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3215              if (eptr >= md->end_subject)
3216                {
3217                SCHECK_PARTIAL();
3218                MRRETURN(MATCH_NOMATCH);
3219                }
3220              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3221              }
3222          }          }
3223    
3224        if (min == max) continue;        if (min == max) continue;
# Line 2509  for (;;) Line 3234  for (;;)
3234              {              {
3235              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3236              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3237                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3238                if (eptr >= md->end_subject)
3239                  {
3240                  SCHECK_PARTIAL();
3241                  MRRETURN(MATCH_NOMATCH);
3242                  }
3243              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3244              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3245              }              }
3246            }            }
3247          else          else
# Line 2522  for (;;) Line 3252  for (;;)
3252              {              {
3253              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3254              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3255              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3256                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3257                  {
3258                  SCHECK_PARTIAL();
3259                  MRRETURN(MATCH_NOMATCH);
3260                  }
3261                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3262              }              }
3263            }            }
3264          /* Control never gets here */          /* Control never gets here */
# Line 2543  for (;;) Line 3278  for (;;)
3278            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3279              {              {
3280              int len = 1;              int len = 1;
3281              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3282                  {
3283                  SCHECK_PARTIAL();
3284                  break;
3285                  }
3286              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3287              if (fc == d) break;              if (fc == d) break;
3288              eptr += len;              eptr += len;
# Line 2563  for (;;) Line 3302  for (;;)
3302            {            {
3303            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3304              {              {
3305              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3306                  {
3307                  SCHECK_PARTIAL();
3308                  break;
3309                  }
3310                if (fc == *eptr) break;
3311              eptr++;              eptr++;
3312              }              }
3313            if (possessive) continue;            if (possessive) continue;
# Line 2575  for (;;) Line 3319  for (;;)
3319              }              }
3320            }            }
3321    
3322          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3323          }          }
3324        }        }
3325      /* Control never gets here */      /* Control never gets here */
# Line 2657  for (;;) Line 3401  for (;;)
3401    
3402      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3403      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3404      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3405      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3406      and single-bytes. */      and single-bytes. */
3407    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3408      if (min > 0)      if (min > 0)
3409        {        {
3410  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2672  for (;;) Line 3413  for (;;)
3413          switch(prop_type)          switch(prop_type)
3414            {            {
3415            case PT_ANY:            case PT_ANY:
3416            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3417            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3418              {              {
3419              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3420              GETCHARINC(c, eptr);                {
3421                  SCHECK_PARTIAL();
3422                  MRRETURN(MATCH_NOMATCH);
3423                  }
3424                GETCHARINCTEST(c, eptr);
3425              }              }
3426            break;            break;
3427    
3428            case PT_LAMP:            case PT_LAMP:
3429            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3430              {              {
3431              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3432              GETCHARINC(c, eptr);                {
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3434                  MRRETURN(MATCH_NOMATCH);
3435                  }
3436                GETCHARINCTEST(c, eptr);
3437                prop_chartype = UCD_CHARTYPE(c);
3438              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3439                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3440                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3441                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3442              }              }
3443            break;            break;
3444    
3445            case PT_GC:            case PT_GC:
3446            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3447              {              {
3448              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3449              GETCHARINC(c, eptr);                {
3450              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3451                  MRRETURN(MATCH_NOMATCH);
3452                  }
3453                GETCHARINCTEST(c, eptr);
3454                prop_category = UCD_CATEGORY(c);
3455              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3456                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3457              }              }
3458            break;            break;
3459    
3460            case PT_PC:            case PT_PC:
3461            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3462              {              {
3463              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3464              GETCHARINC(c, eptr);                {
3465              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3466                  MRRETURN(MATCH_NOMATCH);
3467                  }
3468                GETCHARINCTEST(c, eptr);
3469                prop_chartype = UCD_CHARTYPE(c);
3470              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3471                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3472              }              }
3473            break;            break;
3474    
3475            case PT_SC:            case PT_SC:
3476            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3477              {              {
3478              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3479              GETCHARINC(c, eptr);                {
3480              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3481                  MRRETURN(MATCH_NOMATCH);
3482                  }
3483                GETCHARINCTEST(c, eptr);
3484                prop_script = UCD_SCRIPT(c);
3485              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3486                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3487              }              }
3488            break;            break;
3489    
# Line 2738  for (;;) Line 3499  for (;;)
3499          {          {
3500          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3501            {            {
3502              if (eptr >= md->end_subject)
3503                {
3504                SCHECK_PARTIAL();
3505                MRRETURN(MATCH_NOMATCH);
3506                }
3507            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3508            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3509            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3510            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3511              {              {
3512              int len = 1;              int len = 1;
3513              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3514                {                else { GETCHARLEN(c, eptr, len); }
3515                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3516              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3517              eptr += len;              eptr += len;
3518              }              }
# Line 2766  for (;;) Line 3530  for (;;)
3530          case OP_ANY:          case OP_ANY:
3531          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3532            {            {
3533            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3534                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3535              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3536                MRRETURN(MATCH_NOMATCH);
3537                }
3538              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3539              eptr++;
3540              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3541              }
3542            break;
3543    
3544            case OP_ALLANY:
3545            for (i = 1; i <= min; i++)
3546              {
3547              if (eptr >= md->end_subject)
3548                {
3549                SCHECK_PARTIAL();
3550                MRRETURN(MATCH_NOMATCH);
3551                }
3552            eptr++;            eptr++;
3553            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3554            }            }
3555          break;          break;
3556    
3557          case OP_ANYBYTE:          case OP_ANYBYTE:
3558            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3559          eptr += min;          eptr += min;
3560          break;          break;
3561    
3562          case OP_ANYNL:          case OP_ANYNL:
3563          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3564            {            {
3565            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3566                {
3567                SCHECK_PARTIAL();
3568                MRRETURN(MATCH_NOMATCH);
3569                }
3570            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3571            switch(c)            switch(c)
3572              {              {
3573              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3574              case 0x000d:              case 0x000d:
3575              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3576              break;              break;
3577    
3578              case 0x000a:              case 0x000a:
3579                break;
3580    
3581              case 0x000b:              case 0x000b:
3582              case 0x000c:              case 0x000c:
3583              case 0x0085:              case 0x0085:
3584              case 0x2028:              case 0x2028:
3585              case 0x2029:              case 0x2029:
3586                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3587                break;
3588                }
3589              }
3590            break;
3591    
3592            case OP_NOT_HSPACE:
3593            for (i = 1; i <= min; i++)
3594              {
3595              if (eptr >= md->end_subject)
3596                {
3597                SCHECK_PARTIAL();
3598                MRRETURN(MATCH_NOMATCH);
3599                }
3600              GETCHARINC(c, eptr);
3601              switch(c)
3602                {
3603                default: break;
3604                case 0x09:      /* HT */
3605                case 0x20:      /* SPACE */
3606                case 0xa0:      /* NBSP */
3607                case 0x1680:    /* OGHAM SPACE MARK */
3608                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3609                case 0x2000:    /* EN QUAD */
3610                case 0x2001:    /* EM QUAD */
3611                case 0x2002:    /* EN SPACE */
3612                case 0x2003:    /* EM SPACE */
3613                case 0x2004:    /* THREE-PER-EM SPACE */
3614                case 0x2005:    /* FOUR-PER-EM SPACE */
3615                case 0x2006:    /* SIX-PER-EM SPACE */
3616                case 0x2007:    /* FIGURE SPACE */
3617                case 0x2008:    /* PUNCTUATION SPACE */
3618                case 0x2009:    /* THIN SPACE */
3619                case 0x200A:    /* HAIR SPACE */
3620                case 0x202f:    /* NARROW NO-BREAK SPACE */
3621                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3622                case 0x3000:    /* IDEOGRAPHIC SPACE */
3623                MRRETURN(MATCH_NOMATCH);
3624                }
3625              }
3626            break;
3627    
3628            case OP_HSPACE:
3629            for (i = 1; i <= min; i++)
3630              {
3631              if (eptr >= md->end_subject)
3632                {
3633                SCHECK_PARTIAL();
3634                MRRETURN(MATCH_NOMATCH);
3635                }
3636              GETCHARINC(c, eptr);
3637              switch(c)
3638                {
3639                default: MRRETURN(MATCH_NOMATCH);
3640                case 0x09:      /* HT */
3641                case 0x20:      /* SPACE */
3642                case 0xa0:      /* NBSP */
3643                case 0x1680:    /* OGHAM SPACE MARK */
3644                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3645                case 0x2000:    /* EN QUAD */
3646                case 0x2001:    /* EM QUAD */
3647                case 0x2002:    /* EN SPACE */
3648                case 0x2003:    /* EM SPACE */
3649                case 0x2004:    /* THREE-PER-EM SPACE */
3650                case 0x2005:    /* FOUR-PER-EM SPACE */
3651                case 0x2006:    /* SIX-PER-EM SPACE */
3652                case 0x2007:    /* FIGURE SPACE */
3653                case 0x2008:    /* PUNCTUATION SPACE */
3654                case 0x2009:    /* THIN SPACE */
3655                case 0x200A:    /* HAIR SPACE */
3656                case 0x202f:    /* NARROW NO-BREAK SPACE */
3657                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3658                case 0x3000:    /* IDEOGRAPHIC SPACE */
3659                break;
3660                }
3661              }
3662            break;
3663    
3664            case OP_NOT_VSPACE:
3665            for (i = 1; i <= min; i++)
3666              {
3667              if (eptr >= md->end_subject)
3668                {
3669                SCHECK_PARTIAL();
3670                MRRETURN(MATCH_NOMATCH);
3671                }
3672              GETCHARINC(c, eptr);
3673              switch(c)
3674                {
3675                default: break;
3676                case 0x0a:      /* LF */
3677                case 0x0b:      /* VT */
3678                case 0x0c:      /* FF */
3679                case 0x0d:      /* CR */
3680                case 0x85:      /* NEL */
3681                case 0x2028:    /* LINE SEPARATOR */
3682                case 0x2029:    /* PARAGRAPH SEPARATOR */
3683                MRRETURN(MATCH_NOMATCH);
3684                }
3685              }
3686            break;
3687    
3688            case OP_VSPACE:
3689            for (i = 1; i <= min; i++)
3690              {
3691              if (eptr >= md->end_subject)
3692                {
3693                SCHECK_PARTIAL();
3694                MRRETURN(MATCH_NOMATCH);
3695                }
3696              GETCHARINC(c, eptr);
3697              switch(c)
3698                {
3699                default: MRRETURN(MATCH_NOMATCH);
3700                case 0x0a:      /* LF */
3701                case 0x0b:      /* VT */
3702                case 0x0c:      /* FF */
3703                case 0x0d:      /* CR */
3704                case 0x85:      /* NEL */
3705                case 0x2028:    /* LINE SEPARATOR */
3706                case 0x2029:    /* PARAGRAPH SEPARATOR */
3707              break;              break;
3708              }              }
3709            }            }
# Line 2803  for (;;) Line 3712  for (;;)
3712          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3714            {            {
3715            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3716                {
3717                SCHECK_PARTIAL();
3718                MRRETURN(MATCH_NOMATCH);
3719                }
3720            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3721            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3722              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3723            }            }
3724          break;          break;
3725    
3726          case OP_DIGIT:          case OP_DIGIT:
3727          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3728            {            {
3729            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3730               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3731              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3732                MRRETURN(MATCH_NOMATCH);
3733                }
3734              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3735                MRRETURN(MATCH_NOMATCH);
3736            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3737            }            }
3738          break;          break;
# Line 2823  for (;;) Line 3740  for (;;)
3740          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3742            {            {
3743            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3744               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3745              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3746            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3747                }
3748              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3749                MRRETURN(MATCH_NOMATCH);
3750              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3751            }            }
3752          break;          break;
3753    
3754          case OP_WHITESPACE:          case OP_WHITESPACE:
3755          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3756            {            {
3757            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3758               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3759              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3760                MRRETURN(MATCH_NOMATCH);
3761                }
3762              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3763                MRRETURN(MATCH_NOMATCH);
3764            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3765            }            }
3766          break;          break;
# Line 2843  for (;;) Line 3768  for (;;)
3768          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3769          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3770            {            {
3771            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3772               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3773              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3774            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3775                }
3776              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3777                MRRETURN(MATCH_NOMATCH);
3778              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3779            }            }
3780          break;          break;
3781    
3782          case OP_WORDCHAR:          case OP_WORDCHAR:
3783          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3784            {            {
3785            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3786               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3787              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3788                MRRETURN(MATCH_NOMATCH);
3789                }
3790              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3791                MRRETURN(MATCH_NOMATCH);
3792            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3793            }            }
3794          break;          break;
# Line 2868  for (;;) Line 3801  for (;;)
3801  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3802    
3803        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3804        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3805    
3806        switch(ctype)        switch(ctype)
3807          {          {
3808          case OP_ANY:          case OP_ANY:
3809          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3810            {            {
3811            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3812              {              {
3813              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3814              eptr++;              MRRETURN(MATCH_NOMATCH);
3815              }              }
3816              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3817              eptr++;
3818            }            }
         else eptr += min;  
3819          break;          break;
3820    
3821          case OP_ANYBYTE:          case OP_ALLANY:
3822            if (eptr > md->end_subject - min)
3823              {
3824              SCHECK_PARTIAL();
3825              MRRETURN(MATCH_NOMATCH);
3826              }
3827          eptr += min;          eptr += min;
3828          break;          break;
3829    
3830          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
3831          bytes are present in this case. */          if (eptr > md->end_subject - min)
3832              {
3833              SCHECK_PARTIAL();
3834              MRRETURN(MATCH_NOMATCH);
3835              }
3836            eptr += min;
3837            break;
3838    
3839          case OP_ANYNL:          case OP_ANYNL:
3840          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3841            {            {
3842            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3843                {
3844                SCHECK_PARTIAL();
3845                MRRETURN(MATCH_NOMATCH);
3846                }
3847            switch(*eptr++)            switch(*eptr++)
3848              {              {
3849              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3850              case 0x000d:              case 0x000d:
3851              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3852              break;              break;
3853              case 0x000a:              case 0x000a:
3854                break;
3855    
3856              case 0x000b:              case 0x000b:
3857              case 0x000c:              case 0x000c:
3858              case 0x0085:              case 0x0085:
3859                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3860              break;              break;
3861              }              }
3862            }            }
3863          break;          break;
3864    
3865          case OP_NOT_DIGIT:          case OP_NOT_HSPACE:
3866          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3867            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
3868              if (eptr >= md->end_subject)
3869                {
3870                SCHECK_PARTIAL();
3871                MRRETURN(MATCH_NOMATCH);
3872                }
3873              switch(*eptr++)
3874                {
3875                default: break;
3876                case 0x09:      /* HT */
3877                case 0x20:      /* SPACE */
3878                case 0xa0:      /* NBSP */
3879                MRRETURN(MATCH_NOMATCH);
3880                }
3881              }
3882          break;          break;
3883    
3884          case OP_DIGIT:          case OP_HSPACE:
3885          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3886            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
3887              if (eptr >= md->end_subject)
3888                {
3889                SCHECK_PARTIAL();
3890                MRRETURN(MATCH_NOMATCH);
3891                }
3892              switch(*eptr++)
3893                {
3894                default: MRRETURN(MATCH_NOMATCH);
3895                case 0x09:      /* HT */
3896                case 0x20:      /* SPACE */
3897                case 0xa0:      /* NBSP */
3898                break;
3899                }
3900              }
3901          break;          break;
3902    
3903          case OP_NOT_WHITESPACE:          case OP_NOT_VSPACE:
3904          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3905            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
3906              if (eptr >= md->end_subject)
3907                {
3908                SCHECK_PARTIAL();
3909                MRRETURN(MATCH_NOMATCH);
3910                }
3911              switch(*eptr++)
3912                {
3913                default: break;
3914                case 0x0a:      /* LF */
3915                case 0x0b:      /* VT */
3916                case 0x0c:      /* FF */
3917                case 0x0d:      /* CR */
3918                case 0x85:      /* NEL */
3919                MRRETURN(MATCH_NOMATCH);
3920                }
3921              }
3922            break;
3923    
3924            case OP_VSPACE:
3925            for (i = 1; i <= min; i++)
3926              {
3927              if (eptr >= md->end_subject)
3928                {
3929                SCHECK_PARTIAL();
3930                MRRETURN(MATCH_NOMATCH);
3931                }
3932              switch(*eptr++)
3933                {
3934                default: MRRETURN(MATCH_NOMATCH);
3935                case 0x0a:      /* LF */
3936                case 0x0b:      /* VT */
3937                case 0x0c:      /* FF */
3938                case 0x0d:      /* CR */
3939                case 0x85:      /* NEL */
3940                break;
3941                }
3942              }
3943            break;
3944    
3945            case OP_NOT_DIGIT:
3946            for (i = 1; i <= min; i++)
3947              {
3948              if (eptr >= md->end_subject)
3949                {
3950                SCHECK_PARTIAL();
3951                MRRETURN(MATCH_NOMATCH);
3952                }
3953              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
3954              }
3955            break;
3956    
3957            case OP_DIGIT:
3958            for (i = 1; i <= min; i++)
3959              {
3960              if (eptr >= md->end_subject)
3961                {
3962                SCHECK_PARTIAL();
3963                MRRETURN(MATCH_NOMATCH);
3964                }
3965              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
3966              }
3967            break;
3968    
3969            case OP_NOT_WHITESPACE:
3970            for (i = 1; i <= min; i++)
3971              {
3972              if (eptr >= md->end_subject)
3973                {
3974                SCHECK_PARTIAL();
3975                MRRETURN(MATCH_NOMATCH);
3976                }
3977              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
3978              }
3979          break;          break;
3980    
3981          case OP_WHITESPACE:          case OP_WHITESPACE:
3982          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3983            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
3984              if (eptr >= md->end_subject)
3985                {
3986                SCHECK_PARTIAL();
3987                MRRETURN(MATCH_NOMATCH);
3988                }
3989              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
3990              }
3991          break;          break;
3992    
3993          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3994          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3995              {
3996              if (eptr >= md->end_subject)
3997                {
3998                SCHECK_PARTIAL();
3999                MRRETURN(MATCH_NOMATCH);
4000                }
4001            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4002              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4003              }
4004          break;          break;
4005    
4006          case OP_WORDCHAR:          case OP_WORDCHAR:
4007          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4008              {
4009              if (eptr >= md->end_subject)
4010                {
4011                SCHECK_PARTIAL();
4012                MRRETURN(MATCH_NOMATCH);
4013                }
4014            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
4015              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4016              }
4017          break;          break;
4018    
4019          default:          default:
# Line 2968  for (;;) Line 4041  for (;;)
4041              {              {
4042              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4043              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4044              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4045                if (eptr >= md->end_subject)
4046                  {
4047                  SCHECK_PARTIAL();
4048                  MRRETURN(MATCH_NOMATCH);
4049                  }
4050              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4051              if (prop_fail_result) RRETURN(MATCH_NOMATCH);              if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4052              }              }
4053            /* Control never gets here */            /* Control never gets here */
4054    
# Line 2979  for (;;) Line 4057  for (;;)
4057              {              {
4058              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4059              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4060              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4061                if (eptr >= md->end_subject)
4062                  {
4063                  SCHECK_PARTIAL();
4064                  MRRETURN(MATCH_NOMATCH);
4065                  }
4066              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4067              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4068              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4069                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4070                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
4071                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4072              }              }
4073            /* Control never gets here */            /* Control never gets here */
4074    
# Line 2994  for (;;) Line 4077  for (;;)
4077              {              {
4078              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4079              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4080              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4081                if (eptr >= md->end_subject)
4082                  {
4083                  SCHECK_PARTIAL();
4084                  MRRETURN(MATCH_NOMATCH);
4085                  }
4086              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4087              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4088              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4089                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4090              }              }
4091            /* Control never gets here */            /* Control never gets here */
4092    
# Line 3007  for (;;) Line 4095  for (;;)
4095              {              {
4096              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4097              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4098              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4099                if (eptr >= md->end_subject)
4100                  {
4101                  SCHECK_PARTIAL();
4102                  MRRETURN(MATCH_NOMATCH);
4103                  }
4104              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4105              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4106              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4107                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4108              }              }
4109            /* Control never gets here */            /* Control never gets here */
4110    
# Line 3020  for (;;) Line 4113  for (;;)
4113              {              {
4114              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4115              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4116              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4117                if (eptr >= md->end_subject)
4118                  {
4119                  SCHECK_PARTIAL();
4120                  MRRETURN(MATCH_NOMATCH);
4121                  }
4122              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
4123              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4124              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4125                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4126              }              }
4127            /* Control never gets here */            /* Control never gets here */
4128    
# Line 3042  for (;;) Line 4140  for (;;)
4140            {            {
4141            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4142            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4143            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4144              if (eptr >= md->end_subject)
4145                {
4146                SCHECK_PARTIAL();
4147                MRRETURN(MATCH_NOMATCH);
4148                }
4149            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4150            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4151            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4152            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4153              {              {
4154              int len = 1;              int len = 1;
4155              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
4156                {                else { GETCHARLEN(c, eptr, len); }
4157                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
4158              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4159              eptr += len;              eptr += len;
4160              }              }
# Line 3071  for (;;) Line 4172  for (;;)
4172            {            {
4173            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4174            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4175            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4176                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&            if (eptr >= md->end_subject)
4177                  IS_NEWLINE(eptr)))              {
4178              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4179                MRRETURN(MATCH_NOMATCH);
4180                }
4181              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4182                MRRETURN(MATCH_NOMATCH);
4183            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4184            switch(ctype)            switch(ctype)
4185              {              {
4186              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
4187              break;              case OP_ALLANY:
   
4188              case OP_ANYBYTE:              case OP_ANYBYTE:
4189              break;              break;
4190    
4191              case OP_ANYNL:              case OP_ANYNL:
4192              switch(c)              switch(c)
4193                {                {
4194                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4195                case 0x000d:                case 0x000d:
4196                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4197                break;                break;
4198                case 0x000a:                case 0x000a:
4199                  break;
4200    
4201                case 0x000b:                case 0x000b:
4202                case 0x000c:                case 0x000c:
4203                case 0x0085:                case 0x0085:
4204                case 0x2028:                case 0x2028:
4205                case 0x2029:                case 0x2029:
4206                  if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4207                  break;
4208                  }
4209                break;
4210    
4211                case OP_NOT_HSPACE:
4212                switch(c)
4213                  {
4214                  default: break;
4215                  case 0x09:      /* HT */
4216                  case 0x20:      /* SPACE */
4217                  case 0xa0:      /* NBSP */
4218                  case 0x1680:    /* OGHAM SPACE MARK */
4219                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4220                  case 0x2000:    /* EN QUAD */
4221                  case 0x2001:    /* EM QUAD */
4222                  case 0x2002:    /* EN SPACE */
4223                  case 0x2003:    /* EM SPACE */
4224                  case 0x2004:    /* THREE-PER-EM SPACE */
4225                  case 0x2005:    /* FOUR-PER-EM SPACE */
4226                  case 0x2006:    /* SIX-PER-EM SPACE */
4227                  case 0x2007:    /* FIGURE SPACE */
4228                  case 0x2008:    /* PUNCTUATION SPACE */
4229                  case 0x2009:    /* THIN SPACE */
4230                  case 0x200A:    /* HAIR SPACE */
4231                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4232                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4233                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4234                  MRRETURN(MATCH_NOMATCH);
4235                  }
4236                break;
4237    
4238                case OP_HSPACE:
4239                switch(c)
4240                  {
4241                  default: MRRETURN(MATCH_NOMATCH);
4242                  case 0x09:      /* HT */
4243                  case 0x20:      /* SPACE */
4244                  case 0xa0:      /* NBSP */
4245                  case 0x1680:    /* OGHAM SPACE MARK */
4246                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
4247                  case 0x2000:    /* EN QUAD */
4248                  case 0x2001:    /* EM QUAD */
4249                  case 0x2002:    /* EN SPACE */
4250                  case 0x2003:    /* EM SPACE */
4251                  case 0x2004:    /* THREE-PER-EM SPACE */
4252                  case 0x2005:    /* FOUR-PER-EM SPACE */
4253                  case 0x2006:    /* SIX-PER-EM SPACE */
4254                  case 0x2007:    /* FIGURE SPACE */
4255                  case 0x2008:    /* PUNCTUATION SPACE */
4256                  case 0x2009:    /* THIN SPACE */
4257                  case 0x200A:    /* HAIR SPACE */
4258                  case 0x202f:    /* NARROW NO-BREAK SPACE */
4259                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4260                  case 0x3000:    /* IDEOGRAPHIC SPACE */
4261                  break;
4262                  }
4263                break;
4264    
4265                case OP_NOT_VSPACE:
4266                switch(c)
4267                  {
4268                  default: break;
4269                  case 0x0a:      /* LF */
4270                  case 0x0b:      /* VT */
4271                  case 0x0c:      /* FF */
4272                  case 0x0d:      /* CR */
4273                  case 0x85:      /* NEL */
4274                  case 0x2028:    /* LINE SEPARATOR */
4275                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4276                  MRRETURN(MATCH_NOMATCH);
4277                  }
4278                break;
4279    
4280                case OP_VSPACE:
4281                switch(c)
4282                  {
4283                  default: MRRETURN(MATCH_NOMATCH);
4284                  case 0x0a:      /* LF */
4285                  case 0x0b:      /* VT */
4286                  case 0x0c:      /* FF */
4287                  case 0x0d:      /* CR */
4288                  case 0x85:      /* NEL */
4289                  case 0x2028:    /* LINE SEPARATOR */
4290                  case 0x2029:    /* PARAGRAPH SEPARATOR */
4291                break;                break;
4292                }                }
4293              break;              break;
4294    
4295              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
4296              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4297                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4298              break;              break;
4299    
4300              case OP_DIGIT:              case OP_DIGIT:
4301              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4302                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4303              break;              break;
4304    
4305              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
4306              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4307                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4308              break;              break;
4309    
4310              case OP_WHITESPACE:              case OP_WHITESPACE:
4311              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4312                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4313              break;              break;
4314    
4315              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
4316              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4317                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4318              break;              break;
4319    
4320              case OP_WORDCHAR:              case OP_WORDCHAR:
4321              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4322                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4323              break;              break;
4324    
4325              default:              default:
# Line 3145  for (;;) Line 4335  for (;;)
4335            {            {
4336            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4337            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4338            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4339                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4340              RRETURN(MATCH_NOMATCH);              {
4341                SCHECK_PARTIAL();
4342                MRRETURN(MATCH_NOMATCH);
4343                }
4344              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4345                MRRETURN(MATCH_NOMATCH);
4346            c = *eptr++;            c = *eptr++;
4347            switch(ctype)            switch(ctype)
4348              {              {
4349              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4350              break;              case OP_ALLANY:
   
4351              case OP_ANYBYTE:              case OP_ANYBYTE:
4352              break;              break;
4353    
4354              case OP_ANYNL:              case OP_ANYNL:
4355              switch(c)              switch(c)
4356                {                {
4357                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4358                case 0x000d:                case 0x000d:
4359                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4360                break;                break;
4361    
4362                case 0x000a:                case 0x000a:
4363                  break;
4364    
4365                case 0x000b:                case 0x000b:
4366                case 0x000c:                case 0x000c:
4367                case 0x0085:                case 0x0085:
4368                  if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4369                  break;
4370                  }
4371                break;
4372    
4373                case OP_NOT_HSPACE:
4374                switch(c)
4375                  {
4376                  default: break;
4377                  case 0x09:      /* HT */
4378                  case 0x20:      /* SPACE */
4379                  case 0xa0:      /* NBSP */
4380                  MRRETURN(MATCH_NOMATCH);
4381                  }
4382                break;
4383    
4384                case OP_HSPACE:
4385                switch(c)
4386                  {
4387                  default: MRRETURN(MATCH_NOMATCH);
4388                  case 0x09:      /* HT */
4389                  case 0x20:      /* SPACE */
4390                  case 0xa0:      /* NBSP */
4391                  break;
4392                  }
4393                break;
4394    
4395                case OP_NOT_VSPACE:
4396                switch(c)
4397                  {
4398                  default: break;
4399                  case 0x0a:      /* LF */
4400                  case 0x0b:      /* VT */
4401                  case 0x0c:      /* FF */
4402                  case 0x0d:      /* CR */
4403                  case 0x85:      /* NEL */
4404                  MRRETURN(MATCH_NOMATCH);
4405                  }
4406                break;
4407    
4408                case OP_VSPACE:
4409                switch(c)
4410                  {
4411                  default: MRRETURN(MATCH_NOMATCH);
4412                  case 0x0a:      /* LF */
4413                  case 0x0b:      /* VT */
4414                  case 0x0c:      /* FF */
4415                  case 0x0d:      /* CR */
4416                  case 0x85:      /* NEL */
4417                break;                break;
4418                }                }
4419              break;              break;
4420    
4421              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
4422              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4423              break;              break;
4424    
4425              case OP_DIGIT:              case OP_DIGIT:
4426              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4427              break;              break;
4428    
4429              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
4430              if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4431              break;              break;
4432    
4433              case OP_WHITESPACE:              case OP_WHITESPACE:
4434              if  ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);              if  ((md->ctypes[c] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4435              break;              break;
4436    
4437              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
4438              if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_word) != 0) MRRETURN(MATCH_NOMATCH);
4439              break;              break;
4440    
4441              case OP_WORDCHAR:              case OP_WORDCHAR:
4442              if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_word) == 0) MRRETURN(MATCH_NOMATCH);
4443              break;              break;
4444    
4445              default:              default:
# Line 3222  for (;;) Line 4467  for (;;)
4467            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4468              {              {
4469              int len = 1;              int len = 1;
4470              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4471                  {
4472                  SCHECK_PARTIAL();
4473                  break;
4474                  }
4475              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4476              if (prop_fail_result) break;              if (prop_fail_result) break;
4477              eptr+= len;              eptr+= len;
# Line 3233  for (;;) Line 4482  for (;;)
4482            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4483              {              {
4484              int len = 1;              int len = 1;
4485              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4486                  {
4487                  SCHECK_PARTIAL();
4488                  break;
4489                  }
4490              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4491              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4492              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4493                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4494                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3248  for (;;) Line 4501  for (;;)
4501            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4502              {              {
4503              int len = 1;              int len = 1;
4504              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4505                  {
4506                  SCHECK_PARTIAL();
4507                  break;
4508                  }
4509              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4510              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4511              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4512                break;                break;
4513              eptr+= len;              eptr+= len;
# Line 3261  for (;;) Line 4518  for (;;)
4518            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4519              {              {
4520              int len = 1;              int len = 1;
4521              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4522                  {
4523                  SCHECK_PARTIAL();
4524                  break;
4525                  }
4526              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4527              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
4528              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4529                break;                break;
4530              eptr+= len;              eptr+= len;
# Line 3274  for (;;) Line 4535  for (;;)
4535            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4536              {              {
4537              int len = 1;              int len = 1;
4538              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
4539                  {
4540                  SCHECK_PARTIAL();
4541                  break;
4542                  }
4543              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
4544              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
4545              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4546                break;                break;
4547              eptr+= len;              eptr+= len;
# Line 3292  for (;;) Line 4557  for (;;)
4557            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
4558            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4559            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4560            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
4561            }            }
4562          }          }
4563    
# Line 3303  for (;;) Line 4568  for (;;)
4568          {          {
4569          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4570            {            {
4571            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
4572                {
4573                SCHECK_PARTIAL();
4574                break;
4575                }
4576            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4577            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4578            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
4579            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4580              {              {
# Line 3314  for (;;) Line 4583  for (;;)
4583                {                {
4584                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4585                }                }
4586              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4587              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4588              eptr += len;              eptr += len;
4589              }              }
# Line 3323  for (;;) Line 4592  for (;;)
4592          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
4593    
4594          if (possessive) continue;          if (possessive) continue;
4595    
4596          for(;;)          for(;;)
4597            {            {
4598            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
# Line 3331  for (;;) Line 4601  for (;;)
4601            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
4602              {              {
4603              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
4604              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
4605                {                {
4606                  BACKCHAR(eptr);
4607                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
4608                }                }
4609              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
4610              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4611              eptr--;              eptr--;
4612              }              }
# Line 3354  for (;;) Line 4624  for (;;)
4624          switch(ctype)          switch(ctype)
4625            {            {
4626            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
4627            if (max < INT_MAX)            if (max < INT_MAX)
4628              {              {
4629              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
4630                {                {
4631                for (i = min; i < max; i++)                if (eptr >= md->end_subject)
4632                  {                  {
4633                  if (eptr >= md->end_subject) break;                  SCHECK_PARTIAL();
4634                  eptr++;                  break;
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
4635                  }                  }
4636                  if (IS_NEWLINE(eptr)) break;
4637                  eptr++;
4638                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4639                }                }
4640              }              }
4641    
# Line 3385  for (;;) Line 4643  for (;;)
4643    
4644            else            else
4645              {              {
4646              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
4647                {                {
4648                for (i = min; i < max; i++)                if (eptr >= md->end_subject)
4649                  {                  {
4650                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  SCHECK_PARTIAL();
4651                  eptr++;                  break;
4652                  }                  }
4653                break;                if (IS_NEWLINE(eptr)) break;
4654                  eptr++;
4655                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4656                }                }
4657              else              }
4658              break;
4659    
4660              case OP_ALLANY:
4661              if (max < INT_MAX)
4662                {
4663                for (i = min; i < max; i++)
4664                {                {
4665                c = max - min;                if (eptr >= md->end_subject)
4666                if (c > (unsigned int)(md->end_subject - eptr))                  {
4667                  c = md->end_subject - eptr;                  SCHECK_PARTIAL();
4668                eptr += c;                  break;
4669                    }
4670                  eptr++;
4671                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4672                }                }
4673              }              }
4674              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
4675            break;            break;
4676    
4677            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3409  for (;;) Line 4679  for (;;)
4679            case OP_ANYBYTE:            case OP_ANYBYTE:
4680            c = max - min;            c = max - min;
4681            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
4682              c = md->end_subject - eptr;              {
4683            eptr += c;              eptr = md->end_subject;
4684                SCHECK_PARTIAL();
4685   &nbs