/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 190 by ph10, Thu Jul 19 10:38:20 2007 UTC revision 563 by ph10, Sun Oct 31 14:15:04 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 70  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 83  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 135  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 152  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 205  variable instead of being passed in the Line 247  variable instead of being passed in the
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251  /* Numbers for RMATCH calls */  below must be updated in sync.  */
252    
253  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,  enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,         RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,         RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,         RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257         RM41,  RM42, RM43, RM44, RM45, RM46, RM47 };         RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. Note that the "rw" argument of RMATCH isn't  versions and production versions. Note that the "rw" argument of RMATCH isn't
263  actuall used in this definition. */  actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267    
268  #ifdef DEBUG  #ifdef PCRE_DEBUG
269  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 236  actuall used in this definition. */ Line 279  actuall used in this definition. */
279    }    }
280  #else  #else
281  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
# Line 251  argument of match(), which never changes Line 294  argument of match(), which never changes
294    
295  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
298      if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299    frame->Xwhere = rw; \    frame->Xwhere = rw; \
300    newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301    newframe->Xecode = rb;\    newframe->Xecode = rb;\
302    newframe->Xmstart = mstart;\    newframe->Xmstart = mstart;\
303      newframe->Xmarkptr = markptr;\
304    newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
305    newframe->Xims = re;\    newframe->Xims = re;\
306    newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
# Line 271  argument of match(), which never changes Line 316  argument of match(), which never changes
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      rrc = ra;\      rrc = ra;\
# Line 290  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340    const uschar *Xmstart;    USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 301  typedef struct heapframe { Line 347  typedef struct heapframe {
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
# Line 328  typedef struct heapframe { Line 376  typedef struct heapframe {
376    uschar Xocchars[8];    uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    unsigned int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
# Line 363  typedef struct heapframe { Line 412  typedef struct heapframe {
412    
413  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417    /* These macros pack up tests that are used for partial matching, and which
418    appears several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && \
426          eptr > md->start_used_ptr) \
427        { \
428        md->hitend = TRUE; \
429        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
430        }
431    
432    #define SCHECK_PARTIAL()\
433      if (md->partial != 0 && eptr > md->start_used_ptr) \
434        { \
435        md->hitend = TRUE; \
436        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
437        }
438    
439    
440  Performance note: It might be tempting to extract commonly used fields from the  /* Performance note: It might be tempting to extract commonly used fields from
441  md structure (e.g. utf8, end_subject) into individual variables to improve  the md structure (e.g. utf8, end_subject) into individual variables to improve
442  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
443  made performance worse.  made performance worse.
444    
# Line 375  Arguments: Line 447  Arguments:
447     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
448     mstart      pointer to the current match start position (can be modified     mstart      pointer to the current match start position (can be modified
449                   by encountering \K)                   by encountering \K)
450       markptr     pointer to the most recent MARK name, or NULL
451     offset_top  current top pointer     offset_top  current top pointer
452     md          pointer to "static" info for the match     md          pointer to "static" info for the match
453     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 384  Arguments: Line 457  Arguments:
457                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
458                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
459                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
460     rdepth      the recursion depth     rdepth      the recursion depth
461    
462  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
463                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
464                   a negative MATCH_xxx value for PRUNE, SKIP, etc
465                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
466                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
467  */  */
468    
469  static int  static int
470  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
471    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
472    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
473  {  {
474  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
475  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 408  register unsigned int c; /* Character Line 481  register unsigned int c; /* Character
481  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
482    
483  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
484    int condcode;
485    
486  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
487  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 415  heap storage. Set up the top-level frame Line 489  heap storage. Set up the top-level frame
489  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
490    
491  #ifdef NO_RECURSE  #ifdef NO_RECURSE
492  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
493    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
494  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
495    
496  /* Copy in the original argument variables */  /* Copy in the original argument variables */
# Line 423  frame->Xprevframe = NULL; /* Line 498  frame->Xprevframe = NULL; /*
498  frame->Xeptr = eptr;  frame->Xeptr = eptr;
499  frame->Xecode = ecode;  frame->Xecode = ecode;
500  frame->Xmstart = mstart;  frame->Xmstart = mstart;
501    frame->Xmarkptr = markptr;
502  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
503  frame->Xims = ims;  frame->Xims = ims;
504  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 438  HEAP_RECURSE: Line 514  HEAP_RECURSE:
514  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
515  #define ecode              frame->Xecode  #define ecode              frame->Xecode
516  #define mstart             frame->Xmstart  #define mstart             frame->Xmstart
517    #define markptr            frame->Xmarkptr
518  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
519  #define ims                frame->Xims  #define ims                frame->Xims
520  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 450  HEAP_RECURSE: Line 527  HEAP_RECURSE:
527  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
528  #endif  #endif
529  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
530    #define codelink           frame->Xcodelink
531  #define data               frame->Xdata  #define data               frame->Xdata
532  #define next               frame->Xnext  #define next               frame->Xnext
533  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 530  int oclength; Line 608  int oclength;
608  uschar occhars[8];  uschar occhars[8];
609  #endif  #endif
610    
611    int codelink;
612  int ctype;  int ctype;
613  int length;  int length;
614  int max;  int max;
# Line 563  TAIL_RECURSE: Line 642  TAIL_RECURSE:
642  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
643  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
644  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
645  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
646  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
647  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
648  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
# Line 586  original_ims = ims; /* Save for reset Line 665  original_ims = ims; /* Save for reset
665  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
666  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
667  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
668  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
669  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
670  already used. */  block that is used is on the stack, so a new one may be required for each
671    match(). */
672    
673  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
674    {    {
675    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
676    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
677      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
678    }    }
679    
680  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 611  for (;;) Line 684  for (;;)
684    minimize = possessive = FALSE;    minimize = possessive = FALSE;
685    op = *ecode;    op = *ecode;
686    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > mstart)  
     md->hitend = TRUE;  
   
687    switch(op)    switch(op)
688      {      {
689        case OP_MARK:
690        markptr = ecode + 2;
691        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
692          ims, eptrb, flags, RM55);
693    
694        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
695        argument, and we must check whether that argument matches this MARK's
696        argument. It is passed back in md->start_match_ptr (an overloading of that
697        variable). If it does match, we reset that variable to the current subject
698        position and return MATCH_SKIP. Otherwise, pass back the return code
699        unaltered. */
700    
701        if (rrc == MATCH_SKIP_ARG &&
702            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
703          {
704          md->start_match_ptr = eptr;
705          RRETURN(MATCH_SKIP);
706          }
707    
708        if (md->mark == NULL) md->mark = markptr;
709        RRETURN(rrc);
710    
711        case OP_FAIL:
712        MRRETURN(MATCH_NOMATCH);
713    
714        /* COMMIT overrides PRUNE, SKIP, and THEN */
715    
716        case OP_COMMIT:
717        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
718          ims, eptrb, flags, RM52);
719        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
720            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
721            rrc != MATCH_THEN)
722          RRETURN(rrc);
723        MRRETURN(MATCH_COMMIT);
724    
725        /* PRUNE overrides THEN */
726    
727        case OP_PRUNE:
728        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
729          ims, eptrb, flags, RM51);
730        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
731        MRRETURN(MATCH_PRUNE);
732    
733        case OP_PRUNE_ARG:
734        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
735          ims, eptrb, flags, RM56);
736        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
737        md->mark = ecode + 2;
738        RRETURN(MATCH_PRUNE);
739    
740        /* SKIP overrides PRUNE and THEN */
741    
742        case OP_SKIP:
743        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
744          ims, eptrb, flags, RM53);
745        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
746          RRETURN(rrc);
747        md->start_match_ptr = eptr;   /* Pass back current position */
748        MRRETURN(MATCH_SKIP);
749    
750        case OP_SKIP_ARG:
751        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
752          ims, eptrb, flags, RM57);
753        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
754          RRETURN(rrc);
755    
756        /* Pass back the current skip name by overloading md->start_match_ptr and
757        returning the special MATCH_SKIP_ARG return code. This will either be
758        caught by a matching MARK, or get to the top, where it is treated the same
759        as PRUNE. */
760    
761        md->start_match_ptr = ecode + 2;
762        RRETURN(MATCH_SKIP_ARG);
763    
764        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
765        the alt that is at the start of the current branch. This makes it possible
766        to skip back past alternatives that precede the THEN within the current
767        branch. */
768    
769        case OP_THEN:
770        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
771          ims, eptrb, flags, RM54);
772        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
773        md->start_match_ptr = ecode - GET(ecode, 1);
774        MRRETURN(MATCH_THEN);
775    
776        case OP_THEN_ARG:
777        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
778          offset_top, md, ims, eptrb, flags, RM58);
779        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
780        md->start_match_ptr = ecode - GET(ecode, 1);
781        md->mark = ecode + LINK_SIZE + 2;
782        RRETURN(MATCH_THEN);
783    
784      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
785      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
786      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 640  for (;;) Line 800  for (;;)
800      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
801      offset = number << 1;      offset = number << 1;
802    
803  #ifdef DEBUG  #ifdef PCRE_DEBUG
804      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
805      printf("subject=");      printf("subject=");
806      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 655  for (;;) Line 815  for (;;)
815        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
816    
817        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
818        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
819            (int)(eptr - md->start_subject);
820    
821        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
822        do        do
823          {          {
824          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
825            ims, eptrb, flags, RM1);            ims, eptrb, flags, RM1);
826          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
827                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
828              RRETURN(rrc);
829          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
830          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
831          }          }
# Line 674  for (;;) Line 837  for (;;)
837        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
838        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
839    
840          if (rrc != MATCH_THEN) md->mark = markptr;
841        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
842        }        }
843    
844      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
845      bracket. */      as a non-capturing bracket. */
846    
847        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
849    
850      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
851    
852        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
854    
855      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
856      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
857      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
858      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
859        is set. */
860    
861      case OP_BRA:      case OP_BRA:
862      case OP_SBRA:      case OP_SBRA:
# Line 693  for (;;) Line 864  for (;;)
864      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
865      for (;;)      for (;;)
866        {        {
867        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
868          {          {
869          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
870          flags |= match_tail_recursed;            {
871          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
872          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
873              goto TAIL_RECURSE;
874              }
875    
876            /* Possibly empty group; can't use tail recursion. */
877    
878            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
879              eptrb, flags, RM48);
880            if (rrc == MATCH_NOMATCH) md->mark = markptr;
881            RRETURN(rrc);
882          }          }
883    
884        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
# Line 706  for (;;) Line 886  for (;;)
886    
887        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
888          eptrb, flags, RM2);          eptrb, flags, RM2);
889        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
890              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
891            RRETURN(rrc);
892        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
893        }        }
894      /* Control never reaches here. */      /* Control never reaches here. */
# Line 719  for (;;) Line 901  for (;;)
901    
902      case OP_COND:      case OP_COND:
903      case OP_SCOND:      case OP_SCOND:
904      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
905    
906        /* Because of the way auto-callout works during compile, a callout item is
907        inserted between OP_COND and an assertion condition. */
908    
909        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
910        {        {
911        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
912        condition = md->recursive != NULL &&          {
913          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
914        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
915            cb.callout_number   = ecode[LINK_SIZE+2];
916            cb.offset_vector    = md->offset_vector;
917            cb.subject          = (PCRE_SPTR)md->start_subject;
918            cb.subject_length   = (int)(md->end_subject - md->start_subject);
919            cb.start_match      = (int)(mstart - md->start_subject);
920            cb.current_position = (int)(eptr - md->start_subject);
921            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
922            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
923            cb.capture_top      = offset_top/2;
924            cb.capture_last     = md->capture_last;
925            cb.callout_data     = md->callout_data;
926            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
927            if (rrc < 0) RRETURN(rrc);
928            }
929          ecode += _pcre_OP_lengths[OP_CALLOUT];
930          }
931    
932        condcode = ecode[LINK_SIZE+1];
933    
934        /* Now see what the actual condition is */
935    
936        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
937          {
938          if (md->recursive == NULL)                /* Not recursing => FALSE */
939            {
940            condition = FALSE;
941            ecode += GET(ecode, 1);
942            }
943          else
944            {
945            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
946            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
947    
948            /* If the test is for recursion into a specific subpattern, and it is
949            false, but the test was set up by name, scan the table to see if the
950            name refers to any other numbers, and test them. The condition is true
951            if any one is set. */
952    
953            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
954              {
955              uschar *slotA = md->name_table;
956              for (i = 0; i < md->name_count; i++)
957                {
958                if (GET2(slotA, 0) == recno) break;
959                slotA += md->name_entry_size;
960                }
961    
962              /* Found a name for the number - there can be only one; duplicate
963              names for different numbers are allowed, but not vice versa. First
964              scan down for duplicates. */
965    
966              if (i < md->name_count)
967                {
968                uschar *slotB = slotA;
969                while (slotB > md->name_table)
970                  {
971                  slotB -= md->name_entry_size;
972                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
973                    {
974                    condition = GET2(slotB, 0) == md->recursive->group_num;
975                    if (condition) break;
976                    }
977                  else break;
978                  }
979    
980                /* Scan up for duplicates */
981    
982                if (!condition)
983                  {
984                  slotB = slotA;
985                  for (i++; i < md->name_count; i++)
986                    {
987                    slotB += md->name_entry_size;
988                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
989                      {
990                      condition = GET2(slotB, 0) == md->recursive->group_num;
991                      if (condition) break;
992                      }
993                    else break;
994                    }
995                  }
996                }
997              }
998    
999            /* Choose branch according to the condition */
1000    
1001            ecode += condition? 3 : GET(ecode, 1);
1002            }
1003        }        }
1004    
1005      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1006        {        {
1007        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1008        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1009    
1010          /* If the numbered capture is unset, but the reference was by name,
1011          scan the table to see if the name refers to any other numbers, and test
1012          them. The condition is true if any one is set. This is tediously similar
1013          to the code above, but not close enough to try to amalgamate. */
1014    
1015          if (!condition && condcode == OP_NCREF)
1016            {
1017            int refno = offset >> 1;
1018            uschar *slotA = md->name_table;
1019    
1020            for (i = 0; i < md->name_count; i++)
1021              {
1022              if (GET2(slotA, 0) == refno) break;
1023              slotA += md->name_entry_size;
1024              }
1025    
1026            /* Found a name for the number - there can be only one; duplicate names
1027            for different numbers are allowed, but not vice versa. First scan down
1028            for duplicates. */
1029    
1030            if (i < md->name_count)
1031              {
1032              uschar *slotB = slotA;
1033              while (slotB > md->name_table)
1034                {
1035                slotB -= md->name_entry_size;
1036                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1037                  {
1038                  offset = GET2(slotB, 0) << 1;
1039                  condition = offset < offset_top &&
1040                    md->offset_vector[offset] >= 0;
1041                  if (condition) break;
1042                  }
1043                else break;
1044                }
1045    
1046              /* Scan up for duplicates */
1047    
1048              if (!condition)
1049                {
1050                slotB = slotA;
1051                for (i++; i < md->name_count; i++)
1052                  {
1053                  slotB += md->name_entry_size;
1054                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1055                    {
1056                    offset = GET2(slotB, 0) << 1;
1057                    condition = offset < offset_top &&
1058                      md->offset_vector[offset] >= 0;
1059                    if (condition) break;
1060                    }
1061                  else break;
1062                  }
1063                }
1064              }
1065            }
1066    
1067          /* Choose branch according to the condition */
1068    
1069        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1070        }        }
1071    
1072      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1073        {        {
1074        condition = FALSE;        condition = FALSE;
1075        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 754  for (;;) Line 1089  for (;;)
1089          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1090          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1091          }          }
1092        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1093                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1094          {          {
1095          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1096          }          }
1097        else        else
1098          {          {
1099          condition = FALSE;          condition = FALSE;
1100          ecode += GET(ecode, 1);          ecode += codelink;
1101          }          }
1102        }        }
1103    
1104      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1105      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1106      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1107        group. If the second alternative doesn't exist, we can just plough on. */
1108    
1109      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1110        {        {
1111        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1112        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1113        goto TAIL_RECURSE;          {
1114            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1115            RRETURN(rrc);
1116            }
1117          else                       /* Group must match something */
1118            {
1119            flags = 0;
1120            goto TAIL_RECURSE;
1121            }
1122        }        }
1123      else      else                         /* Condition false & no alternative */
1124        {        {
1125        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1126        }        }
1127      break;      break;
1128    
1129    
1130      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1131      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1132    
1133        case OP_CLOSE:
1134        number = GET2(ecode, 1);
1135        offset = number << 1;
1136    
1137    #ifdef PCRE_DEBUG
1138          printf("end bracket %d at *ACCEPT", number);
1139          printf("\n");
1140    #endif
1141    
1142        md->capture_last = number;
1143        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1144          {
1145          md->offset_vector[offset] =
1146            md->offset_vector[md->offset_end - number];
1147          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1148          if (offset_top <= offset) offset_top = offset + 2;
1149          }
1150        ecode += 3;
1151        break;
1152    
1153    
1154        /* End of the pattern, either real or forced. If we are in a top-level
1155        recursion, we should restore the offsets appropriately and continue from
1156        after the call. */
1157    
1158        case OP_ACCEPT:
1159      case OP_END:      case OP_END:
1160      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1161        {        {
# Line 793  for (;;) Line 1164  for (;;)
1164        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1165        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1166          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1167        mstart = rec->save_start;        offset_top = rec->save_offset_top;
1168        ims = original_ims;        ims = original_ims;
1169        ecode = rec->after_call;        ecode = rec->after_call;
1170        break;        break;
1171        }        }
1172    
1173      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1174      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1175        the subject. In both cases, backtracking will then try other alternatives,
1176        if any. */
1177    
1178        if (eptr == mstart &&
1179            (md->notempty ||
1180              (md->notempty_atstart &&
1181                mstart == md->start_subject + md->start_offset)))
1182          MRRETURN(MATCH_NOMATCH);
1183    
1184        /* Otherwise, we have a match. */
1185    
     if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);  
1186      md->end_match_ptr = eptr;           /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
1187      md->end_offset_top = offset_top;    /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
1188      md->start_match_ptr = mstart;  /* and the start (\K can modify) */      md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1189      RRETURN(MATCH_MATCH);  
1190        /* For some reason, the macros don't work properly if an expression is
1191        given as the argument to MRRETURN when the heap is in use. */
1192    
1193        rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1194        MRRETURN(rrc);
1195    
1196      /* Change option settings */      /* Change option settings */
1197    
# Line 828  for (;;) Line 1213  for (;;)
1213        {        {
1214        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1215          RM4);          RM4);
1216        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1217        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          {
1218            mstart = md->start_match_ptr;   /* In case \K reset it */
1219            break;
1220            }
1221          if (rrc != MATCH_NOMATCH &&
1222              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1223            RRETURN(rrc);
1224        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1225        }        }
1226      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1227      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1228    
1229      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1230    
# Line 847  for (;;) Line 1238  for (;;)
1238      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1239      continue;      continue;
1240    
1241      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1242        PRUNE, or COMMIT means we must assume failure without checking subsequent
1243        branches. */
1244    
1245      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1246      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
# Line 855  for (;;) Line 1248  for (;;)
1248        {        {
1249        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1250          RM5);          RM5);
1251        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1252        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1253            {
1254            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1255            break;
1256            }
1257          if (rrc != MATCH_NOMATCH &&
1258              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1259            RRETURN(rrc);
1260        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1261        }        }
1262      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 879  for (;;) Line 1279  for (;;)
1279        while (i-- > 0)        while (i-- > 0)
1280          {          {
1281          eptr--;          eptr--;
1282          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1283          BACKCHAR(eptr)          BACKCHAR(eptr);
1284          }          }
1285        }        }
1286      else      else
# Line 890  for (;;) Line 1290  for (;;)
1290    
1291        {        {
1292        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1293        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1294        }        }
1295    
1296      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1297    
1298        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1299      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1300      break;      break;
1301    
# Line 910  for (;;) Line 1311  for (;;)
1311        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1312        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1313        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1314        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1315        cb.start_match      = mstart - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1316        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1317        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1318        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1319        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1320        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1321        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1322        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1323        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1324        }        }
1325      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 973  for (;;) Line 1374  for (;;)
1374    
1375        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1376              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1377        new_recursive.save_start = mstart;        new_recursive.save_offset_top = offset_top;
       mstart = eptr;  
1378    
1379        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1380        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 985  for (;;) Line 1385  for (;;)
1385          {          {
1386          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1387            md, ims, eptrb, flags, RM6);            md, ims, eptrb, flags, RM6);
1388          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1389            {            {
1390            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1391            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1392            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1393              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1394            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1395            }            }
1396          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1397                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1398            {            {
1399            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1400              if (new_recursive.offset_save != stacksave)
1401                (pcre_free)(new_recursive.offset_save);
1402            RRETURN(rrc);            RRETURN(rrc);
1403            }            }
1404    
# Line 1010  for (;;) Line 1413  for (;;)
1413        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1414        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1415          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1416        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1417        }        }
1418      /* Control never reaches here */      /* Control never reaches here */
1419    
# Line 1019  for (;;) Line 1422  for (;;)
1422      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1423      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1424      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1425      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1426        the start-of-match value in case it was changed by \K. */
1427    
1428      case OP_ONCE:      case OP_ONCE:
1429      prev = ecode;      prev = ecode;
# Line 1027  for (;;) Line 1431  for (;;)
1431    
1432      do      do
1433        {        {
1434        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1435          eptrb, 0, RM7);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1436        if (rrc == MATCH_MATCH) break;          {
1437        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1438            break;
1439            }
1440          if (rrc != MATCH_NOMATCH &&
1441              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1442            RRETURN(rrc);
1443        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1444        }        }
1445      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1073  for (;;) Line 1482  for (;;)
1482    
1483      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1484        {        {
1485        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
         RM8);  
1486        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1487        ecode = prev;        ecode = prev;
1488        flags = match_tail_recursed;        flags = 0;
1489        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1490        }        }
1491      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1085  for (;;) Line 1493  for (;;)
1493        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1494        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1495        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1496        flags = match_tail_recursed;        flags = 0;
1497        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1498        }        }
1499      /* Control never gets here */      /* Control never gets here */
# Line 1097  for (;;) Line 1505  for (;;)
1505      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1506      break;      break;
1507    
1508      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1509      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1510      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1511      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1512      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1513    
1514      case OP_BRAZERO:      case OP_BRAZERO:
1515        {        {
# Line 1123  for (;;) Line 1531  for (;;)
1531        }        }
1532      break;      break;
1533    
1534        case OP_SKIPZERO:
1535          {
1536          next = ecode+1;
1537          do next += GET(next,1); while (*next == OP_ALT);
1538          ecode = next + 1 + LINK_SIZE;
1539          }
1540        break;
1541    
1542      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1543    
1544      case OP_KET:      case OP_KET:
# Line 1141  for (;;) Line 1557  for (;;)
1557        }        }
1558      else saved_eptr = NULL;      else saved_eptr = NULL;
1559    
1560      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1561      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1562      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1563        it was changed by \K. */
1564    
1565      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1566          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1151  for (;;) Line 1568  for (;;)
1568        {        {
1569        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1570        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1571        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1572          MRRETURN(MATCH_MATCH);
1573        }        }
1574    
1575      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1165  for (;;) Line 1583  for (;;)
1583        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1584        offset = number << 1;        offset = number << 1;
1585    
1586  #ifdef DEBUG  #ifdef PCRE_DEBUG
1587        printf("end bracket %d", number);        printf("end bracket %d", number);
1588        printf("\n");        printf("\n");
1589  #endif  #endif
# Line 1175  for (;;) Line 1593  for (;;)
1593          {          {
1594          md->offset_vector[offset] =          md->offset_vector[offset] =
1595            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1596          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1597          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1598          }          }
1599    
# Line 1187  for (;;) Line 1605  for (;;)
1605          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1606          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1607          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         mstart = rec->save_start;  
1608          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1609            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1610            offset_top = rec->save_offset_top;
1611          ecode = rec->after_call;          ecode = rec->after_call;
1612          ims = original_ims;          ims = original_ims;
1613          break;          break;
# Line 1216  for (;;) Line 1634  for (;;)
1634    
1635      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1636      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1637      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1638        unlimited repeat of a group that can match an empty string. */
1639    
1640      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1641    
1642      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1643        {        {
1644        RMATCH(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
         RM12);  
1645        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1646          if (flags != 0)    /* Could match an empty string */
1647            {
1648            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1649            RRETURN(rrc);
1650            }
1651        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1652        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1653        }        }
1654      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
# Line 1234  for (;;) Line 1656  for (;;)
1656        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1657        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1658        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1659        flags = match_tail_recursed;        flags = 0;
1660        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1661        }        }
1662      /* Control never gets here */      /* Control never gets here */
# Line 1242  for (;;) Line 1664  for (;;)
1664      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1665    
1666      case OP_CIRC:      case OP_CIRC:
1667      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1668      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1669        {        {
1670        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1671            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1672          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1673        ecode++;        ecode++;
1674        break;        break;
1675        }        }
# Line 1256  for (;;) Line 1678  for (;;)
1678      /* Start of subject assertion */      /* Start of subject assertion */
1679    
1680      case OP_SOD:      case OP_SOD:
1681      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1682      ecode++;      ecode++;
1683      break;      break;
1684    
1685      /* Start of match assertion */      /* Start of match assertion */
1686    
1687      case OP_SOM:      case OP_SOM:
1688      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1689      ecode++;      ecode++;
1690      break;      break;
1691    
# Line 1281  for (;;) Line 1703  for (;;)
1703      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1704        {        {
1705        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1706          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1707        else        else
1708          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          {
1709            if (md->noteol) MRRETURN(MATCH_NOMATCH);
1710            SCHECK_PARTIAL();
1711            }
1712        ecode++;        ecode++;
1713        break;        break;
1714        }        }
1715      else      else  /* Not multiline */
1716        {        {
1717        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1718        if (!md->endonly)        if (!md->endonly) goto ASSERT_NL_OR_EOS;
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1719        }        }
1720    
1721      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1722    
1723      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1724    
1725      case OP_EOD:      case OP_EOD:
1726      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1727        SCHECK_PARTIAL();
1728      ecode++;      ecode++;
1729      break;      break;
1730    
1731      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1732    
1733      case OP_EODN:      case OP_EODN:
1734      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1735        if (eptr < md->end_subject &&
1736          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738    
1739        /* Either at end of string or \n before end. */
1740    
1741        SCHECK_PARTIAL();
1742      ecode++;      ecode++;
1743      break;      break;
1744    
# Line 1325  for (;;) Line 1750  for (;;)
1750    
1751        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1752        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1753        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1754          partial matching. */
1755    
1756  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1757        if (utf8)        if (utf8)
1758          {          {
1759            /* Get status of previous character */
1760    
1761          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1762            {            {
1763            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1764            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1765              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1766            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1767    #ifdef SUPPORT_UCP
1768              if (md->use_ucp)
1769                {
1770                if (c == '_') prev_is_word = TRUE; else
1771                  {
1772                  int cat = UCD_CATEGORY(c);
1773                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1774                  }
1775                }
1776              else
1777    #endif
1778            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1779            }            }
1780          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1781            /* Get status of next character */
1782    
1783            if (eptr >= md->end_subject)
1784              {
1785              SCHECK_PARTIAL();
1786              cur_is_word = FALSE;
1787              }
1788            else
1789            {            {
1790            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1791    #ifdef SUPPORT_UCP
1792              if (md->use_ucp)
1793                {
1794                if (c == '_') cur_is_word = TRUE; else
1795                  {
1796                  int cat = UCD_CATEGORY(c);
1797                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1798                  }
1799                }
1800              else
1801    #endif
1802            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1803            }            }
1804          }          }
1805        else        else
1806  #endif  #endif
1807    
1808        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1809          consistency with the behaviour of \w we do use it in this case. */
1810    
1811          {          {
1812          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1813            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1814          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1815            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1816              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1817    #ifdef SUPPORT_UCP
1818              if (md->use_ucp)
1819                {
1820                c = eptr[-1];
1821                if (c == '_') prev_is_word = TRUE; else
1822                  {
1823                  int cat = UCD_CATEGORY(c);
1824                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1825                  }
1826                }
1827              else
1828    #endif
1829              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1830              }
1831    
1832            /* Get status of next character */
1833    
1834            if (eptr >= md->end_subject)
1835              {
1836              SCHECK_PARTIAL();
1837              cur_is_word = FALSE;
1838              }
1839            else
1840    #ifdef SUPPORT_UCP
1841            if (md->use_ucp)
1842              {
1843              c = *eptr;
1844              if (c == '_') cur_is_word = TRUE; else
1845                {
1846                int cat = UCD_CATEGORY(c);
1847                cur_is_word = (cat == ucp_L || cat == ucp_N);
1848                }
1849              }
1850            else
1851    #endif
1852            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1853          }          }
1854    
1855        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1856    
1857        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1858             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1859          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1860        }        }
1861      break;      break;
1862    
1863      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1864    
1865      case OP_ANY:      case OP_ANY:
1866      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1867        /* Fall through */
1868    
1869        case OP_ALLANY:
1870        if (eptr++ >= md->end_subject)
1871        {        {
1872        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1873          MRRETURN(MATCH_NOMATCH);
1874        }        }
1875      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1876      ecode++;      ecode++;
1877      break;      break;
1878    
# Line 1380  for (;;) Line 1880  for (;;)
1880      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1881    
1882      case OP_ANYBYTE:      case OP_ANYBYTE:
1883      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1884          {
1885          SCHECK_PARTIAL();
1886          MRRETURN(MATCH_NOMATCH);
1887          }
1888      ecode++;      ecode++;
1889      break;      break;
1890    
1891      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1892      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1898      if (      if (
1899  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1393  for (;;) Line 1901  for (;;)
1901  #endif  #endif
1902         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1903         )         )
1904        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1905      ecode++;      ecode++;
1906      break;      break;
1907    
1908      case OP_DIGIT:      case OP_DIGIT:
1909      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1910          {
1911          SCHECK_PARTIAL();
1912          MRRETURN(MATCH_NOMATCH);
1913          }
1914      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1915      if (      if (
1916  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1406  for (;;) Line 1918  for (;;)
1918  #endif  #endif
1919         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1920         )         )
1921        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1922      ecode++;      ecode++;
1923      break;      break;
1924    
1925      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1926      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1932      if (      if (
1933  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1419  for (;;) Line 1935  for (;;)
1935  #endif  #endif
1936         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1937         )         )
1938        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1939      ecode++;      ecode++;
1940      break;      break;
1941    
1942      case OP_WHITESPACE:      case OP_WHITESPACE:
1943      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1944          {
1945          SCHECK_PARTIAL();
1946          MRRETURN(MATCH_NOMATCH);
1947          }
1948      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1949      if (      if (
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1432  for (;;) Line 1952  for (;;)
1952  #endif  #endif
1953         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1954         )         )
1955        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1956      ecode++;      ecode++;
1957      break;      break;
1958    
1959      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1960      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1966      if (      if (
1967  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1445  for (;;) Line 1969  for (;;)
1969  #endif  #endif
1970         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1971         )         )
1972        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1973      ecode++;      ecode++;
1974      break;      break;
1975    
1976      case OP_WORDCHAR:      case OP_WORDCHAR:
1977      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1978          {
1979          SCHECK_PARTIAL();
1980          MRRETURN(MATCH_NOMATCH);
1981          }
1982      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1983      if (      if (
1984  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1458  for (;;) Line 1986  for (;;)
1986  #endif  #endif
1987         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1988         )         )
1989        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1990      ecode++;      ecode++;
1991      break;      break;
1992    
1993      case OP_ANYNL:      case OP_ANYNL:
1994      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1995          {
1996          SCHECK_PARTIAL();
1997          MRRETURN(MATCH_NOMATCH);
1998          }
1999      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2000      switch(c)      switch(c)
2001        {        {
2002        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2003        case 0x000d:        case 0x000d:
2004        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2005        break;        break;
2006    
2007        case 0x000a:        case 0x000a:
2008          break;
2009    
2010        case 0x000b:        case 0x000b:
2011        case 0x000c:        case 0x000c:
2012        case 0x0085:        case 0x0085:
2013        case 0x2028:        case 0x2028:
2014        case 0x2029:        case 0x2029:
2015          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2016        break;        break;
2017        }        }
2018      ecode++;      ecode++;
2019      break;      break;
2020    
2021      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
2022      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2023          {
2024          SCHECK_PARTIAL();
2025          MRRETURN(MATCH_NOMATCH);
2026          }
2027      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2028      switch(c)      switch(c)
2029        {        {
# Line 1507  for (;;) Line 2047  for (;;)
2047        case 0x202f:    /* NARROW NO-BREAK SPACE */        case 0x202f:    /* NARROW NO-BREAK SPACE */
2048        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */        case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2049        case 0x3000:    /* IDEOGRAPHIC SPACE */        case 0x3000:    /* IDEOGRAPHIC SPACE */
2050        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2051        }        }
2052      ecode++;      ecode++;
2053      break;      break;
2054    
2055      case OP_HSPACE:      case OP_HSPACE:
2056      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2057          {
2058          SCHECK_PARTIAL();
2059          MRRETURN(MATCH_NOMATCH);
2060          }
2061      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2062      switch(c)      switch(c)
2063        {        {
2064        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2065        case 0x09:      /* HT */        case 0x09:      /* HT */
2066        case 0x20:      /* SPACE */        case 0x20:      /* SPACE */
2067        case 0xa0:      /* NBSP */        case 0xa0:      /* NBSP */
# Line 1543  for (;;) Line 2087  for (;;)
2087      break;      break;
2088    
2089      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
2090      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2091          {
2092          SCHECK_PARTIAL();
2093          MRRETURN(MATCH_NOMATCH);
2094          }
2095      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2096      switch(c)      switch(c)
2097        {        {
# Line 1555  for (;;) Line 2103  for (;;)
2103        case 0x85:      /* NEL */        case 0x85:      /* NEL */
2104        case 0x2028:    /* LINE SEPARATOR */        case 0x2028:    /* LINE SEPARATOR */
2105        case 0x2029:    /* PARAGRAPH SEPARATOR */        case 0x2029:    /* PARAGRAPH SEPARATOR */
2106        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
2107        }        }
2108      ecode++;      ecode++;
2109      break;      break;
2110    
2111      case OP_VSPACE:      case OP_VSPACE:
2112      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2113          {
2114          SCHECK_PARTIAL();
2115          MRRETURN(MATCH_NOMATCH);
2116          }
2117      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2118      switch(c)      switch(c)
2119        {        {
2120        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
2121        case 0x0a:      /* LF */        case 0x0a:      /* LF */
2122        case 0x0b:      /* VT */        case 0x0b:      /* VT */
2123        case 0x0c:      /* FF */        case 0x0c:      /* FF */
# Line 1584  for (;;) Line 2136  for (;;)
2136    
2137      case OP_PROP:      case OP_PROP:
2138      case OP_NOTPROP:      case OP_NOTPROP:
2139      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2140          {
2141          SCHECK_PARTIAL();
2142          MRRETURN(MATCH_NOMATCH);
2143          }
2144      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2145        {        {
2146        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2147    
2148        switch(ecode[1])        switch(ecode[1])
2149          {          {
2150          case PT_ANY:          case PT_ANY:
2151          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2152          break;          break;
2153    
2154          case PT_LAMP:          case PT_LAMP:
2155          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2156               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2157               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2158            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2159           break;          break;
2160    
2161          case PT_GC:          case PT_GC:
2162          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2163            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2164          break;          break;
2165    
2166          case PT_PC:          case PT_PC:
2167          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2168            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2169          break;          break;
2170    
2171          case PT_SC:          case PT_SC:
2172          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2173            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2174          break;          break;
2175    
2176          default:          /* These are specials */
         RRETURN(PCRE_ERROR_INTERNAL);  
         }  
2177    
2178        ecode += 3;          case PT_ALNUM:
2179        }          if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2180      break;               _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2181              MRRETURN(MATCH_NOMATCH);
2182            break;
2183    
2184      /* Match an extended Unicode sequence. We will get here only if the support          case PT_SPACE:    /* Perl space */
2185            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2186                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2187                   == (op == OP_NOTPROP))
2188              MRRETURN(MATCH_NOMATCH);
2189            break;
2190    
2191            case PT_PXSPACE:  /* POSIX space */
2192            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2193                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2194                 c == CHAR_FF || c == CHAR_CR)
2195                   == (op == OP_NOTPROP))
2196              MRRETURN(MATCH_NOMATCH);
2197            break;
2198    
2199            case PT_WORD:
2200            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2201                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2202                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2203              MRRETURN(MATCH_NOMATCH);
2204            break;
2205    
2206            /* This should never occur */
2207    
2208            default:
2209            RRETURN(PCRE_ERROR_INTERNAL);
2210            }
2211    
2212          ecode += 3;
2213          }
2214        break;
2215    
2216        /* Match an extended Unicode sequence. We will get here only if the support
2217      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2218    
2219      case OP_EXTUNI:      case OP_EXTUNI:
2220      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2221          {
2222          SCHECK_PARTIAL();
2223          MRRETURN(MATCH_NOMATCH);
2224          }
2225      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2226        {        {
2227        int chartype, script;        int category = UCD_CATEGORY(c);
2228        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2229        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2230          {          {
2231          int len = 1;          int len = 1;
# Line 1643  for (;;) Line 2233  for (;;)
2233            {            {
2234            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2235            }            }
2236          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2237          if (category != ucp_M) break;          if (category != ucp_M) break;
2238          eptr += len;          eptr += len;
2239          }          }
# Line 1664  for (;;) Line 2254  for (;;)
2254      case OP_REF:      case OP_REF:
2255        {        {
2256        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2257        ecode += 3;                                 /* Advance past item */        ecode += 3;
2258    
2259          /* If the reference is unset, there are two possibilities:
2260    
2261        /* If the reference is unset, set the length to be longer than the amount        (a) In the default, Perl-compatible state, set the length to be longer
2262        of subject left; this ensures that every attempt at a match fails. We        than the amount of subject left; this ensures that every attempt at a
2263        can't just fail here, because of the possibility of quantifiers with zero        match fails. We can't just fail here, because of the possibility of
2264        minima. */        quantifiers with zero minima.
2265    
2266        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        (b) If the JavaScript compatibility flag is set, set the length to zero
2267          md->end_subject - eptr + 1 :        so that the back reference matches an empty string.
2268          md->offset_vector[offset+1] - md->offset_vector[offset];  
2269          Otherwise, set the length to the length of what was matched by the
2270          referenced subpattern. */
2271    
2272          if (offset >= offset_top || md->offset_vector[offset] < 0)
2273            length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2274          else
2275            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2276    
2277        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2278    
# Line 1702  for (;;) Line 2301  for (;;)
2301          break;          break;
2302    
2303          default:               /* No repeat follows */          default:               /* No repeat follows */
2304          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2305              {
2306              CHECK_PARTIAL();
2307              MRRETURN(MATCH_NOMATCH);
2308              }
2309          eptr += length;          eptr += length;
2310          continue;              /* With the main loop */          continue;              /* With the main loop */
2311          }          }
# Line 1718  for (;;) Line 2321  for (;;)
2321    
2322        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2323          {          {
2324          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2325              {
2326              CHECK_PARTIAL();
2327              MRRETURN(MATCH_NOMATCH);
2328              }
2329          eptr += length;          eptr += length;
2330          }          }
2331    
# Line 1735  for (;;) Line 2342  for (;;)
2342            {            {
2343            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2344            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2345            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2346              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2347                {
2348                CHECK_PARTIAL();
2349                MRRETURN(MATCH_NOMATCH);
2350                }
2351            eptr += length;            eptr += length;
2352            }            }
2353          /* Control never gets here */          /* Control never gets here */
# Line 1749  for (;;) Line 2360  for (;;)
2360          pp = eptr;          pp = eptr;
2361          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2362            {            {
2363            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2364                {
2365                CHECK_PARTIAL();
2366                break;
2367                }
2368            eptr += length;            eptr += length;
2369            }            }
2370          while (eptr >= pp)          while (eptr >= pp)
# Line 1758  for (;;) Line 2373  for (;;)
2373            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2374            eptr -= length;            eptr -= length;
2375            }            }
2376          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2377          }          }
2378        }        }
2379      /* Control never gets here */      /* Control never gets here */
2380    
   
   
2381      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2382      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2383      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1819  for (;;) Line 2432  for (;;)
2432          {          {
2433          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2434            {            {
2435            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2436                {
2437                SCHECK_PARTIAL();
2438                MRRETURN(MATCH_NOMATCH);
2439                }
2440            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2441            if (c > 255)            if (c > 255)
2442              {              {
2443              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2444              }              }
2445            else            else
2446              {              {
2447              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448              }              }
2449            }            }
2450          }          }
# Line 1837  for (;;) Line 2454  for (;;)
2454          {          {
2455          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2456            {            {
2457            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2458                {
2459                SCHECK_PARTIAL();
2460                MRRETURN(MATCH_NOMATCH);
2461                }
2462            c = *eptr++;            c = *eptr++;
2463            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2464            }            }
2465          }          }
2466    
# Line 1861  for (;;) Line 2482  for (;;)
2482              {              {
2483              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2484              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2485              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2486                if (eptr >= md->end_subject)
2487                  {
2488                  SCHECK_PARTIAL();
2489                  MRRETURN(MATCH_NOMATCH);
2490                  }
2491              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2492              if (c > 255)              if (c > 255)
2493                {                {
2494                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2495                }                }
2496              else              else
2497                {                {
2498                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2499                }                }
2500              }              }
2501            }            }
# Line 1881  for (;;) Line 2507  for (;;)
2507              {              {
2508              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2509              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2510              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2511                if (eptr >= md->end_subject)
2512                  {
2513                  SCHECK_PARTIAL();
2514                  MRRETURN(MATCH_NOMATCH);
2515                  }
2516              c = *eptr++;              c = *eptr++;
2517              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2518              }              }
2519            }            }
2520          /* Control never gets here */          /* Control never gets here */
# Line 1902  for (;;) Line 2533  for (;;)
2533            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2534              {              {
2535              int len = 1;              int len = 1;
2536              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2537                  {
2538                  SCHECK_PARTIAL();
2539                  break;
2540                  }
2541              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2542              if (c > 255)              if (c > 255)
2543                {                {
# Line 1928  for (;;) Line 2563  for (;;)
2563            {            {
2564            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2565              {              {
2566              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2567                  {
2568                  SCHECK_PARTIAL();
2569                  break;
2570                  }
2571              c = *eptr;              c = *eptr;
2572              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2573              eptr++;              eptr++;
# Line 1941  for (;;) Line 2580  for (;;)
2580              }              }
2581            }            }
2582    
2583          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2584          }          }
2585        }        }
2586      /* Control never gets here */      /* Control never gets here */
2587    
2588    
2589      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2590      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2591        mode, because Unicode properties are supported in non-UTF-8 mode. */
2592    
2593  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2594      case OP_XCLASS:      case OP_XCLASS:
# Line 1989  for (;;) Line 2629  for (;;)
2629    
2630        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2631          {          {
2632          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2633          GETCHARINC(c, eptr);            {
2634          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2635              MRRETURN(MATCH_NOMATCH);
2636              }
2637            GETCHARINCTEST(c, eptr);
2638            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2639          }          }
2640    
2641        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 2008  for (;;) Line 2652  for (;;)
2652            {            {
2653            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2654            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2655            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2656            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2657            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2658                SCHECK_PARTIAL();
2659                MRRETURN(MATCH_NOMATCH);
2660                }
2661              GETCHARINCTEST(c, eptr);
2662              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2663            }            }
2664          /* Control never gets here */          /* Control never gets here */
2665          }          }
# Line 2023  for (;;) Line 2672  for (;;)
2672          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2673            {            {
2674            int len = 1;            int len = 1;
2675            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2676            GETCHARLEN(c, eptr, len);              {
2677                SCHECK_PARTIAL();
2678                break;
2679                }
2680              GETCHARLENTEST(c, eptr, len);
2681            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2682            eptr += len;            eptr += len;
2683            }            }
# Line 2033  for (;;) Line 2686  for (;;)
2686            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2687            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2688            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2689            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2690            }            }
2691          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2692          }          }
2693    
2694        /* Control never gets here */        /* Control never gets here */
# Line 2051  for (;;) Line 2704  for (;;)
2704        length = 1;        length = 1;
2705        ecode++;        ecode++;
2706        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2707        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2708        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2709            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2710            MRRETURN(MATCH_NOMATCH);
2711            }
2712          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2713        }        }
2714      else      else
2715  #endif  #endif
2716    
2717      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2718        {        {
2719        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2720        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2721            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2722            MRRETURN(MATCH_NOMATCH);
2723            }
2724          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2725        ecode += 2;        ecode += 2;
2726        }        }
2727      break;      break;
# Line 2075  for (;;) Line 2736  for (;;)
2736        ecode++;        ecode++;
2737        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2738    
2739        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2740            {
2741            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2742            MRRETURN(MATCH_NOMATCH);
2743            }
2744    
2745        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2746        can use the fast lookup table. */        can use the fast lookup table. */
2747    
2748        if (fc < 128)        if (fc < 128)
2749          {          {
2750          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2751          }          }
2752    
2753        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 2099  for (;;) Line 2764  for (;;)
2764          if (fc != dc)          if (fc != dc)
2765            {            {
2766  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2767            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2768  #endif  #endif
2769              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2770            }            }
2771          }          }
2772        }        }
# Line 2110  for (;;) Line 2775  for (;;)
2775    
2776      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2777        {        {
2778        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2779        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2780            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2781            MRRETURN(MATCH_NOMATCH);
2782            }
2783          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2784        ecode += 2;        ecode += 2;
2785        }        }
2786      break;      break;
# Line 2164  for (;;) Line 2833  for (;;)
2833      case OP_MINQUERY:      case OP_MINQUERY:
2834      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2835      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2836    
2837      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2838      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2839      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2840    
2841      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2842    
2843      REPEATCHAR:      REPEATCHAR:
2844  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2179  for (;;) Line 2847  for (;;)
2847        length = 1;        length = 1;
2848        charptr = ecode;        charptr = ecode;
2849        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2850        ecode += length;        ecode += length;
2851    
2852        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2190  for (;;) Line 2857  for (;;)
2857  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2858          unsigned int othercase;          unsigned int othercase;
2859          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2860              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2861            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2862          else oclength = 0;          else oclength = 0;
2863  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2864    
2865          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2866            {            {
2867            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2868                memcmp(eptr, charptr, length) == 0) eptr += length;
2869  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2870            /* Need braces because of following else */            else if (oclength > 0 &&
2871            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2872                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2873    #endif  /* SUPPORT_UCP */
2874            else            else
2875              {              {
2876              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2877              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2878              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2879            }            }
2880    
2881          if (min == max) continue;          if (min == max) continue;
# Line 2219  for (;;) Line 2886  for (;;)
2886              {              {
2887              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2888              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2889              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2890              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2891                  memcmp(eptr, charptr, length) == 0) eptr += length;
2892  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2893              /* Need braces because of following else */              else if (oclength > 0 &&
2894              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2895                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2896    #endif  /* SUPPORT_UCP */
2897              else              else
2898                {                {
2899                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2900                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2901                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2902              }              }
2903            /* Control never gets here */            /* Control never gets here */
2904            }            }
# Line 2241  for (;;) Line 2908  for (;;)
2908            pp = eptr;            pp = eptr;
2909            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2910              {              {
2911              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2912              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2913  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2914              else if (oclength == 0) break;              else if (oclength > 0 &&
2915                         eptr <= md->end_subject - oclength &&
2916                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2917    #endif  /* SUPPORT_UCP */
2918              else              else
2919                {                {
2920                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2921                eptr += oclength;                break;
2922                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2923              }              }
2924    
2925            if (possessive) continue;            if (possessive) continue;
2926    
2927            for(;;)            for(;;)
2928             {              {
2929             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2930             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2931             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2932  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2933             eptr--;              eptr--;
2934             BACKCHAR(eptr);              BACKCHAR(eptr);
2935  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2936             eptr -= length;              eptr -= length;
2937  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2938             }              }
2939            }            }
2940          /* Control never gets here */          /* Control never gets here */
2941          }          }
# Line 2280  for (;;) Line 2948  for (;;)
2948  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2949    
2950      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2951        {  
2952        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2953    
2954      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2955      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2301  for (;;) Line 2967  for (;;)
2967        {        {
2968        fc = md->lcc[fc];        fc = md->lcc[fc];
2969        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2970          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2971            if (eptr >= md->end_subject)
2972              {
2973              SCHECK_PARTIAL();
2974              MRRETURN(MATCH_NOMATCH);
2975              }
2976            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2977            }
2978        if (min == max) continue;        if (min == max) continue;
2979        if (minimize)        if (minimize)
2980          {          {
# Line 2309  for (;;) Line 2982  for (;;)
2982            {            {
2983            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2984            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2985            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2986                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2987              RRETURN(MATCH_NOMATCH);              {
2988                SCHECK_PARTIAL();
2989                MRRETURN(MATCH_NOMATCH);
2990                }
2991              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2992            }            }
2993          /* Control never gets here */          /* Control never gets here */
2994          }          }
# Line 2320  for (;;) Line 2997  for (;;)
2997          pp = eptr;          pp = eptr;
2998          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2999            {            {
3000            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
3001                {
3002                SCHECK_PARTIAL();
3003                break;
3004                }
3005              if (fc != md->lcc[*eptr]) break;
3006            eptr++;            eptr++;
3007            }            }
3008    
3009          if (possessive) continue;          if (possessive) continue;
3010    
3011          while (eptr >= pp)          while (eptr >= pp)
3012            {            {
3013            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3014            eptr--;            eptr--;
3015            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3016            }            }
3017          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3018          }          }
3019        /* Control never gets here */        /* Control never gets here */
3020        }        }
# Line 2339  for (;;) Line 3023  for (;;)
3023    
3024      else      else
3025        {        {
3026        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3027            {
3028            if (eptr >= md->end_subject)
3029              {
3030              SCHECK_PARTIAL();
3031              MRRETURN(MATCH_NOMATCH);
3032              }
3033            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3034            }
3035    
3036        if (min == max) continue;        if (min == max) continue;
3037    
3038        if (minimize)        if (minimize)
3039          {          {
3040          for (fi = min;; fi++)          for (fi = min;; fi++)
3041            {            {
3042            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3043            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3044            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3045              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3046                {
3047                SCHECK_PARTIAL();
3048                MRRETURN(MATCH_NOMATCH);
3049                }
3050              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3051            }            }
3052          /* Control never gets here */          /* Control never gets here */
3053          }          }
# Line 2357  for (;;) Line 3056  for (;;)
3056          pp = eptr;          pp = eptr;
3057          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3058            {            {
3059            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3060                {
3061                SCHECK_PARTIAL();
3062                break;
3063                }
3064              if (fc != *eptr) break;
3065            eptr++;            eptr++;
3066            }            }
3067          if (possessive) continue;          if (possessive) continue;
3068    
3069          while (eptr >= pp)          while (eptr >= pp)
3070            {            {
3071            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3072            eptr--;            eptr--;
3073            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3074            }            }
3075          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3076          }          }
3077        }        }
3078      /* Control never gets here */      /* Control never gets here */
# Line 2376  for (;;) Line 3081  for (;;)
3081      checking can be multibyte. */      checking can be multibyte. */
3082    
3083      case OP_NOT:      case OP_NOT:
3084      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3085          {
3086          SCHECK_PARTIAL();
3087          MRRETURN(MATCH_NOMATCH);
3088          }
3089      ecode++;      ecode++;
3090      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3091      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2385  for (;;) Line 3094  for (;;)
3094        if (c < 256)        if (c < 256)
3095  #endif  #endif
3096        c = md->lcc[c];        c = md->lcc[c];
3097        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3098        }        }
3099      else      else
3100        {        {
3101        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3102        }        }
3103      break;      break;
3104    
# Line 2453  for (;;) Line 3162  for (;;)
3162      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3163      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3164    
3165      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3166    
3167      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3168      fc = *ecode++;      fc = *ecode++;
3169    
3170      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2483  for (;;) Line 3189  for (;;)
3189          register unsigned int d;          register unsigned int d;
3190          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3191            {            {
3192              if (eptr >= md->end_subject)
3193                {
3194                SCHECK_PARTIAL();
3195                MRRETURN(MATCH_NOMATCH);
3196                }
3197            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3198            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3199            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3200            }            }
3201          }          }
3202        else        else
# Line 2494  for (;;) Line 3205  for (;;)
3205        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3206          {          {
3207          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3208            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3209              if (eptr >= md->end_subject)
3210                {
3211                SCHECK_PARTIAL();
3212                MRRETURN(MATCH_NOMATCH);
3213                }
3214              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3215              }
3216          }          }
3217    
3218        if (min == max) continue;        if (min == max) continue;
# Line 2510  for (;;) Line 3228  for (;;)
3228              {              {
3229              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3230              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3231                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3232                if (eptr >= md->end_subject)
3233                  {
3234                  SCHECK_PARTIAL();
3235                  MRRETURN(MATCH_NOMATCH);
3236                  }
3237              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3238              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3239              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3240              }              }
3241            }            }
3242          else          else
# Line 2524  for (;;) Line 3247  for (;;)
3247              {              {
3248              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3249              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3250              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3251                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3252                  {
3253                  SCHECK_PARTIAL();
3254                  MRRETURN(MATCH_NOMATCH);
3255                  }
3256                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3257              }              }
3258            }            }
3259          /* Control never gets here */          /* Control never gets here */
# Line 2545  for (;;) Line 3273  for (;;)
3273            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3274              {              {
3275              int len = 1;              int len = 1;
3276              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3277                  {
3278                  SCHECK_PARTIAL();
3279                  break;
3280                  }
3281              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3282              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3283              if (fc == d) break;              if (fc == d) break;
# Line 2566  for (;;) Line 3298  for (;;)
3298            {            {
3299            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3300              {              {
3301              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3302                  {
3303                  SCHECK_PARTIAL();
3304                  break;
3305                  }
3306                if (fc == md->lcc[*eptr]) break;
3307              eptr++;              eptr++;
3308              }              }
3309            if (possessive) continue;            if (possessive) continue;
# Line 2578  for (;;) Line 3315  for (;;)
3315              }              }
3316            }            }
3317    
3318          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3319          }          }
3320        /* Control never gets here */        /* Control never gets here */
3321        }        }
# Line 2594  for (;;) Line 3331  for (;;)
3331          register unsigned int d;          register unsigned int d;
3332          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3333            {            {
3334              if (eptr >= md->end_subject)
3335                {
3336                SCHECK_PARTIAL();
3337                MRRETURN(MATCH_NOMATCH);
3338                }
3339            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3340            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3341            }            }
3342          }          }
3343        else        else
# Line 2603  for (;;) Line 3345  for (;;)
3345        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3346          {          {
3347          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3348            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3349              if (eptr >= md->end_subject)
3350                {
3351                SCHECK_PARTIAL();
3352                MRRETURN(MATCH_NOMATCH);
3353                }
3354              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3355              }
3356          }          }
3357    
3358        if (min == max) continue;        if (min == max) continue;
# Line 2619  for (;;) Line 3368  for (;;)
3368              {              {
3369              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3370              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3371                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3372                if (eptr >= md->end_subject)
3373                  {
3374                  SCHECK_PARTIAL();
3375                  MRRETURN(MATCH_NOMATCH);
3376                  }
3377              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3378              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3379              }              }
3380            }            }
3381          else          else
# Line 2632  for (;;) Line 3386  for (;;)
3386              {              {
3387              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3390                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3391                  {
3392                  SCHECK_PARTIAL();
3393                  MRRETURN(MATCH_NOMATCH);
3394                  }
3395                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3396              }              }
3397            }            }
3398          /* Control never gets here */          /* Control never gets here */
# Line 2653  for (;;) Line 3412  for (;;)
3412            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3413              {              {
3414              int len = 1;              int len = 1;
3415              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3416                  {
3417                  SCHECK_PARTIAL();
3418                  break;
3419                  }
3420              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3421              if (fc == d) break;              if (fc == d) break;
3422              eptr += len;              eptr += len;
# Line 2673  for (;;) Line 3436  for (;;)
3436            {            {
3437            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3438              {              {
3439              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3440                  {
3441                  SCHECK_PARTIAL();
3442                  break;
3443                  }
3444                if (fc == *eptr) break;
3445              eptr++;              eptr++;
3446              }              }
3447            if (possessive) continue;            if (possessive) continue;
# Line 2685  for (;;) Line 3453  for (;;)
3453              }              }
3454            }            }
3455    
3456          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3457          }          }
3458        }        }
3459      /* Control never gets here */      /* Control never gets here */
# Line 2767  for (;;) Line 3535  for (;;)
3535    
3536      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3537      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3538      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3539      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3540      and single-bytes. */      and single-bytes. */
3541    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3542      if (min > 0)      if (min > 0)
3543        {        {
3544  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2782  for (;;) Line 3547  for (;;)
3547          switch(prop_type)          switch(prop_type)
3548            {            {
3549            case PT_ANY:            case PT_ANY:
3550            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3551            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3552              {              {
3553              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3554                  {
3555                  SCHECK_PARTIAL();
3556                  MRRETURN(MATCH_NOMATCH);
3557                  }
3558              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3559              }              }
3560            break;            break;
# Line 2793  for (;;) Line 3562  for (;;)
3562            case PT_LAMP:            case PT_LAMP:
3563            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3564              {              {
3565              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3566                  {
3567                  SCHECK_PARTIAL();
3568                  MRRETURN(MATCH_NOMATCH);
3569                  }
3570              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3571              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3572              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3573                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3574                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3575                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3576              }              }
3577            break;            break;
3578    
3579            case PT_GC:            case PT_GC:
3580            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3581              {              {
3582              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3583                  {
3584                  SCHECK_PARTIAL();
3585                  MRRETURN(MATCH_NOMATCH);
3586                  }
3587              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3588              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3589              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3590                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3591              }              }
3592            break;            break;
3593    
3594            case PT_PC:            case PT_PC:
3595            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3596              {              {
3597              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3598                  {
3599                  SCHECK_PARTIAL();
3600                  MRRETURN(MATCH_NOMATCH);
3601                  }
3602              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3603              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3604              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3605                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3606              }              }
3607            break;            break;
3608    
3609            case PT_SC:            case PT_SC:
3610            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3611              {              {
3612              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3613                  {
3614                  SCHECK_PARTIAL();
3615                  MRRETURN(MATCH_NOMATCH);
3616                  }
3617              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
3618              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3619              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3620                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3621                }
3622              break;
3623    
3624              case PT_ALNUM:
3625              for (i = 1; i <= min; i++)
3626                {
3627                if (eptr >= md->end_subject)
3628                  {
3629                  SCHECK_PARTIAL();
3630                  MRRETURN(MATCH_NOMATCH);
3631                  }
3632                GETCHARINCTEST(c, eptr);
3633                prop_category = UCD_CATEGORY(c);
3634                if ((prop_category == ucp_L || prop_category == ucp_N)
3635                       == prop_fail_result)
3636                  MRRETURN(MATCH_NOMATCH);
3637                }
3638              break;
3639    
3640              case PT_SPACE:    /* Perl space */
3641              for (i = 1; i <= min; i++)
3642                {
3643                if (eptr >= md->end_subject)
3644                  {
3645                  SCHECK_PARTIAL();
3646                  MRRETURN(MATCH_NOMATCH);
3647                  }
3648                GETCHARINCTEST(c, eptr);
3649                prop_category = UCD_CATEGORY(c);
3650                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3651                     c == CHAR_FF || c == CHAR_CR)
3652                       == prop_fail_result)
3653                  MRRETURN(MATCH_NOMATCH);
3654                }
3655              break;
3656    
3657              case PT_PXSPACE:  /* POSIX space */
3658              for (i = 1; i <= min; i++)
3659                {
3660                if (eptr >= md->end_subject)
3661                  {
3662                  SCHECK_PARTIAL();
3663                  MRRETURN(MATCH_NOMATCH);
3664                  }
3665                GETCHARINCTEST(c, eptr);
3666                prop_category = UCD_CATEGORY(c);
3667                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3668                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3669                       == prop_fail_result)
3670                  MRRETURN(MATCH_NOMATCH);
3671                }
3672              break;
3673    
3674              case PT_WORD:
3675              for (i = 1; i <= min; i++)
3676                {
3677                if (eptr >= md->end_subject)
3678                  {
3679                  SCHECK_PARTIAL();
3680                  MRRETURN(MATCH_NOMATCH);
3681                  }
3682                GETCHARINCTEST(c, eptr);
3683                prop_category = UCD_CATEGORY(c);
3684                if ((prop_category == ucp_L || prop_category == ucp_N ||
3685                     c == CHAR_UNDERSCORE)
3686                       == prop_fail_result)
3687                  MRRETURN(MATCH_NOMATCH);
3688              }              }
3689            break;            break;
3690    
3691              /* This should not occur */
3692    
3693            default:            default:
3694            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3695            }            }
# Line 2848  for (;;) Line 3702  for (;;)
3702          {          {
3703          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3704            {            {
3705              if (eptr >= md->end_subject)
3706                {
3707                SCHECK_PARTIAL();
3708                MRRETURN(MATCH_NOMATCH);
3709                }
3710            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3711            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3712            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3713            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3714              {              {
3715              int len = 1;              int len = 1;
3716              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3717                {                else { GETCHARLEN(c, eptr, len); }
3718                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3719              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3720              eptr += len;              eptr += len;
3721              }              }
# Line 2876  for (;;) Line 3733  for (;;)
3733          case OP_ANY:          case OP_ANY:
3734          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3735            {            {
3736            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3737                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3738              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3739                MRRETURN(MATCH_NOMATCH);
3740                }
3741              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3742              eptr++;
3743              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3744              }
3745            break;
3746    
3747            case OP_ALLANY:
3748            for (i = 1; i <= min; i++)
3749              {
3750              if (eptr >= md->end_subject)
3751                {
3752                SCHECK_PARTIAL();
3753                MRRETURN(MATCH_NOMATCH);
3754                }
3755            eptr++;            eptr++;
3756            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3757            }            }
3758          break;          break;
3759    
3760          case OP_ANYBYTE:          case OP_ANYBYTE:
3761            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3762          eptr += min;          eptr += min;
3763          break;          break;
3764    
3765          case OP_ANYNL:          case OP_ANYNL:
3766          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3767            {            {
3768            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3769                {
3770                SCHECK_PARTIAL();
3771                MRRETURN(MATCH_NOMATCH);
3772                }
3773            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3774            switch(c)            switch(c)
3775              {              {
3776              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3777              case 0x000d:              case 0x000d:
3778              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3779              break;              break;
3780    
3781              case 0x000a:              case 0x000a:
3782                break;
3783    
3784              case 0x000b:              case 0x000b:
3785              case 0x000c:              case 0x000c:
3786              case 0x0085:              case 0x0085:
3787              case 0x2028:              case 0x2028:
3788              case 0x2029:              case 0x2029:
3789                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3790              break;              break;
3791              }              }
3792            }            }
# Line 2913  for (;;) Line 3795  for (;;)
3795          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
3796          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3797            {            {
3798            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3799                {
3800                SCHECK_PARTIAL();
3801                MRRETURN(MATCH_NOMATCH);
3802                }
3803            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3804            switch(c)            switch(c)
3805              {              {
# Line 2937  for (;;) Line 3823  for (;;)
3823              case 0x202f:    /* NARROW NO-BREAK SPACE */              case 0x202f:    /* NARROW NO-BREAK SPACE */
3824              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3825              case 0x3000:    /* IDEOGRAPHIC SPACE */              case 0x3000:    /* IDEOGRAPHIC SPACE */
3826              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3827              }              }
3828            }            }
3829          break;          break;
# Line 2945  for (;;) Line 3831  for (;;)
3831          case OP_HSPACE:          case OP_HSPACE:
3832          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3833            {            {
3834            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3835                {
3836                SCHECK_PARTIAL();
3837                MRRETURN(MATCH_NOMATCH);
3838                }
3839            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3840            switch(c)            switch(c)
3841              {              {
3842              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3843              case 0x09:      /* HT */              case 0x09:      /* HT */
3844              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
3845              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
# Line 2977  for (;;) Line 3867  for (;;)
3867          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
3868          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3869            {            {
3870            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3871                {
3872                SCHECK_PARTIAL();
3873                MRRETURN(MATCH_NOMATCH);
3874                }
3875            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3876            switch(c)            switch(c)
3877              {              {
# Line 2989  for (;;) Line 3883  for (;;)
3883              case 0x85:      /* NEL */              case 0x85:      /* NEL */
3884              case 0x2028:    /* LINE SEPARATOR */              case 0x2028:    /* LINE SEPARATOR */
3885              case 0x2029:    /* PARAGRAPH SEPARATOR */              case 0x2029:    /* PARAGRAPH SEPARATOR */
3886              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3887              }              }
3888            }            }
3889          break;          break;
# Line 2997  for (;;) Line 3891  for (;;)
3891          case OP_VSPACE:          case OP_VSPACE:
3892          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3893            {            {
3894            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3895                {
3896                SCHECK_PARTIAL();
3897                MRRETURN(MATCH_NOMATCH);
3898                }
3899            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3900            switch(c)            switch(c)
3901              {              {
3902              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3903              case 0x0a:      /* LF */              case 0x0a:      /* LF */
3904              case 0x0b:      /* VT */              case 0x0b:      /* VT */
3905              case 0x0c:      /* FF */              case 0x0c:      /* FF */
# Line 3017  for (;;) Line 3915  for (;;)
3915          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3916          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3917            {            {
3918            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3919                {
3920                SCHECK_PARTIAL();
3921                MRRETURN(MATCH_NOMATCH);
3922                }
3923            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3924            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3925              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3926            }            }
3927          break;          break;
3928    
3929          case OP_DIGIT:          case OP_DIGIT:
3930          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3931            {            {
3932            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3933               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3934              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3935                MRRETURN(MATCH_NOMATCH);
3936                }
3937              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3938                MRRETURN(MATCH_NOMATCH);
3939            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3940            }            }
3941          break;          break;
# Line 3037  for (;;) Line 3943  for (;;)
3943          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3944          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3945            {            {
3946            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3947               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3948              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3949            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3950                }
3951              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3952                MRRETURN(MATCH_NOMATCH);
3953              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3954            }            }
3955          break;          break;
3956    
3957          case OP_WHITESPACE:          case OP_WHITESPACE:
3958          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3959            {            {
3960            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3961               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3962              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3963                MRRETURN(MATCH_NOMATCH);
3964                }
3965              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3966                MRRETURN(MATCH_NOMATCH);
3967            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3968            }            }
3969          break;          break;
# Line 3057  for (;;) Line 3971  for (;;)
3971          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3972          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3973            {            {
3974            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3975               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3976              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3977            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3978                }
3979              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3980                MRRETURN(MATCH_NOMATCH);
3981              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3982            }            }
3983          break;          break;
3984    
3985          case OP_WORDCHAR:          case OP_WORDCHAR:
3986          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3987            {            {
3988            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3989               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3990              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3991                MRRETURN(MATCH_NOMATCH);
3992                }
3993              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3994                MRRETURN(MATCH_NOMATCH);
3995            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3996            }            }
3997          break;          break;
# Line 3082  for (;;) Line 4004  for (;;)
4004  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
4005    
4006        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4007        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
4008    
4009        switch(ctype)        switch(ctype)
4010          {          {
4011          case OP_ANY:          case OP_ANY:
4012          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
4013            {            {
4014            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
4015              {              {
4016              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4017              eptr++;              MRRETURN(MATCH_NOMATCH);
4018              }              }
4019              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4020              eptr++;
4021            }            }
         else eptr += min;  
4022          break;          break;
4023    
4024          case OP_ANYBYTE:          case OP_ALLANY:
4025            if (eptr > md->end_subject - min)
4026              {
4027              SCHECK_PARTIAL();
4028              MRRETURN(MATCH_NOMATCH);
4029              }
4030          eptr += min;          eptr += min;
4031          break;          break;
4032    
4033          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
4034          bytes are present in this case. */          if (eptr > md->end_subject - min)
4035              {
4036              SCHECK_PARTIAL();
4037              MRRETURN(MATCH_NOMATCH);
4038              }
4039            eptr += min;
4040            break;
4041    
4042          case OP_ANYNL:          case OP_ANYNL:
4043          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4044            {            {
4045            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4046                {
4047                SCHECK_PARTIAL();
4048                MRRETURN(MATCH_NOMATCH);
4049                }
4050            switch(*eptr++)            switch(*eptr++)
4051              {              {
4052              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4053              case 0x000d:              case 0x000d:
4054              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4055              break;              break;
4056              case 0x000a:              case 0x000a:
4057                break;
4058    
4059              case 0x000b:              case 0x000b:
4060              case 0x000c:              case 0x000c:
4061              case 0x0085:              case 0x0085:
4062                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4063              break;              break;
4064              }              }
4065            }            }
# Line 3128  for (;;) Line 4068  for (;;)
4068          case OP_NOT_HSPACE:          case OP_NOT_HSPACE:
4069          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4070            {            {
4071            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4072                {
4073                SCHECK_PARTIAL();
4074                MRRETURN(MATCH_NOMATCH);
4075                }
4076            switch(*eptr++)            switch(*eptr++)
4077              {              {
4078              default: break;              default: break;
4079              case 0x09:      /* HT */              case 0x09:      /* HT */
4080              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4081              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
4082              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4083              }              }
4084            }            }
4085          break;          break;
# Line 3143  for (;;) Line 4087  for (;;)
4087          case OP_HSPACE:          case OP_HSPACE:
4088          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4089            {            {
4090            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4091                {
4092                SCHECK_PARTIAL();
4093                MRRETURN(MATCH_NOMATCH);
4094                }
4095            switch(*eptr++)            switch(*eptr++)
4096              {              {
4097              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4098              case 0x09:      /* HT */              case 0x09:      /* HT */
4099              case 0x20:      /* SPACE */              case 0x20:      /* SPACE */
4100              case 0xa0:      /* NBSP */              case 0xa0:      /* NBSP */
# Line 3158  for (;;) Line 4106  for (;;)
4106          case OP_NOT_VSPACE:          case OP_NOT_VSPACE:
4107          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4108            {            {
4109            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4110                {
4111                SCHECK_PARTIAL();
4112                MRRETURN(MATCH_NOMATCH);
4113                }
4114            switch(*eptr++)            switch(*eptr++)
4115              {              {
4116              default: break;              default: break;
# Line 3167  for (;;) Line 4119  for (;;)
4119              case 0x0c:      /* FF */              case 0x0c:      /* FF */
4120              case 0x0d:      /* CR */              case 0x0d:      /* CR */
4121              case 0x85:      /* NEL */              case 0x85:      /* NEL */
4122              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4123              }              }
4124            }            }
4125          break;          break;
# Line 3175  for (;;) Line 4127  for (;;)
4127          case OP_VSPACE:          case OP_VSPACE:
4128          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4129            {            {
4130            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4131                {
4132                SCHECK_PARTIAL();
4133                MRRETURN(MATCH_NOMATCH);
4134                }
4135            switch(*eptr++)            switch(*eptr++)
4136              {              {
4137              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4138              case 0x0a:      /* LF */              case 0x0a:      /* LF */
4139              case 0x0b:      /* VT */              case 0x0b:      /* VT */
4140              case 0x0c:      /* FF */              case 0x0c:      /* FF */
# Line 3191  for (;;) Line 4147  for (;;)
4147    
4148          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4149          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4150            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4151              if (eptr >= md->end_subject)
4152                {
4153                SCHECK_PARTIAL();
4154                MRRETURN(MATCH_NOMATCH);
4155                }
4156              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4157              }
4158          break;          break;
4159    
4160          case OP_DIGIT:          case OP_DIGIT:
4161          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4162            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4163              if (eptr >= md->end_subject)
4164                {
4165                SCHECK_PARTIAL();
4166                MRRETURN(MATCH_NOMATCH);
4167                }
4168              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4169              }
4170          break;          break;
4171    
4172          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4173          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4174            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4175              if (eptr >= md->end_subject)
4176                {
4177                SCHECK_PARTIAL();
4178                MRRETURN(MATCH_NOMATCH);
4179                }
4180              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4181              }
4182          break;          break;
4183    
4184          case OP_WHITESPACE:          case OP_WHITESPACE:
4185          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4186            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4187              if (eptr >= md->end_subject)
4188                {
4189                SCHECK_PARTIAL();
4190                MRRETURN(MATCH_NOMATCH);
4191                }
4192              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4193              }
4194          break;          break;
4195    
4196          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4197          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4198              {
4199              if (eptr >= md->end_subject)
4200                {
4201                SCHECK_PARTIAL();
4202                MRRETURN(MATCH_NOMATCH);
4203                }
4204            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4205              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4206              }
4207          break;          break;
4208    
4209          case OP_WORDCHAR:          case OP_WORDCHAR:
4210          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4211              {
4212              if (eptr >= md->end_subject)
4213                {
4214                SCHECK_PARTIAL();
4215                MRRETURN(MATCH_NOMATCH);
4216                }
4217            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
4218              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4219              }
4220          break;          break;
4221    
4222          default:          default:
# Line 3246  for (;;) Line 4244  for (;;)
4244              {              {
4245              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4246              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4247              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4248              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4249              if (prop_fail_result) RRETURN(MATCH_NOMATCH);                {
4250                  SCHECK_PARTIAL();
4251                  MRRETURN(MATCH_NOMATCH);
4252                  }
4253                GETCHARINCTEST(c, eptr);
4254                if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4255              }              }
4256            /* Control never gets here */            /* Control never gets here */
4257    
# Line 3257  for (;;) Line 4260  for (;;)
4260              {              {
4261              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4262              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4263              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4264              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4265              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4266                  SCHECK_PARTIAL();
4267                  MRRETURN(MATCH_NOMATCH);
4268                  }
4269                GETCHARINCTEST(c, eptr);
4270                prop_chartype = UCD_CHARTYPE(c);
4271              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4272                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4273                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
4274                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4275              }              }
4276            /* Control never gets here */            /* Control never gets here */
4277    
# Line 3272  for (;;) Line 4280  for (;;)
4280              {              {
4281              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4282              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4283              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4284              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4285              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4286                  SCHECK_PARTIAL();
4287                  MRRETURN(MATCH_NOMATCH);
4288                  }
4289                GETCHARINCTEST(c, eptr);
4290                prop_category = UCD_CATEGORY(c);
4291              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
4292                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4293              }              }
4294            /* Control never gets here */            /* Control never gets here */
4295    
# Line 3285  for (;;) Line 4298  for (;;)
4298              {              {
4299              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
4300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4301              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4302              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4303              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4304                  SCHECK_PARTIAL();
4305                  MRRETURN(MATCH_NOMATCH);
4306                  }
4307                GETCHARINCTEST(c, eptr);
4308                prop_chartype = UCD_CHARTYPE(c);
4309              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
4310                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4311              }              }
4312            /* Control never gets here */            /* Control never gets here */
4313    
# Line 3298  for (;;) Line 4316  for (;;)
4316              {              {
4317              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
4318              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4319              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4320              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4321              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4322                  SCHECK_PARTIAL();
4323                  MRRETURN(MATCH_NOMATCH);
4324                  }
4325                GETCHARINCTEST(c, eptr);
4326                prop_script = UCD_SCRIPT(c);
4327              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
4328                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4329              }              }
4330            /* Control never gets here */            /* Control never gets here */
4331    
4332              case PT_ALNUM:
4333              for (fi = min;; fi++)
4334                {
4335                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM59);
4336                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4337                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4338                if (eptr >= md->end_subject)
4339                  {
4340                  SCHECK_PARTIAL();
4341                  MRRETURN(MATCH_NOMATCH);
4342                  }
4343                GETCHARINCTEST(c, eptr);
4344                prop_category = UCD_CATEGORY(c);
4345                if ((prop_category == ucp_L || prop_category == ucp_N)
4346                       == prop_fail_result)
4347                  MRRETURN(MATCH_NOMATCH);
4348                }
4349              /* Control never gets here */
4350    
4351              case PT_SPACE:    /* Perl space */
4352              for (fi = min;; fi++)
4353                {
4354                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM60);
4355                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4356                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4357                if (eptr >= md->end_subject)
4358                  {
4359                  SCHECK_PARTIAL();
4360                  MRRETURN(MATCH_NOMATCH);
4361                  }
4362                GETCHARINCTEST(c, eptr);
4363                prop_category = UCD_CATEGORY(c);
4364                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4365                     c == CHAR_FF || c == CHAR_CR)
4366                       == prop_fail_result)
4367                  MRRETURN(MATCH_NOMATCH);
4368                }
4369              /* Control never gets here */
4370    
4371              case PT_PXSPACE:  /* POSIX space */
4372              for (fi = min;; fi++)
4373                {
4374                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM61);
4375                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4376                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4377                if (eptr >= md->end_subject)
4378                  {
4379                  SCHECK_PARTIAL();
4380                  MRRETURN(MATCH_NOMATCH);
4381                  }
4382                GETCHARINCTEST(c, eptr);
4383                prop_category = UCD_CATEGORY(c);
4384                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
4385                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
4386                       == prop_fail_result)
4387                  MRRETURN(MATCH_NOMATCH);
4388                }
4389              /* Control never gets here */
4390    
4391              case PT_WORD:
4392              for (fi = min;; fi++)
4393                {
4394                RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM62);
4395                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4396                if (fi >= max) MRRETURN(MATCH_NOMATCH);
4397                if (eptr >= md->end_subject)
4398                  {
4399                  SCHECK_PARTIAL();
4400                  MRRETURN(MATCH_NOMATCH);
4401                  }
4402                GETCHARINCTEST(c, eptr);
4403                prop_category = UCD_CATEGORY(c);
4404                if ((prop_category == ucp_L ||
4405                     prop_category == ucp_N ||
4406                     c == CHAR_UNDERSCORE)
4407                       == prop_fail_result)
4408                  MRRETURN(MATCH_NOMATCH);
4409                }
4410              /* Control never gets here */
4411    
4412              /* This should never occur */
4413    
4414            default:            default:
4415            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
4416            }            }
# Line 3320  for (;;) Line 4425  for (;;)
4425            {            {
4426            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
4427            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4428            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4429              if (eptr >= md->end_subject)
4430                {
4431                SCHECK_PARTIAL();
4432                MRRETURN(MATCH_NOMATCH);
4433                }
4434            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
4435            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
4436            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
4437            while (eptr < md->end_subject)            while (eptr < md->end_subject)
4438              {              {
4439              int len = 1;              int len = 1;
4440              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
4441                {                else { GETCHARLEN(c, eptr, len); }
4442                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
4443              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
4444              eptr += len;              eptr += len;
4445              }              }
# Line 3349  for (;;) Line 4457  for (;;)
4457            {            {
4458            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
4459            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4460            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4461                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&            if (eptr >= md->end_subject)
4462                  IS_NEWLINE(eptr)))              {
4463              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4464                MRRETURN(MATCH_NOMATCH);
4465                }
4466              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4467                MRRETURN(MATCH_NOMATCH);
4468            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
4469            switch(ctype)            switch(ctype)
4470              {              {
4471              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
4472              break;              case OP_ALLANY:
   
4473              case OP_ANYBYTE:              case OP_ANYBYTE:
4474              break;              break;
4475    
4476              case OP_ANYNL:              case OP_ANYNL:
4477              switch(c)              switch(c)
4478                {                {
4479                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4480                case 0x000d:                case 0x000d:
4481                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4482                break;                break;
4483                case 0x000a:                case 0x000a:
4484                  break;
4485    
4486                case 0x000b:                case 0x000b:
4487                case 0x000c:                case 0x000c:
4488                case 0x0085:                case 0x0085:
4489                case 0x2028:                case 0x2028:
4490                case 0x2029:                case 0x2029:
4491                  if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4492                break;                break;
4493                }                }
4494              break;              break;
# Line 3403  for (;;) Line 4516  for (;;)
4516                case 0x202f:    /* NARROW NO-BREAK SPACE */                case 0x202f:    /* NARROW NO-BREAK SPACE */
4517                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
4518                case 0x3000:    /* IDEOGRAPHIC SPACE */                case 0x3000:    /* IDEOGRAPHIC SPACE */
4519                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4520                }                }
4521              break;              break;
4522    
4523              case OP_HSPACE:              case OP_HSPACE:
4524              switch(c)              switch(c)
4525                {                {
4526                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4527                case 0x09:      /* HT */                case 0x09:      /* HT */
4528                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
4529                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
# Line 3445  for (;;) Line 4558  for (;;)
4558                case 0x85:      /* NEL */                case 0x85:      /* NEL */
4559                case 0x2028:    /* LINE SEPARATOR */                case 0x2028:    /* LINE SEPARATOR */
4560                case 0x2029:    /* PARAGRAPH SEPARATOR */                case 0x2029:    /* PARAGRAPH SEPARATOR */
4561                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4562                }                }
4563              break;              break;
4564    
4565              case OP_VSPACE:              case OP_VSPACE:
4566              switch(c)              switch(c)
4567                {                {
4568                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4569                case 0x0a:      /* LF */                case 0x0a:      /* LF */
4570                case 0x0b:      /* VT */                case 0x0b:      /* VT */
4571                case 0x0c:      /* FF */                case 0x0c:      /* FF */
# Line 3466  for (;;) Line 4579  for (;;)
4579    
4580              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
4581              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
4582                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4583              break;              break;
4584    
4585              case OP_DIGIT:              case OP_DIGIT:
4586              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
4587                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4588              break;              break;
4589    
4590              case OP_NOT_WHITESPACE:              case OP_NOT_WHITESPACE:
4591              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)              if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
4592                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4593              break;              break;
4594    
4595              case OP_WHITESPACE:              case OP_WHITESPACE:
4596              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)              if  (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
4597                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4598              break;              break;
4599    
4600              case OP_NOT_WORDCHAR:              case OP_NOT_WORDCHAR:
4601              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)              if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
4602                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4603              break;              break;
4604    
4605              case OP_WORDCHAR:              case OP_WORDCHAR:
4606              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)              if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
4607                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4608              break;              break;
4609    
4610              default:              default:
# Line 3507  for (;;) Line 4620  for (;;)
4620            {            {
4621            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
4622            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4623            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
4624                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))            if (eptr >= md->end_subject)
4625              RRETURN(MATCH_NOMATCH);              {
4626                SCHECK_PARTIAL();
4627                MRRETURN(MATCH_NOMATCH);
4628                }
4629              if (ctype == OP_ANY && IS_NEWLINE(eptr))
4630                MRRETURN(MATCH_NOMATCH);
4631            c = *eptr++;            c = *eptr++;
4632            switch(ctype)            switch(ctype)
4633              {              {
4634              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
4635              break;              case OP_ALLANY:
   
4636              case OP_ANYBYTE:              case OP_ANYBYTE:
4637              break;              break;
4638    
4639              case OP_ANYNL:              case OP_ANYNL:
4640              switch(c)              switch(c)
4641                {                {
4642                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4643                case 0x000d:                case 0x000d:
4644                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4645                break;                break;
4646    
4647                case 0x000a:                case 0x000a:
4648                  break;
4649    
4650                case 0x000b:                case 0x000b:
4651                case 0x000c:                case 0x000c:
4652                case 0x0085:                case 0x0085:
4653                  if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4654                break;                break;
4655                }                }
4656              break;              break;
# Line 3542  for (;;) Line 4662  for (;;)
4662                case 0x09:      /* HT */                case 0x09:      /* HT */
4663                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
4664                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
4665                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4666                }                }
4667              break;              break;
4668    
4669              case OP_HSPACE:              case OP_HSPACE:
4670              switch(c)              switch(c)
4671                {                {
4672                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4673                case 0x09:      /* HT */                case 0x09:      /* HT */
4674                case 0x20:      /* SPACE */                case 0x20:      /* SPACE */
4675                case 0xa0:      /* NBSP */                case 0xa0:      /* NBSP */
# Line 3566  for (;;) Line 4686  for (;;)
4686                case 0x0c:      /* FF */                case 0x0c:      /* FF */
4687                case 0x0d:      /* CR */                case 0x0d:      /* CR */
4688                case 0x85:      /* NEL */                case 0x85:      /* NEL */
4689                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4690                }                }
4691              break;              break;
4692    
4693              case OP_VSPACE:              case OP_VSPACE:
4694              switch(c)              switch(c)
4695                {                {
4696                default: RRETURN(MATCH_NOMATCH);                default: MRRETURN(MATCH_NOMATCH);
4697                case 0x0a:      /* LF */                case 0x0a:      /* LF */
4698                case 0x0b:      /* VT */                case 0x0b:      /* VT */
4699                case 0x0c:      /* FF */                case 0x0c:      /* FF */
# Line 3584  for (;;) Line 4704  for (;;)
4704              break;              break;
4705    
4706              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
4707              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4708              break;              break;
4709    
4710              case OP_DIGIT:              case OP_DIGIT:
4711              if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);