/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 543 by ph10, Tue Jun 15 16:33:29 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 78  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 130  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 147  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 183  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #ifdef DEBUG  
268  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 278  versions and production versions. */
278    return ra; \    return ra; \
279    }    }
280  #else  #else
281  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299      {\    frame->Xwhere = rw; \
300      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301      newframe->Xecode = rb;\    newframe->Xecode = rb;\
302      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
303      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
304      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
305      newframe->Xflags = rg;\    newframe->Xims = re;\
306      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xeptrb = rf;\
307      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
308      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
309      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
310      goto HEAP_RECURSE;\    frame = newframe;\
311      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
312    else\    goto HEAP_RECURSE;\
313      {\    L_##rw:\
314      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
315    }    }
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      frame->Xresult = ra;\      rrc = ra;\
325      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
326      }\      }\
327    return ra;\    return ra;\
328    }    }
# Line 266  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340      USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 276  typedef struct heapframe { Line 347  typedef struct heapframe {
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
# Line 299  typedef struct heapframe { Line 372  typedef struct heapframe {
372    int Xprop_category;    int Xprop_category;
373    int Xprop_chartype;    int Xprop_chartype;
374    int Xprop_script;    int Xprop_script;
375      int Xoclength;
376      uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    unsigned int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
# Line 316  typedef struct heapframe { Line 392  typedef struct heapframe {
392    
393    eptrblock Xnewptrb;    eptrblock Xnewptrb;
394    
395    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
396    
397    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
398    
399  } heapframe;  } heapframe;
400    
# Line 337  typedef struct heapframe { Line 412  typedef struct heapframe {
412    
413  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
418  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426        {\
427        md->hitend = TRUE;\
428        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429        }
430    
431    #define SCHECK_PARTIAL()\
432      if (md->partial != 0 && eptr > mstart)\
433        {\
434        md->hitend = TRUE;\
435        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436        }
437    
438    
439    /* Performance note: It might be tempting to extract commonly used fields from
440    the md structure (e.g. utf8, end_subject) into individual variables to improve
441  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
442  made performance worse.  made performance worse.
443    
444  Arguments:  Arguments:
445     eptr        pointer to current character in subject     eptr        pointer to current character in subject
446     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
447       mstart      pointer to the current match start position (can be modified
448                     by encountering \K)
449       markptr     pointer to the most recent MARK name, or NULL
450     offset_top  current top pointer     offset_top  current top pointer
451     md          pointer to "static" info for the match     md          pointer to "static" info for the match
452     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 356  Arguments: Line 456  Arguments:
456                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
457                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
458                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
459     rdepth      the recursion depth     rdepth      the recursion depth
460    
461  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
462                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
463                   a negative MATCH_xxx value for PRUNE, SKIP, etc
464                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
465                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
466  */  */
467    
468  static int  static int
469  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
472  {  {
473  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
474  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 380  register unsigned int c; /* Character Line 480  register unsigned int c; /* Character
480  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
481    
482  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
483    int condcode;
484    
485  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
486  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 388  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
496    
497  frame->Xeptr = eptr;  frame->Xeptr = eptr;
498  frame->Xecode = ecode;  frame->Xecode = ecode;
499    frame->Xmstart = mstart;
500    frame->Xmarkptr = markptr;
501  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
502  frame->Xims = ims;  frame->Xims = ims;
503  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 408  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512    
513  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
514  #define ecode              frame->Xecode  #define ecode              frame->Xecode
515    #define mstart             frame->Xmstart
516    #define markptr            frame->Xmarkptr
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define ims                frame->Xims  #define ims                frame->Xims
519  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 420  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
527  #endif  #endif
528  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
529    #define codelink           frame->Xcodelink
530  #define data               frame->Xdata  #define data               frame->Xdata
531  #define next               frame->Xnext  #define next               frame->Xnext
532  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 441  HEAP_RECURSE: Line 548  HEAP_RECURSE:
548  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
549  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
550  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
551    #define oclength           frame->Xoclength
552    #define occhars            frame->Xocchars
553  #endif  #endif
554    
555  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 494  int prop_fail_result; Line 603  int prop_fail_result;
603  int prop_category;  int prop_category;
604  int prop_chartype;  int prop_chartype;
605  int prop_script;  int prop_script;
606    int oclength;
607    uschar occhars[8];
608  #endif  #endif
609    
610    int codelink;
611  int ctype;  int ctype;
612  int length;  int length;
613  int max;  int max;
# Line 529  TAIL_RECURSE: Line 641  TAIL_RECURSE:
641  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
642  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
643  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
646  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
647  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
648    
649    #ifdef SUPPORT_UTF8
650    utf8 = md->utf8;       /* Local copy of the flag */
651    #else
652    utf8 = FALSE;
653    #endif
654    
655  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
656  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
657    
# Line 542  if (rdepth >= md->match_limit_recursion) Line 660  if (rdepth >= md->match_limit_recursion)
660    
661  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
662    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
663  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
664  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
665  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
666  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
667  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
668  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
669  already used. */  block that is used is on the stack, so a new one may be required for each
670    match(). */
671    
672  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
673    {    {
674    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
675    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
676      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
677    }    }
678    
679  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 577  for (;;) Line 683  for (;;)
683    minimize = possessive = FALSE;    minimize = possessive = FALSE;
684    op = *ecode;    op = *ecode;
685    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
686    switch(op)    switch(op)
687      {      {
688        case OP_MARK:
689        markptr = ecode + 2;
690        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691          ims, eptrb, flags, RM55);
692    
693        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694        argument, and we must check whether that argument matches this MARK's
695        argument. It is passed back in md->start_match_ptr (an overloading of that
696        variable). If it does match, we reset that variable to the current subject
697        position and return MATCH_SKIP. Otherwise, pass back the return code
698        unaltered. */
699    
700        if (rrc == MATCH_SKIP_ARG &&
701            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702          {
703          md->start_match_ptr = eptr;
704          RRETURN(MATCH_SKIP);
705          }
706    
707        if (md->mark == NULL) md->mark = markptr;
708        RRETURN(rrc);
709    
710        case OP_FAIL:
711        MRRETURN(MATCH_NOMATCH);
712    
713        case OP_COMMIT:
714        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
715          ims, eptrb, flags, RM52);
716        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
717        MRRETURN(MATCH_COMMIT);
718    
719        case OP_PRUNE:
720        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
721          ims, eptrb, flags, RM51);
722        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
723        MRRETURN(MATCH_PRUNE);
724    
725        case OP_PRUNE_ARG:
726        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
727          ims, eptrb, flags, RM56);
728        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
729        md->mark = ecode + 2;
730        RRETURN(MATCH_PRUNE);
731    
732        case OP_SKIP:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734          ims, eptrb, flags, RM53);
735        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
736        md->start_match_ptr = eptr;   /* Pass back current position */
737        MRRETURN(MATCH_SKIP);
738    
739        case OP_SKIP_ARG:
740        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
741          ims, eptrb, flags, RM57);
742        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
743    
744        /* Pass back the current skip name by overloading md->start_match_ptr and
745        returning the special MATCH_SKIP_ARG return code. This will either be
746        caught by a matching MARK, or get to the top, where it is treated the same
747        as PRUNE. */
748    
749        md->start_match_ptr = ecode + 2;
750        RRETURN(MATCH_SKIP_ARG);
751    
752        case OP_THEN:
753        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
754          ims, eptrb, flags, RM54);
755        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
756        MRRETURN(MATCH_THEN);
757    
758        case OP_THEN_ARG:
759        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
760          ims, eptrb, flags, RM58);
761        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
762        md->mark = ecode + 2;
763        RRETURN(MATCH_THEN);
764    
765      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
766      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
767      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 606  for (;;) Line 781  for (;;)
781      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
782      offset = number << 1;      offset = number << 1;
783    
784  #ifdef DEBUG  #ifdef PCRE_DEBUG
785      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
786      printf("subject=");      printf("subject=");
787      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 621  for (;;) Line 796  for (;;)
796        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
797    
798        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
799        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
800            (int)(eptr - md->start_subject);
801    
802        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
803        do        do
804          {          {
805          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
806            ims, eptrb, flags);            ims, eptrb, flags, RM1);
807          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
808          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
809          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
810          }          }
# Line 640  for (;;) Line 816  for (;;)
816        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
817        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
818    
819          if (rrc != MATCH_THEN) md->mark = markptr;
820        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
821        }        }
822    
823      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
824      bracket. */      as a non-capturing bracket. */
825    
826        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
827        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
828    
829      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
830    
831        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
832        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
833    
834      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
835      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
836      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
837      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
838        is set. */
839    
840      case OP_BRA:      case OP_BRA:
841      case OP_SBRA:      case OP_SBRA:
# Line 659  for (;;) Line 843  for (;;)
843      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
844      for (;;)      for (;;)
845        {        {
846        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
847          {          {
848          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
849          flags |= match_tail_recursed;            {
850          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
851          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
852              goto TAIL_RECURSE;
853              }
854    
855            /* Possibly empty group; can't use tail recursion. */
856    
857            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
858              eptrb, flags, RM48);
859            if (rrc == MATCH_NOMATCH) md->mark = markptr;
860            RRETURN(rrc);
861          }          }
862    
863        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
864        otherwise return. */        otherwise return. */
865    
866        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
867          eptrb, flags);          eptrb, flags, RM2);
868        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
869        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
870        }        }
871      /* Control never reaches here. */      /* Control never reaches here. */
# Line 685  for (;;) Line 878  for (;;)
878    
879      case OP_COND:      case OP_COND:
880      case OP_SCOND:      case OP_SCOND:
881      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
882    
883        /* Because of the way auto-callout works during compile, a callout item is
884        inserted between OP_COND and an assertion condition. */
885    
886        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
887        {        {
888        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
889        condition = md->recursive != NULL &&          {
890          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
891        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
892            cb.callout_number   = ecode[LINK_SIZE+2];
893            cb.offset_vector    = md->offset_vector;
894            cb.subject          = (PCRE_SPTR)md->start_subject;
895            cb.subject_length   = (int)(md->end_subject - md->start_subject);
896            cb.start_match      = (int)(mstart - md->start_subject);
897            cb.current_position = (int)(eptr - md->start_subject);
898            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
899            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
900            cb.capture_top      = offset_top/2;
901            cb.capture_last     = md->capture_last;
902            cb.callout_data     = md->callout_data;
903            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
904            if (rrc < 0) RRETURN(rrc);
905            }
906          ecode += _pcre_OP_lengths[OP_CALLOUT];
907          }
908    
909        condcode = ecode[LINK_SIZE+1];
910    
911        /* Now see what the actual condition is */
912    
913        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
914          {
915          if (md->recursive == NULL)                /* Not recursing => FALSE */
916            {
917            condition = FALSE;
918            ecode += GET(ecode, 1);
919            }
920          else
921            {
922            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
923            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
924    
925            /* If the test is for recursion into a specific subpattern, and it is
926            false, but the test was set up by name, scan the table to see if the
927            name refers to any other numbers, and test them. The condition is true
928            if any one is set. */
929    
930            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
931              {
932              uschar *slotA = md->name_table;
933              for (i = 0; i < md->name_count; i++)
934                {
935                if (GET2(slotA, 0) == recno) break;
936                slotA += md->name_entry_size;
937                }
938    
939              /* Found a name for the number - there can be only one; duplicate
940              names for different numbers are allowed, but not vice versa. First
941              scan down for duplicates. */
942    
943              if (i < md->name_count)
944                {
945                uschar *slotB = slotA;
946                while (slotB > md->name_table)
947                  {
948                  slotB -= md->name_entry_size;
949                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
950                    {
951                    condition = GET2(slotB, 0) == md->recursive->group_num;
952                    if (condition) break;
953                    }
954                  else break;
955                  }
956    
957                /* Scan up for duplicates */
958    
959                if (!condition)
960                  {
961                  slotB = slotA;
962                  for (i++; i < md->name_count; i++)
963                    {
964                    slotB += md->name_entry_size;
965                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
966                      {
967                      condition = GET2(slotB, 0) == md->recursive->group_num;
968                      if (condition) break;
969                      }
970                    else break;
971                    }
972                  }
973                }
974              }
975    
976            /* Choose branch according to the condition */
977    
978            ecode += condition? 3 : GET(ecode, 1);
979            }
980        }        }
981    
982      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
983        {        {
984        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
985        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
986    
987          /* If the numbered capture is unset, but the reference was by name,
988          scan the table to see if the name refers to any other numbers, and test
989          them. The condition is true if any one is set. This is tediously similar
990          to the code above, but not close enough to try to amalgamate. */
991    
992          if (!condition && condcode == OP_NCREF)
993            {
994            int refno = offset >> 1;
995            uschar *slotA = md->name_table;
996    
997            for (i = 0; i < md->name_count; i++)
998              {
999              if (GET2(slotA, 0) == refno) break;
1000              slotA += md->name_entry_size;
1001              }
1002    
1003            /* Found a name for the number - there can be only one; duplicate names
1004            for different numbers are allowed, but not vice versa. First scan down
1005            for duplicates. */
1006    
1007            if (i < md->name_count)
1008              {
1009              uschar *slotB = slotA;
1010              while (slotB > md->name_table)
1011                {
1012                slotB -= md->name_entry_size;
1013                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1014                  {
1015                  offset = GET2(slotB, 0) << 1;
1016                  condition = offset < offset_top &&
1017                    md->offset_vector[offset] >= 0;
1018                  if (condition) break;
1019                  }
1020                else break;
1021                }
1022    
1023              /* Scan up for duplicates */
1024    
1025              if (!condition)
1026                {
1027                slotB = slotA;
1028                for (i++; i < md->name_count; i++)
1029                  {
1030                  slotB += md->name_entry_size;
1031                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1032                    {
1033                    offset = GET2(slotB, 0) << 1;
1034                    condition = offset < offset_top &&
1035                      md->offset_vector[offset] >= 0;
1036                    if (condition) break;
1037                    }
1038                  else break;
1039                  }
1040                }
1041              }
1042            }
1043    
1044          /* Choose branch according to the condition */
1045    
1046        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1047        }        }
1048    
1049      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1050        {        {
1051        condition = FALSE;        condition = FALSE;
1052        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 712  for (;;) Line 1058  for (;;)
1058    
1059      else      else
1060        {        {
1061        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1062            match_condassert);            match_condassert, RM3);
1063        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1064          {          {
1065          condition = TRUE;          condition = TRUE;
1066          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1067          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1068          }          }
1069        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1070          {          {
1071          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1072          }          }
1073        else        else
1074          {          {
1075          condition = FALSE;          condition = FALSE;
1076          ecode += GET(ecode, 1);          ecode += codelink;
1077          }          }
1078        }        }
1079    
1080      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1081      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1082      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1083        group. If the second alternative doesn't exist, we can just plough on. */
1084    
1085      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1086        {        {
1087        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1088        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1089        goto TAIL_RECURSE;          {
1090            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1091            RRETURN(rrc);
1092            }
1093          else                       /* Group must match something */
1094            {
1095            flags = 0;
1096            goto TAIL_RECURSE;
1097            }
1098        }        }
1099      else      else                         /* Condition false & no alternative */
1100        {        {
1101        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1102        }        }
1103      break;      break;
1104    
1105    
1106      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1107      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1108    
1109        case OP_CLOSE:
1110        number = GET2(ecode, 1);
1111        offset = number << 1;
1112    
1113    #ifdef PCRE_DEBUG
1114          printf("end bracket %d at *ACCEPT", number);
1115          printf("\n");
1116    #endif
1117    
1118        md->capture_last = number;
1119        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1120          {
1121          md->offset_vector[offset] =
1122            md->offset_vector[md->offset_end - number];
1123          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1124          if (offset_top <= offset) offset_top = offset + 2;
1125          }
1126        ecode += 3;
1127        break;
1128    
1129    
1130        /* End of the pattern, either real or forced. If we are in a top-level
1131        recursion, we should restore the offsets appropriately and continue from
1132        after the call. */
1133    
1134        case OP_ACCEPT:
1135      case OP_END:      case OP_END:
1136      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1137        {        {
# Line 759  for (;;) Line 1140  for (;;)
1140        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1141        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1142          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1143        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1144        ims = original_ims;        ims = original_ims;
1145        ecode = rec->after_call;        ecode = rec->after_call;
1146        break;        break;
1147        }        }
1148    
1149      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1150      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1151        the subject. In both cases, backtracking will then try other alternatives,
1152        if any. */
1153    
1154        if (eptr == mstart &&
1155            (md->notempty ||
1156              (md->notempty_atstart &&
1157                mstart == md->start_subject + md->start_offset)))
1158          MRRETURN(MATCH_NOMATCH);
1159    
1160        /* Otherwise, we have a match. */
1161    
1162        md->end_match_ptr = eptr;           /* Record where we ended */
1163        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1164        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1165    
1166        /* For some reason, the macros don't work properly if an expression is
1167        given as the argument to MRRETURN when the heap is in use. */
1168    
1169      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1170      md->end_match_ptr = eptr;          /* Record where we ended */      MRRETURN(rrc);
     md->end_offset_top = offset_top;   /* and how many extracts were taken */  
     RRETURN(MATCH_MATCH);  
1171    
1172      /* Change option settings */      /* Change option settings */
1173    
# Line 791  for (;;) Line 1187  for (;;)
1187      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1188      do      do
1189        {        {
1190        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1191        if (rrc == MATCH_MATCH) break;          RM4);
1192        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1193            {
1194            mstart = md->start_match_ptr;   /* In case \K reset it */
1195            break;
1196            }
1197          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1198        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1199        }        }
1200      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1201      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1202    
1203      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1204    
# Line 811  for (;;) Line 1212  for (;;)
1212      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1213      continue;      continue;
1214    
1215      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1216        PRUNE, or COMMIT means we must assume failure without checking subsequent
1217        branches. */
1218    
1219      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1220      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1221      do      do
1222        {        {
1223        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1224        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);          RM5);
1225        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1226          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1227            {
1228            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1229            break;
1230            }
1231          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1232        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1233        }        }
1234      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 842  for (;;) Line 1251  for (;;)
1251        while (i-- > 0)        while (i-- > 0)
1252          {          {
1253          eptr--;          eptr--;
1254          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1255          BACKCHAR(eptr)          BACKCHAR(eptr);
1256          }          }
1257        }        }
1258      else      else
# Line 853  for (;;) Line 1262  for (;;)
1262    
1263        {        {
1264        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1265        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1266        }        }
1267    
1268      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1269    
1270        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1271      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1272      break;      break;
1273    
# Line 873  for (;;) Line 1283  for (;;)
1283        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1284        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1285        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1286        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1287        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1288        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1289        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1290        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1291        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1292        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1293        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1294        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1295        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1296        }        }
1297      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 936  for (;;) Line 1346  for (;;)
1346    
1347        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1348              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1349        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1350    
1351        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1352        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 946  for (;;) Line 1355  for (;;)
1355        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1356        do        do
1357          {          {
1358          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1359            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1360          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1361            {            {
1362            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1363            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1364            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1365              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1366            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1367            }            }
1368          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1369            {            {
1370            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1371              if (new_recursive.offset_save != stacksave)
1372                (pcre_free)(new_recursive.offset_save);
1373            RRETURN(rrc);            RRETURN(rrc);
1374            }            }
1375    
# Line 973  for (;;) Line 1384  for (;;)
1384        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1385        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1386          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1387        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1388        }        }
1389      /* Control never reaches here */      /* Control never reaches here */
1390    
# Line 982  for (;;) Line 1393  for (;;)
1393      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1394      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1395      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1396      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1397        the start-of-match value in case it was changed by \K. */
1398    
1399      case OP_ONCE:      case OP_ONCE:
1400      prev = ecode;      prev = ecode;
# Line 990  for (;;) Line 1402  for (;;)
1402    
1403      do      do
1404        {        {
1405        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1406          eptrb, 0);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1407        if (rrc == MATCH_MATCH) break;          {
1408        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1409            break;
1410            }
1411          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1412        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1413        }        }
1414      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1036  for (;;) Line 1451  for (;;)
1451    
1452      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1453        {        {
1454        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1455        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1456        ecode = prev;        ecode = prev;
1457        flags = match_tail_recursed;        flags = 0;
1458        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1459        }        }
1460      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1461        {        {
1462        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1463        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1464        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1465        flags = match_tail_recursed;        flags = 0;
1466        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1467        }        }
1468      /* Control never gets here */      /* Control never gets here */
# Line 1059  for (;;) Line 1474  for (;;)
1474      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1475      break;      break;
1476    
1477      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1478      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1479      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1480      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1481      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1482    
1483      case OP_BRAZERO:      case OP_BRAZERO:
1484        {        {
1485        next = ecode+1;        next = ecode+1;
1486        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1487        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1488        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1489        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1079  for (;;) Line 1494  for (;;)
1494        {        {
1495        next = ecode+1;        next = ecode+1;
1496        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1497        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1498        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1499        ecode++;        ecode++;
1500        }        }
1501      break;      break;
1502    
1503        case OP_SKIPZERO:
1504          {
1505          next = ecode+1;
1506          do next += GET(next,1); while (*next == OP_ALT);
1507          ecode = next + 1 + LINK_SIZE;
1508          }
1509        break;
1510    
1511      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1512    
1513      case OP_KET:      case OP_KET:
# Line 1103  for (;;) Line 1526  for (;;)
1526        }        }
1527      else saved_eptr = NULL;      else saved_eptr = NULL;
1528    
1529      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1530      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1531      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1532        it was changed by \K. */
1533    
1534      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1535          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1113  for (;;) Line 1537  for (;;)
1537        {        {
1538        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1539        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1540        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1541          MRRETURN(MATCH_MATCH);
1542        }        }
1543    
1544      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1127  for (;;) Line 1552  for (;;)
1552        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1553        offset = number << 1;        offset = number << 1;
1554    
1555  #ifdef DEBUG  #ifdef PCRE_DEBUG
1556        printf("end bracket %d", number);        printf("end bracket %d", number);
1557        printf("\n");        printf("\n");
1558  #endif  #endif
# Line 1137  for (;;) Line 1562  for (;;)
1562          {          {
1563          md->offset_vector[offset] =          md->offset_vector[offset] =
1564            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1565          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1566          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1567          }          }
1568    
# Line 1149  for (;;) Line 1574  for (;;)
1574          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1575          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1576          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1577          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1578            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1579            offset_top = rec->save_offset_top;
1580          ecode = rec->after_call;          ecode = rec->after_call;
1581          ims = original_ims;          ims = original_ims;
1582          break;          break;
# Line 1178  for (;;) Line 1603  for (;;)
1603    
1604      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1605      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1606      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1607        unlimited repeat of a group that can match an empty string. */
1608    
1609      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1610    
1611      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1612        {        {
1613        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1614        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1615          if (flags != 0)    /* Could match an empty string */
1616            {
1617            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1618            RRETURN(rrc);
1619            }
1620        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1621        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1622        }        }
1623      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1624        {        {
1625        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1626        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1628        flags = match_tail_recursed;        flags = 0;
1629        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1630        }        }
1631      /* Control never gets here */      /* Control never gets here */
# Line 1203  for (;;) Line 1633  for (;;)
1633      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1634    
1635      case OP_CIRC:      case OP_CIRC:
1636      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1637      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1638        {        {
1639        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1640            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1641          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1642        ecode++;        ecode++;
1643        break;        break;
1644        }        }
# Line 1217  for (;;) Line 1647  for (;;)
1647      /* Start of subject assertion */      /* Start of subject assertion */
1648    
1649      case OP_SOD:      case OP_SOD:
1650      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1651      ecode++;      ecode++;
1652      break;      break;
1653    
1654      /* Start of match assertion */      /* Start of match assertion */
1655    
1656      case OP_SOM:      case OP_SOM:
1657      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1658        ecode++;
1659        break;
1660    
1661        /* Reset the start of match point */
1662    
1663        case OP_SET_SOM:
1664        mstart = eptr;
1665      ecode++;      ecode++;
1666      break;      break;
1667    
# Line 1235  for (;;) Line 1672  for (;;)
1672      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1673        {        {
1674        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1675          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1676        else        else
1677          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1678        ecode++;        ecode++;
1679        break;        break;
1680        }        }
1681      else      else
1682        {        {
1683        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1684        if (!md->endonly)        if (!md->endonly)
1685          {          {
1686          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1687              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1688            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1689          ecode++;          ecode++;
1690          break;          break;
1691          }          }
# Line 1258  for (;;) Line 1695  for (;;)
1695      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1696    
1697      case OP_EOD:      case OP_EOD:
1698      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1699      ecode++;      ecode++;
1700      break;      break;
1701    
# Line 1267  for (;;) Line 1704  for (;;)
1704      case OP_EODN:      case OP_EODN:
1705      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1706          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1707        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1708      ecode++;      ecode++;
1709      break;      break;
1710    
# Line 1279  for (;;) Line 1716  for (;;)
1716    
1717        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1718        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1719        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1720          partial matching. */
1721    
1722  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1723        if (utf8)        if (utf8)
1724          {          {
1725            /* Get status of previous character */
1726    
1727          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1728            {            {
1729            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1730            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1731              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1732            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1733    #ifdef SUPPORT_UCP
1734              if (md->use_ucp)
1735                {
1736                if (c == '_') prev_is_word = TRUE; else
1737                  {
1738                  int cat = UCD_CATEGORY(c);
1739                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1740                  }
1741                }
1742              else
1743    #endif
1744            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1745            }            }
1746          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1747            /* Get status of next character */
1748    
1749            if (eptr >= md->end_subject)
1750              {
1751              SCHECK_PARTIAL();
1752              cur_is_word = FALSE;
1753              }
1754            else
1755            {            {
1756            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1757    #ifdef SUPPORT_UCP
1758              if (md->use_ucp)
1759                {
1760                if (c == '_') cur_is_word = TRUE; else
1761                  {
1762                  int cat = UCD_CATEGORY(c);
1763                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1764                  }
1765                }
1766              else
1767    #endif
1768            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1769            }            }
1770          }          }
1771        else        else
1772  #endif  #endif
1773    
1774        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1775          consistency with the behaviour of \w we do use it in this case. */
1776    
1777          {          {
1778          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1779            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1780          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1781            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1782              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1783    #ifdef SUPPORT_UCP
1784              if (md->use_ucp)
1785                {
1786                c = eptr[-1];
1787                if (c == '_') prev_is_word = TRUE; else
1788                  {
1789                  int cat = UCD_CATEGORY(c);
1790                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1791                  }
1792                }
1793              else
1794    #endif
1795              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1796              }
1797    
1798            /* Get status of next character */
1799    
1800            if (eptr >= md->end_subject)
1801              {
1802              SCHECK_PARTIAL();
1803              cur_is_word = FALSE;
1804              }
1805            else
1806    #ifdef SUPPORT_UCP
1807            if (md->use_ucp)
1808              {
1809              c = *eptr;
1810              if (c == '_') cur_is_word = TRUE; else
1811                {
1812                int cat = UCD_CATEGORY(c);
1813                cur_is_word = (cat == ucp_L || cat == ucp_N);
1814                }
1815              }
1816            else
1817    #endif
1818            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1819          }          }
1820    
1821        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1822    
1823        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1824             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1825          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1826        }        }
1827      break;      break;
1828    
1829      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1830    
1831      case OP_ANY:      case OP_ANY:
1832      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1833        /* Fall through */
1834    
1835        case OP_ALLANY:
1836        if (eptr++ >= md->end_subject)
1837        {        {
1838        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1839          MRRETURN(MATCH_NOMATCH);
1840        }        }
1841      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1842      ecode++;      ecode++;
1843      break;      break;
1844    
# Line 1334  for (;;) Line 1846  for (;;)
1846      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1847    
1848      case OP_ANYBYTE:      case OP_ANYBYTE:
1849      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1850          {
1851          SCHECK_PARTIAL();
1852          MRRETURN(MATCH_NOMATCH);
1853          }
1854      ecode++;      ecode++;
1855      break;      break;
1856    
1857      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1858      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1859          {
1860          SCHECK_PARTIAL();
1861          MRRETURN(MATCH_NOMATCH);
1862          }
1863      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1864      if (      if (
1865  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1347  for (;;) Line 1867  for (;;)
1867  #endif  #endif
1868         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1869         )         )
1870        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1871      ecode++;      ecode++;
1872      break;      break;
1873    
1874      case OP_DIGIT:      case OP_DIGIT:
1875      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1876          {
1877          SCHECK_PARTIAL();
1878          MRRETURN(MATCH_NOMATCH);
1879          }
1880      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1881      if (      if (
1882  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1360  for (;;) Line 1884  for (;;)
1884  #endif  #endif
1885         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1886         )         )
1887        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1888      ecode++;      ecode++;
1889      break;      break;
1890    
1891      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1892      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1893          {
1894          SCHECK_PARTIAL();
1895          MRRETURN(MATCH_NOMATCH);
1896          }
1897      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1898      if (      if (
1899  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1373  for (;;) Line 1901  for (;;)
1901  #endif  #endif
1902         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1903         )         )
1904        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1905      ecode++;      ecode++;
1906      break;      break;
1907    
1908      case OP_WHITESPACE:      case OP_WHITESPACE:
1909      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1910          {
1911          SCHECK_PARTIAL();
1912          MRRETURN(MATCH_NOMATCH);
1913          }
1914      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1915      if (      if (
1916  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1386  for (;;) Line 1918  for (;;)
1918  #endif  #endif
1919         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1920         )         )
1921        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1922      ecode++;      ecode++;
1923      break;      break;
1924    
1925      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1926      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1927          {
1928          SCHECK_PARTIAL();
1929          MRRETURN(MATCH_NOMATCH);
1930          }
1931      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1932      if (      if (
1933  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1399  for (;;) Line 1935  for (;;)
1935  #endif  #endif
1936         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1937         )         )
1938        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1939      ecode++;      ecode++;
1940      break;      break;
1941    
1942      case OP_WORDCHAR:      case OP_WORDCHAR:
1943      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1944          {
1945          SCHECK_PARTIAL();
1946          MRRETURN(MATCH_NOMATCH);
1947          }
1948      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1949      if (      if (
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1412  for (;;) Line 1952  for (;;)
1952  #endif  #endif
1953         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1954         )         )
1955        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1956      ecode++;      ecode++;
1957      break;      break;
1958    
1959      case OP_ANYNL:      case OP_ANYNL:
1960      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1961          {
1962          SCHECK_PARTIAL();
1963          MRRETURN(MATCH_NOMATCH);
1964          }
1965      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1966      switch(c)      switch(c)
1967        {        {
1968        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1969        case 0x000d:        case 0x000d:
1970        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1971        break;        break;
1972    
1973        case 0x000a:        case 0x000a:
1974          break;
1975    
1976        case 0x000b:        case 0x000b:
1977        case 0x000c:        case 0x000c:
1978        case 0x0085:        case 0x0085:
1979        case 0x2028:        case 0x2028:
1980        case 0x2029:        case 0x2029:
1981          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1982        break;        break;
1983        }        }
1984      ecode++;      ecode++;
1985      break;      break;
1986    
1987  #ifdef SUPPORT_UCP      case OP_NOT_HSPACE:
1988      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
1989      if the support is in the binary; otherwise a compile-time error occurs. */        {
1990          SCHECK_PARTIAL();
1991      case OP_PROP:        MRRETURN(MATCH_NOMATCH);
1992      case OP_NOTPROP:        }
1993      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      GETCHARINCTEST(c, eptr);
1994        switch(c)
1995          {
1996          default: break;
1997          case 0x09:      /* HT */
1998          case 0x20:      /* SPACE */
1999          case 0xa0:      /* NBSP */
2000          case 0x1680:    /* OGHAM SPACE MARK */
2001          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2002          case 0x2000:    /* EN QUAD */
2003          case 0x2001:    /* EM QUAD */
2004          case 0x2002:    /* EN SPACE */
2005          case 0x2003:    /* EM SPACE */
2006          case 0x2004:    /* THREE-PER-EM SPACE */
2007          case 0x2005:    /* FOUR-PER-EM SPACE */
2008          case 0x2006:    /* SIX-PER-EM SPACE */
2009          case 0x2007:    /* FIGURE SPACE */
2010          case 0x2008:    /* PUNCTUATION SPACE */
2011          case 0x2009:    /* THIN SPACE */
2012          case 0x200A:    /* HAIR SPACE */
2013          case 0x202f:    /* NARROW NO-BREAK SPACE */
2014          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2015          case 0x3000:    /* IDEOGRAPHIC SPACE */
2016          MRRETURN(MATCH_NOMATCH);
2017          }
2018        ecode++;
2019        break;
2020    
2021        case OP_HSPACE:
2022        if (eptr >= md->end_subject)
2023          {
2024          SCHECK_PARTIAL();
2025          MRRETURN(MATCH_NOMATCH);
2026          }
2027        GETCHARINCTEST(c, eptr);
2028        switch(c)
2029          {
2030          default: MRRETURN(MATCH_NOMATCH);
2031          case 0x09:      /* HT */
2032          case 0x20:      /* SPACE */
2033          case 0xa0:      /* NBSP */
2034          case 0x1680:    /* OGHAM SPACE MARK */
2035          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2036          case 0x2000:    /* EN QUAD */
2037          case 0x2001:    /* EM QUAD */
2038          case 0x2002:    /* EN SPACE */
2039          case 0x2003:    /* EM SPACE */
2040          case 0x2004:    /* THREE-PER-EM SPACE */
2041          case 0x2005:    /* FOUR-PER-EM SPACE */
2042          case 0x2006:    /* SIX-PER-EM SPACE */
2043          case 0x2007:    /* FIGURE SPACE */
2044          case 0x2008:    /* PUNCTUATION SPACE */
2045          case 0x2009:    /* THIN SPACE */
2046          case 0x200A:    /* HAIR SPACE */
2047          case 0x202f:    /* NARROW NO-BREAK SPACE */
2048          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2049          case 0x3000:    /* IDEOGRAPHIC SPACE */
2050          break;
2051          }
2052        ecode++;
2053        break;
2054    
2055        case OP_NOT_VSPACE:
2056        if (eptr >= md->end_subject)
2057          {
2058          SCHECK_PARTIAL();
2059          MRRETURN(MATCH_NOMATCH);
2060          }
2061        GETCHARINCTEST(c, eptr);
2062        switch(c)
2063          {
2064          default: break;
2065          case 0x0a:      /* LF */
2066          case 0x0b:      /* VT */
2067          case 0x0c:      /* FF */
2068          case 0x0d:      /* CR */
2069          case 0x85:      /* NEL */
2070          case 0x2028:    /* LINE SEPARATOR */
2071          case 0x2029:    /* PARAGRAPH SEPARATOR */
2072          MRRETURN(MATCH_NOMATCH);
2073          }
2074        ecode++;
2075        break;
2076    
2077        case OP_VSPACE:
2078        if (eptr >= md->end_subject)
2079          {
2080          SCHECK_PARTIAL();
2081          MRRETURN(MATCH_NOMATCH);
2082          }
2083        GETCHARINCTEST(c, eptr);
2084        switch(c)
2085          {
2086          default: MRRETURN(MATCH_NOMATCH);
2087          case 0x0a:      /* LF */
2088          case 0x0b:      /* VT */
2089          case 0x0c:      /* FF */
2090          case 0x0d:      /* CR */
2091          case 0x85:      /* NEL */
2092          case 0x2028:    /* LINE SEPARATOR */
2093          case 0x2029:    /* PARAGRAPH SEPARATOR */
2094          break;
2095          }
2096        ecode++;
2097        break;
2098    
2099    #ifdef SUPPORT_UCP
2100        /* Check the next character by Unicode property. We will get here only
2101        if the support is in the binary; otherwise a compile-time error occurs. */
2102    
2103        case OP_PROP:
2104        case OP_NOTPROP:
2105        if (eptr >= md->end_subject)
2106          {
2107          SCHECK_PARTIAL();
2108          MRRETURN(MATCH_NOMATCH);
2109          }
2110      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2111        {        {
2112        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2113    
2114        switch(ecode[1])        switch(ecode[1])
2115          {          {
2116          case PT_ANY:          case PT_ANY:
2117          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2118          break;          break;
2119    
2120          case PT_LAMP:          case PT_LAMP:
2121          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2122               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2123               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2124            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2125           break;          break;
2126    
2127          case PT_GC:          case PT_GC:
2128          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2129            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2130          break;          break;
2131    
2132          case PT_PC:          case PT_PC:
2133          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2134            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2135          break;          break;
2136    
2137          case PT_SC:          case PT_SC:
2138          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2139            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2140            break;
2141    
2142            /* These are specials */
2143    
2144            case PT_ALNUM:
2145            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2146                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2147              MRRETURN(MATCH_NOMATCH);
2148            break;
2149    
2150            case PT_SPACE:    /* Perl space */
2151            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2152                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2153                   == (op == OP_NOTPROP))
2154              MRRETURN(MATCH_NOMATCH);
2155            break;
2156    
2157            case PT_PXSPACE:  /* POSIX space */
2158            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2159                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2160                 c == CHAR_FF || c == CHAR_CR)
2161                   == (op == OP_NOTPROP))
2162              MRRETURN(MATCH_NOMATCH);
2163            break;
2164    
2165            case PT_WORD:
2166            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2167                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2168                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2169              MRRETURN(MATCH_NOMATCH);
2170          break;          break;
2171    
2172            /* This should never occur */
2173    
2174          default:          default:
2175          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2176          }          }
# Line 1488  for (;;) Line 2183  for (;;)
2183      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2184    
2185      case OP_EXTUNI:      case OP_EXTUNI:
2186      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2187          {
2188          SCHECK_PARTIAL();
2189          MRRETURN(MATCH_NOMATCH);
2190          }
2191      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2192        {        {
2193        int chartype, script;        int category = UCD_CATEGORY(c);
2194        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2195        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2196          {          {
2197          int len = 1;          int len = 1;
# Line 1501  for (;;) Line 2199  for (;;)
2199            {            {
2200            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2201            }            }
2202          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2203          if (category != ucp_M) break;          if (category != ucp_M) break;
2204          eptr += len;          eptr += len;
2205          }          }
# Line 1522  for (;;) Line 2220  for (;;)
2220      case OP_REF:      case OP_REF:
2221        {        {
2222        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2223        ecode += 3;                                 /* Advance past item */        ecode += 3;
2224    
2225          /* If the reference is unset, there are two possibilities:
2226    
2227          (a) In the default, Perl-compatible state, set the length to be longer
2228          than the amount of subject left; this ensures that every attempt at a
2229          match fails. We can't just fail here, because of the possibility of
2230          quantifiers with zero minima.
2231    
2232          (b) If the JavaScript compatibility flag is set, set the length to zero
2233          so that the back reference matches an empty string.
2234    
2235          Otherwise, set the length to the length of what was matched by the
2236          referenced subpattern. */
2237    
2238        /* If the reference is unset, set the length to be longer than the amount        if (offset >= offset_top || md->offset_vector[offset] < 0)
2239        of subject left; this ensures that every attempt at a match fails. We          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2240        can't just fail here, because of the possibility of quantifiers with zero        else
2241        minima. */          length = md->offset_vector[offset+1] - md->offset_vector[offset];
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2242    
2243        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2244    
# Line 1560  for (;;) Line 2267  for (;;)
2267          break;          break;
2268    
2269          default:               /* No repeat follows */          default:               /* No repeat follows */
2270          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2271              {
2272              CHECK_PARTIAL();
2273              MRRETURN(MATCH_NOMATCH);
2274              }
2275          eptr += length;          eptr += length;
2276          continue;              /* With the main loop */          continue;              /* With the main loop */
2277          }          }
# Line 1576  for (;;) Line 2287  for (;;)
2287    
2288        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2289          {          {
2290          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2291              {
2292              CHECK_PARTIAL();
2293              MRRETURN(MATCH_NOMATCH);
2294              }
2295          eptr += length;          eptr += length;
2296          }          }
2297    
# Line 1591  for (;;) Line 2306  for (;;)
2306          {          {
2307          for (fi = min;; fi++)          for (fi = min;; fi++)
2308            {            {
2309            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2310            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2311            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2312              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2313                {
2314                CHECK_PARTIAL();
2315                MRRETURN(MATCH_NOMATCH);
2316                }
2317            eptr += length;            eptr += length;
2318            }            }
2319          /* Control never gets here */          /* Control never gets here */
# Line 1607  for (;;) Line 2326  for (;;)
2326          pp = eptr;          pp = eptr;
2327          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2328            {            {
2329            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2330                {
2331                CHECK_PARTIAL();
2332                break;
2333                }
2334            eptr += length;            eptr += length;
2335            }            }
2336          while (eptr >= pp)          while (eptr >= pp)
2337            {            {
2338            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2339            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2340            eptr -= length;            eptr -= length;
2341            }            }
2342          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2343          }          }
2344        }        }
2345      /* Control never gets here */      /* Control never gets here */
2346    
   
   
2347      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2348      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2349      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1677  for (;;) Line 2398  for (;;)
2398          {          {
2399          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2400            {            {
2401            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2402                {
2403                SCHECK_PARTIAL();
2404                MRRETURN(MATCH_NOMATCH);
2405                }
2406            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2407            if (c > 255)            if (c > 255)
2408              {              {
2409              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2410              }              }
2411            else            else
2412              {              {
2413              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2414              }              }
2415            }            }
2416          }          }
# Line 1695  for (;;) Line 2420  for (;;)
2420          {          {
2421          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2422            {            {
2423            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2424                {
2425                SCHECK_PARTIAL();
2426                MRRETURN(MATCH_NOMATCH);
2427                }
2428            c = *eptr++;            c = *eptr++;
2429            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2430            }            }
2431          }          }
2432    
# Line 1717  for (;;) Line 2446  for (;;)
2446            {            {
2447            for (fi = min;; fi++)            for (fi = min;; fi++)
2448              {              {
2449              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2450              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2451              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2452                if (eptr >= md->end_subject)
2453                  {
2454                  SCHECK_PARTIAL();
2455                  MRRETURN(MATCH_NOMATCH);
2456                  }
2457              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2458              if (c > 255)              if (c > 255)
2459                {                {
2460                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2461                }                }
2462              else              else
2463                {                {
2464                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2465                }                }
2466              }              }
2467            }            }
# Line 1737  for (;;) Line 2471  for (;;)
2471            {            {
2472            for (fi = min;; fi++)            for (fi = min;; fi++)
2473              {              {
2474              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2475              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2476              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2477                if (eptr >= md->end_subject)
2478                  {
2479                  SCHECK_PARTIAL();
2480                  MRRETURN(MATCH_NOMATCH);
2481                  }
2482              c = *eptr++;              c = *eptr++;
2483              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2484              }              }
2485            }            }
2486          /* Control never gets here */          /* Control never gets here */
# Line 1760  for (;;) Line 2499  for (;;)
2499            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2500              {              {
2501              int len = 1;              int len = 1;
2502              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2503                  {
2504                  SCHECK_PARTIAL();
2505                  break;
2506                  }
2507              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2508              if (c > 255)              if (c > 255)
2509                {                {
# Line 1774  for (;;) Line 2517  for (;;)
2517              }              }
2518            for (;;)            for (;;)
2519              {              {
2520              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2521              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2522              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2523              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1786  for (;;) Line 2529  for (;;)
2529            {            {
2530            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2531              {              {
2532              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2533                  {
2534                  SCHECK_PARTIAL();
2535                  break;
2536                  }
2537              c = *eptr;              c = *eptr;
2538              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2539              eptr++;              eptr++;
2540              }              }
2541            while (eptr >= pp)            while (eptr >= pp)
2542              {              {
2543              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2544              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2545              eptr--;              eptr--;
2546              }              }
2547            }            }
2548    
2549          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2550          }          }
2551        }        }
2552      /* Control never gets here */      /* Control never gets here */
2553    
2554    
2555      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2556      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2557        mode, because Unicode properties are supported in non-UTF-8 mode. */
2558    
2559  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2560      case OP_XCLASS:      case OP_XCLASS:
# Line 1847  for (;;) Line 2595  for (;;)
2595    
2596        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2597          {          {
2598          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2599          GETCHARINC(c, eptr);            {
2600          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2601              MRRETURN(MATCH_NOMATCH);
2602              }
2603            GETCHARINCTEST(c, eptr);
2604            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2605          }          }
2606    
2607        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1864  for (;;) Line 2616  for (;;)
2616          {          {
2617          for (fi = min;; fi++)          for (fi = min;; fi++)
2618            {            {
2619            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2620            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2621            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2622            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2623            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2624                SCHECK_PARTIAL();
2625                MRRETURN(MATCH_NOMATCH);
2626                }
2627              GETCHARINCTEST(c, eptr);
2628              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2629            }            }
2630          /* Control never gets here */          /* Control never gets here */
2631          }          }
# Line 1881  for (;;) Line 2638  for (;;)
2638          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2639            {            {
2640            int len = 1;            int len = 1;
2641            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2642            GETCHARLEN(c, eptr, len);              {
2643                SCHECK_PARTIAL();
2644                break;
2645                }
2646              GETCHARLENTEST(c, eptr, len);
2647            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2648            eptr += len;            eptr += len;
2649            }            }
2650          for(;;)          for(;;)
2651            {            {
2652            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2653            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2654            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2655            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2656            }            }
2657          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2658          }          }
2659    
2660        /* Control never gets here */        /* Control never gets here */
# Line 1909  for (;;) Line 2670  for (;;)
2670        length = 1;        length = 1;
2671        ecode++;        ecode++;
2672        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2673        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2674        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2675            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2676            MRRETURN(MATCH_NOMATCH);
2677            }
2678          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2679        }        }
2680      else      else
2681  #endif  #endif
2682    
2683      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2684        {        {
2685        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2686        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2687            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2688            MRRETURN(MATCH_NOMATCH);
2689            }
2690          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2691        ecode += 2;        ecode += 2;
2692        }        }
2693      break;      break;
# Line 1933  for (;;) Line 2702  for (;;)
2702        ecode++;        ecode++;
2703        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2704    
2705        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2706            {
2707            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2708            MRRETURN(MATCH_NOMATCH);
2709            }
2710    
2711        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2712        can use the fast lookup table. */        can use the fast lookup table. */
2713    
2714        if (fc < 128)        if (fc < 128)
2715          {          {
2716          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2717          }          }
2718    
2719        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1957  for (;;) Line 2730  for (;;)
2730          if (fc != dc)          if (fc != dc)
2731            {            {
2732  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2733            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2734  #endif  #endif
2735              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2736            }            }
2737          }          }
2738        }        }
# Line 1968  for (;;) Line 2741  for (;;)
2741    
2742      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2743        {        {
2744        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2745        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2746            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2747            MRRETURN(MATCH_NOMATCH);
2748            }
2749          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2750        ecode += 2;        ecode += 2;
2751        }        }
2752      break;      break;
# Line 2022  for (;;) Line 2799  for (;;)
2799      case OP_MINQUERY:      case OP_MINQUERY:
2800      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2801      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2802    
2803      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2804      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2805      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2806    
2807      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2808    
2809      REPEATCHAR:      REPEATCHAR:
2810  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2037  for (;;) Line 2813  for (;;)
2813        length = 1;        length = 1;
2814        charptr = ecode;        charptr = ecode;
2815        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2816        ecode += length;        ecode += length;
2817    
2818        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2045  for (;;) Line 2820  for (;;)
2820    
2821        if (length > 1)        if (length > 1)
2822          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2823  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2824          unsigned int othercase;          unsigned int othercase;
2825          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2826              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2827            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2828            else oclength = 0;
2829  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2830    
2831          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2832            {            {
2833            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2834            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2835            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2836              else if (oclength > 0 &&
2837                       eptr <= md->end_subject - oclength &&
2838                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2839    #endif  /* SUPPORT_UCP */
2840            else            else
2841              {              {
2842              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2843              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2844              }              }
2845            }            }
2846    
# Line 2073  for (;;) Line 2850  for (;;)
2850            {            {
2851            for (fi = min;; fi++)            for (fi = min;; fi++)
2852              {              {
2853              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2854              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2855              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2856              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2857              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2858              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2859                else if (oclength > 0 &&
2860                         eptr <= md->end_subject - oclength &&
2861                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2862    #endif  /* SUPPORT_UCP */
2863              else              else
2864                {                {
2865                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2866                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2867                }                }
2868              }              }
2869            /* Control never gets here */            /* Control never gets here */
# Line 2093  for (;;) Line 2874  for (;;)
2874            pp = eptr;            pp = eptr;
2875            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2876              {              {
2877              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2878              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2879              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2880                else if (oclength > 0 &&
2881                         eptr <= md->end_subject - oclength &&
2882                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2883    #endif  /* SUPPORT_UCP */
2884              else              else
2885                {                {
2886                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2887                eptr += oclength;                break;
2888                }                }
2889              }              }
2890    
2891            if (possessive) continue;            if (possessive) continue;
2892            while (eptr >= pp)  
2893             {            for(;;)
2894             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              {
2895             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2896             eptr -= length;              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2897             }              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2898            RRETURN(MATCH_NOMATCH);  #ifdef SUPPORT_UCP
2899                eptr--;
2900                BACKCHAR(eptr);
2901    #else   /* without SUPPORT_UCP */
2902                eptr -= length;
2903    #endif  /* SUPPORT_UCP */
2904                }
2905            }            }
2906          /* Control never gets here */          /* Control never gets here */
2907          }          }
# Line 2123  for (;;) Line 2914  for (;;)
2914  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2915    
2916      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2917        {  
2918        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2919    
2920      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2921      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2144  for (;;) Line 2933  for (;;)
2933        {        {
2934        fc = md->lcc[fc];        fc = md->lcc[fc];
2935        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2936          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2937            if (eptr >= md->end_subject)
2938              {
2939              SCHECK_PARTIAL();
2940              MRRETURN(MATCH_NOMATCH);
2941              }
2942            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2943            }
2944        if (min == max) continue;        if (min == max) continue;
2945        if (minimize)        if (minimize)
2946          {          {
2947          for (fi = min;; fi++)          for (fi = min;; fi++)
2948            {            {
2949            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2950            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2951            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2952                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2953              RRETURN(MATCH_NOMATCH);              {
2954                SCHECK_PARTIAL();
2955                MRRETURN(MATCH_NOMATCH);
2956                }
2957              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2958            }            }
2959          /* Control never gets here */          /* Control never gets here */
2960          }          }
# Line 2163  for (;;) Line 2963  for (;;)
2963          pp = eptr;          pp = eptr;
2964          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2965            {            {
2966            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2967                {
2968                SCHECK_PARTIAL();
2969                break;
2970                }
2971              if (fc != md->lcc[*eptr]) break;
2972            eptr++;            eptr++;
2973            }            }
2974    
2975          if (possessive) continue;          if (possessive) continue;
2976    
2977          while (eptr >= pp)          while (eptr >= pp)
2978            {            {
2979            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2980            eptr--;            eptr--;
2981            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2982            }            }
2983          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2984          }          }
2985        /* Control never gets here */        /* Control never gets here */
2986        }        }
# Line 2182  for (;;) Line 2989  for (;;)
2989    
2990      else      else
2991        {        {
2992        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
2993            {
2994            if (eptr >= md->end_subject)
2995              {
2996              SCHECK_PARTIAL();
2997              MRRETURN(MATCH_NOMATCH);
2998              }
2999            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3000            }
3001    
3002        if (min == max) continue;        if (min == max) continue;
3003    
3004        if (minimize)        if (minimize)
3005          {          {
3006          for (fi = min;; fi++)          for (fi = min;; fi++)
3007            {            {
3008            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3009            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3010            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3011              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3012                {
3013                SCHECK_PARTIAL();
3014                MRRETURN(MATCH_NOMATCH);
3015                }
3016              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3017            }            }
3018          /* Control never gets here */          /* Control never gets here */
3019          }          }
# Line 2200  for (;;) Line 3022  for (;;)
3022          pp = eptr;          pp = eptr;
3023          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3024            {            {
3025            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3026                {
3027                SCHECK_PARTIAL();
3028                break;
3029                }
3030              if (fc != *eptr) break;
3031            eptr++;            eptr++;
3032            }            }
3033          if (possessive) continue;          if (possessive) continue;
3034    
3035          while (eptr >= pp)          while (eptr >= pp)
3036            {            {
3037            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3038            eptr--;            eptr--;
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            }            }
3041          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3042          }          }
3043        }        }
3044      /* Control never gets here */      /* Control never gets here */
# Line 2219  for (;;) Line 3047  for (;;)
3047      checking can be multibyte. */      checking can be multibyte. */
3048    
3049      case OP_NOT:      case OP_NOT:
3050      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3051          {
3052          SCHECK_PARTIAL();
3053          MRRETURN(MATCH_NOMATCH);
3054          }
3055      ecode++;      ecode++;
3056      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3057      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2228  for (;;) Line 3060  for (;;)
3060        if (c < 256)        if (c < 256)
3061  #endif  #endif
3062        c = md->lcc[c];        c = md->lcc[c];
3063        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3064        }        }
3065      else      else
3066        {        {
3067        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3068        }        }
3069      break;      break;
3070    
# Line 2296  for (;;) Line 3128  for (;;)
3128      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3129      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3130    
3131      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3132    
3133      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3134      fc = *ecode++;      fc = *ecode++;
3135    
3136      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2326  for (;;) Line 3155  for (;;)
3155          register unsigned int d;          register unsigned int d;
3156          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3157            {            {
3158              if (eptr >= md->end_subject)
3159                {
3160                SCHECK_PARTIAL();
3161                MRRETURN(MATCH_NOMATCH);
3162                }
3163            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3164            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3165            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3166            }            }
3167          }          }
3168        else        else
# Line 2337  for (;;) Line 3171  for (;;)
3171        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3172          {          {
3173          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3174            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3175              if (eptr >= md->end_subject)
3176                {
3177                SCHECK_PARTIAL();
3178                MRRETURN(MATCH_NOMATCH);
3179                }
3180              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3181              }
3182          }          }
3183    
3184        if (min == max) continue;        if (min == max) continue;
# Line 2351  for (;;) Line 3192  for (;;)
3192            register unsigned int d;            register unsigned int d;
3193            for (fi = min;; fi++)            for (fi = min;; fi++)
3194              {              {
3195              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3196              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3197                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3198                if (eptr >= md->end_subject)
3199                  {
3200                  SCHECK_PARTIAL();
3201                  MRRETURN(MATCH_NOMATCH);
3202                  }
3203              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3204              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3205              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3206              }              }
3207            }            }
3208          else          else
# Line 2365  for (;;) Line 3211  for (;;)
3211            {            {
3212            for (fi = min;; fi++)            for (fi = min;; fi++)
3213              {              {
3214              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3215              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3216              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3217                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3218                  {
3219                  SCHECK_PARTIAL();
3220                  MRRETURN(MATCH_NOMATCH);
3221                  }
3222                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3223              }              }
3224            }            }
3225          /* Control never gets here */          /* Control never gets here */
# Line 2388  for (;;) Line 3239  for (;;)
3239            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3240              {              {
3241              int len = 1;              int len = 1;
3242              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3243                  {
3244                  SCHECK_PARTIAL();
3245                  break;
3246                  }
3247              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3248              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3249              if (fc == d) break;              if (fc == d) break;
# Line 2397  for (;;) Line 3252  for (;;)
3252          if (possessive) continue;          if (possessive) continue;
3253          for(;;)          for(;;)
3254              {              {
3255              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3256              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3257              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3258              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2409  for (;;) Line 3264  for (;;)
3264            {            {
3265            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3266              {              {
3267              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3268                  {
3269                  SCHECK_PARTIAL();
3270                  break;
3271                  }
3272                if (fc == md->lcc[*eptr]) break;
3273              eptr++;              eptr++;
3274              }              }
3275            if (possessive) continue;            if (possessive) continue;
3276            while (eptr >= pp)            while (eptr >= pp)
3277              {              {
3278              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3279              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3280              eptr--;              eptr--;
3281              }              }
3282            }            }
3283    
3284          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3285          }          }
3286        /* Control never gets here */        /* Control never gets here */
3287        }        }
# Line 2437  for (;;) Line 3297  for (;;)
3297          register unsigned int d;          register unsigned int d;
3298          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3299            {            {
3300              if (eptr >= md->end_subject)
3301                {
3302                SCHECK_PARTIAL();
3303                MRRETURN(MATCH_NOMATCH);
3304                }
3305            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3306            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3307            }            }
3308          }          }
3309        else        else
# Line 2446  for (;;) Line 3311  for (;;)
3311        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3312          {          {
3313          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3314            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3315              if (eptr >= md->end_subject)
3316                {
3317                SCHECK_PARTIAL();
3318                MRRETURN(MATCH_NOMATCH);
3319                }
3320              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3321              }
3322          }          }
3323    
3324        if (min == max) continue;        if (min == max) continue;
# Line 2460  for (;;) Line 3332  for (;;)
3332            register unsigned int d;            register unsigned int d;
3333            for (fi = min;; fi++)            for (fi = min;; fi++)
3334              {              {
3335              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3336              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3337                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3338                if (eptr >= md->end_subject)
3339                  {
3340                  SCHECK_PARTIAL();
3341                  MRRETURN(MATCH_NOMATCH);
3342                  }
3343              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3344              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3345              }              }
3346            }            }
3347          else          else
# Line 2473  for (;;) Line 3350  for (;;)
3350            {            {
3351            for (fi = min;; fi++)            for (fi = min;; fi++)
3352              {              {
3353              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3354              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3355              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3356                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3357                  {
3358                  SCHECK_PARTIAL();
3359                  MRRETURN(MATCH_NOMATCH);
3360                  }
3361                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3362              }              }
3363            }            }
3364          /* Control never gets here */          /* Control never gets here */
# Line 2496  for (;;) Line 3378  for (;;)
3378            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3379              {              {
3380              int len = 1;              int len = 1;
3381              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3382                  {
3383                  SCHECK_PARTIAL();
3384                  break;
3385                  }
3386              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3387              if (fc == d) break;              if (fc == d) break;
3388              eptr += len;              eptr += len;
# Line 2504  for (;;) Line 3390  for (;;)
3390            if (possessive) continue;            if (possessive) continue;
3391            for(;;)            for(;;)
3392              {              {
3393              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3394              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3395              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3396              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2516  for (;;) Line 3402  for (;;)
3402            {            {
3403            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3404              {              {
3405              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3406                  {
3407                  SCHECK_PARTIAL();
3408                  break;
3409                  }
3410                if (fc == *eptr) break;
3411              eptr++;              eptr++;
3412              }              }
3413            if (possessive) continue;            if (possessive) continue;
3414            while (eptr >= pp)            while (eptr >= pp)
3415              {              {
3416              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3417              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418              eptr--;              eptr--;
3419              }              }
3420            }            }
3421    
3422          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3423          }          }
3424        }        }
3425      /* Control never gets here */      /* Control never gets here */
# Line 2610  for (;;) Line 3501  for (;;)
3501    
3502      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3503      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3504      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3505      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3506      and single-bytes. */      and single-bytes. */
3507    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3508      if (min > 0)      if (min > 0)
3509        {        {
3510  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2625  for (;;) Line 3513  for (;;)
3513          switch(prop_type)          switch(prop_type)
3514            {            {
3515            case PT_ANY:            case PT_ANY:
3516            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3517            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3518              {              {
3519              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3520              GETCHARINC(c, eptr);                {
3521                  SCHECK_PARTIAL();
3522                  MRRETURN(MATCH_NOMATCH);
3523                  }
3524                GETCHARINCTEST(c, eptr);
3525              }              }
3526            break;            break;
3527    
3528            case PT_LAMP:            case PT_LAMP:
3529            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3530              {              {
3531              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3532              GETCHARINC(c, eptr);                {
3533              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3534                  MRRETURN(MATCH_NOMATCH);
3535                  }
3536                GETCHARINCTEST(c, eptr);
3537                prop_chartype = UCD_CHARTYPE(c);
3538              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3539                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3540                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3541                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3542              }              }
3543            break;            break;
3544    
3545            case PT_GC:            case PT_GC:
3546            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3547              {              {
3548              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3549              GETCHARINC(c, eptr);                {
3550              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3551                  MRRETURN(MATCH_NOMATCH);
3552                  }
3553                GETCHARINCTEST(c, eptr);
3554                prop_category = UCD_CATEGORY(c);
3555              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3556                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3557              }              }
3558            break;            break;
3559    
3560            case PT_PC:            case PT_PC:
3561            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3562              {              {
3563              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3564              GETCHARINC(c, eptr);                {
3565              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3566                  MRRETURN(MATCH_NOMATCH);
3567                  }
3568                GETCHARINCTEST(c, eptr);
3569                prop_chartype = UCD_CHARTYPE(c);
3570              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3571                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3572              }              }
3573            break;            break;
3574    
3575            case PT_SC:            case PT_SC:
3576            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3577              {              {
3578              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3579              GETCHARINC(c, eptr);                {
3580              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3581                  MRRETURN(MATCH_NOMATCH);
3582                  }
3583                GETCHARINCTEST(c, eptr);
3584                prop_script = UCD_SCRIPT(c);
3585              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3586                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3587                }
3588              break;
3589    
3590              case PT_ALNUM:
3591              for (i = 1; i <= min; i++)
3592                {
3593                if (eptr >= md->end_subject)
3594                  {
3595                  SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_category = UCD_CATEGORY(c);
3600                if ((prop_category == ucp_L || prop_category == ucp_N)
3601                       == prop_fail_result)
3602                  MRRETURN(MATCH_NOMATCH);
3603                }
3604              break;
3605    
3606              case PT_SPACE:    /* Perl space */
3607              for (i = 1; i <= min; i++)
3608                {
3609                if (eptr >= md->end_subject)
3610                  {
3611                  SCHECK_PARTIAL();
3612                  MRRETURN(MATCH_NOMATCH);
3613                  }
3614                GETCHARINCTEST(c, eptr);
3615                prop_category = UCD_CATEGORY(c);
3616                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3617                     c == CHAR_FF || c == CHAR_CR)
3618                       == prop_fail_result)
3619                  MRRETURN(MATCH_NOMATCH);
3620                }
3621              break;
3622    
3623              case PT_PXSPACE:  /* POSIX space */
3624              for (i = 1; i <= min; i++)
3625                {
3626                if (eptr >= md->end_subject)
3627                  {
3628                  SCHECK_PARTIAL();
3629                  MRRETURN(MATCH_NOMATCH);
3630                  }
3631                GETCHARINCTEST(c, eptr);
3632                prop_category = UCD_CATEGORY(c);
3633                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3634                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3635                       == prop_fail_result)
3636                  MRRETURN(MATCH_NOMATCH);
3637                }
3638              break;
3639    
3640              case PT_WORD:
3641              for (i = 1; i <= min; i++)
3642                {
3643                if (eptr >= md->end_subject)
3644                  {
3645                  SCHECK_PARTIAL();
3646                  MRRETURN(MATCH_NOMATCH);
3647                  }
3648                GETCHARINCTEST(c, eptr);
3649                prop_category = UCD_CATEGORY(c);
3650                if ((prop_category == ucp_L || prop_category == ucp_N ||
3651                     c == CHAR_UNDERSCORE)
3652                       == prop_fail_result)
3653                  MRRETURN(MATCH_NOMATCH);
3654              }              }
3655            break;            break;
3656    
3657              /* This should not occur */
3658    
3659            default:            default:
3660            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3661            }            }
# Line 2691  for (;;) Line 3668  for (;;)
3668          {          {
3669          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3670            {            {
3671              if (eptr >= md->end_subject)
3672                {
3673                SCHECK_PARTIAL();
3674                MRRETURN(MATCH_NOMATCH);
3675                }
3676            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3677            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3678            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3679            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3680              {              {
3681              int len = 1;              int len = 1;
3682              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3683                {                else { GETCHARLEN(c, eptr, len); }
3684                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3685              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3686              eptr += len;              eptr += len;
3687              }              }
# Line 2719  for (;;) Line 3699  for (;;)
3699          case OP_ANY:          case OP_ANY:
3700          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3701            {            {
3702            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3703                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3704              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3705                MRRETURN(MATCH_NOMATCH);
3706                }
3707              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3708              eptr++;
3709              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3710              }
3711            break;
3712    
3713            case OP_ALLANY:
3714            for (i = 1; i <= min; i++)
3715              {
3716              if (eptr >= md->end_subject)
3717                {
3718                SCHECK_PARTIAL();
3719                MRRETURN(MATCH_NOMATCH);
3720                }
3721            eptr++;            eptr++;
3722            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3723            }            }
3724          break;          break;
3725    
3726          case OP_ANYBYTE:          case OP_ANYBYTE:
3727            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3728          eptr += min;          eptr += min;
3729          break;          break;
3730    
3731          case OP_ANYNL:          case OP_ANYNL:
3732          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3733            {            {
3734            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3735                {
3736                SCHECK_PARTIAL();
3737                MRRETURN(MATCH_NOMATCH);
3738                }
3739            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3740            switch(c)            switch(c)
3741              {              {
3742              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3743              case 0x000d:              case 0x000d:
3744              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3745              break;              break;
3746    
3747              case 0x000a:              case 0x000a:
3748                break;
3749    
3750              case 0x000b:              case 0x000b:
3751              case 0x000c:              case 0x000c:
3752              case 0x0085:              case 0x0085:
3753              case 0x2028:              case 0x2028:
3754              case 0x2029:              case 0x2029:
3755                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3756              break;              break;
3757              }              }
3758            }            }
3759          break;          break;
3760    
3761          case OP_NOT_DIGIT:          case OP_NOT_HSPACE:
3762          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3763            {            {
3764            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3765                {
3766                SCHECK_PARTIAL();
3767                MRRETURN(MATCH_NOMATCH);
3768                }
3769              GETCHARINC(c, eptr);
3770              switch(c)
3771                {
3772                default: break;
3773                case 0x09:      /* HT */
3774                case 0x20:      /* SPACE */
3775                case 0xa0:      /* NBSP */
3776                case 0x1680:    /* OGHAM SPACE MARK */
3777                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3778                case 0x2000:    /* EN QUAD */
3779                case 0x2001:    /* EM QUAD */
3780                case 0x2002:    /* EN SPACE */
3781                case 0x2003:    /* EM SPACE */
3782                case 0x2004:    /* THREE-PER-EM SPACE */
3783                case 0x2005:    /* FOUR-PER-EM SPACE */
3784                case 0x2006:    /* SIX-PER-EM SPACE */
3785                case 0x2007:    /* FIGURE SPACE */
3786                case 0x2008:    /* PUNCTUATION SPACE */
3787                case 0x2009:    /* THIN SPACE */
3788                case 0x200A:    /* HAIR SPACE */
3789                case 0x202f:    /* NARROW NO-BREAK SPACE */
3790                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3791                case 0x3000:    /* IDEOGRAPHIC SPACE */
3792                MRRETURN(MATCH_NOMATCH);
3793                }
3794              }
3795            break;
3796    
3797            case OP_HSPACE:
3798            for (i = 1; i <= min; i++)
3799              {
3800              if (eptr >= md->end_subject)
3801                {
3802                SCHECK_PARTIAL();
3803                MRRETURN(MATCH_NOMATCH);
3804                }
3805              GETCHARINC(c, eptr);
3806              switch(c)
3807                {
3808                default: MRRETURN(MATCH_NOMATCH);
3809                case 0x09:      /* HT */
3810                case 0x20:      /* SPACE */
3811                case 0xa0:      /* NBSP */
3812                case 0x1680:    /* OGHAM SPACE MARK */
3813                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3814                case 0x2000:    /* EN QUAD */
3815                case 0x2001:    /* EM QUAD */
3816                case 0x2002:    /* EN SPACE */
3817                case 0x2003:    /* EM SPACE */
3818                case 0x2004:    /* THREE-PER-EM SPACE */
3819                case 0x2005:    /* FOUR-PER-EM SPACE */
3820                case 0x2006:    /* SIX-PER-EM SPACE */
3821                case 0x2007:    /* FIGURE SPACE */
3822                case 0x2008:    /* PUNCTUATION SPACE */
3823                case 0x2009:    /* THIN SPACE */
3824                case 0x200A:    /* HAIR SPACE */
3825                case 0x202f:    /* NARROW NO-BREAK SPACE */
3826                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3827                case 0x3000:    /* IDEOGRAPHIC SPACE */
3828                break;
3829                }
3830              }
3831            break;
3832    
3833            case OP_NOT_VSPACE:
3834            for (i = 1; i <= min; i++)
3835              {
3836              if (eptr >= md->end_subject)
3837                {
3838                SCHECK_PARTIAL();
3839                MRRETURN(MATCH_NOMATCH);
3840                }
3841              GETCHARINC(c, eptr);
3842              switch(c)
3843                {
3844                default: break;
3845                case 0x0a:      /* LF */
3846                case 0x0b:      /* VT */
3847                case 0x0c:      /* FF */
3848                case 0x0d:      /* CR */
3849                case 0x85:      /* NEL */
3850                case 0x2028:    /* LINE SEPARATOR */
3851                case 0x2029:    /* PARAGRAPH SEPARATOR */
3852                MRRETURN(MATCH_NOMATCH);
3853                }
3854              }
3855            break;
3856    
3857            case OP_VSPACE:
3858            for (i = 1; i <= min; i++)
3859              {
3860              if (eptr >= md->end_subject)
3861                {
3862                SCHECK_PARTIAL();
3863                MRRETURN(MATCH_NOMATCH);
3864                }
3865              GETCHARINC(c, eptr);
3866              switch(c)
3867                {
3868                default: MRRETURN(MATCH_NOMATCH);
3869                case 0x0a:      /* LF */
3870                case 0x0b:      /* VT */
3871                case 0x0c:      /* FF */
3872                case 0x0d:      /* CR */
3873                case 0x85:      /* NEL */
3874                case 0x2028:    /* LINE SEPARATOR */
3875                case 0x2029:    /* PARAGRAPH SEPARATOR */
3876                break;
3877                }
3878              }
3879            break;
3880    
3881            case OP_NOT_DIGIT:
3882            for (i = 1; i <= min; i++)
3883              {
3884              if (eptr >= md->end_subject)
3885                {
3886                SCHECK_PARTIAL();
3887                MRRETURN(MATCH_NOMATCH);
3888                }
3889            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3890            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3891              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
3892            }            }
3893          break;          break;
3894    
3895          case OP_DIGIT:          case OP_DIGIT:
3896          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3897            {            {
3898            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3899               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3900              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3901                MRRETURN(MATCH_NOMATCH);
3902                }
3903              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3904                MRRETURN(MATCH_NOMATCH);
3905            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3906            }            }
3907          break;          break;
# Line 2776  for (;;) Line 3909  for (;;)
3909          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3910          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3911            {            {
3912            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3913               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3914              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3915            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3916                }
3917              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3918                MRRETURN(MATCH_NOMATCH);
3919              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3920            }            }
3921          break;          break;
3922    
3923          case OP_WHITESPACE:          case OP_WHITESPACE:
3924          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3925            {            {
3926            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3927               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3928              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3929                MRRETURN(MATCH_NOMATCH);
3930                }
3931              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3932                MRRETURN(MATCH_NOMATCH);
3933            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3934            }            }
3935          break;          break;
# Line 2796  for (;;) Line 3937  for (;;)
3937          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3938          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3939            {            {
3940            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3941               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3942              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3943            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3944                }
3945              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3946                MRRETURN(MATCH_NOMATCH);
3947              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3948            }            }
3949          break;          break;
3950    
3951          case OP_WORDCHAR:          case OP_WORDCHAR:
3952          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3953            {            {
3954            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3955               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3956              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3957                MRRETURN(MATCH_NOMATCH);
3958                }
3959              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3960                MRRETURN(MATCH_NOMATCH);
3961            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3962            }            }
3963          break;          break;
# Line 2821  for (;;) Line 3970  for (;;)
3970  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
3971    
3972        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
3973        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
3974    
3975        switch(ctype)        switch(ctype)
3976          {          {
3977          case OP_ANY:          case OP_ANY:
3978          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3979            {            {
3980            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
3981              {              {
3982              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3983              eptr++;              MRRETURN(MATCH_NOMATCH);
3984              }              }
3985              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3986              eptr++;
3987            }            }
         else eptr += min;  
3988          break;          break;
3989    
3990          case OP_ANYBYTE:          case OP_ALLANY:
3991            if (eptr > md->end_subject - min)
3992              {
3993              SCHECK_PARTIAL();
3994              MRRETURN(MATCH_NOMATCH);
3995              }
3996          eptr += min;          eptr += min;
3997          break;          break;
3998    
3999          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
4000          bytes are present in this case. */          if (eptr > md->end_subject - min)
4001              {
4002              SCHECK_PARTIAL();
4003              MRRETURN(MATCH_NOMATCH);
4004              }
4005            eptr += min;
4006            break;
4007    
4008          case OP_ANYNL:          case OP_ANYNL:
4009          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4010            {            {
4011            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4012                {
4013                SCHECK_PARTIAL();
4014                MRRETURN(MATCH_NOMATCH);
4015                }
4016            switch(*eptr++)            switch(*eptr++)
4017              {              {
4018              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4019              case 0x000d:              case 0x000d:
4020              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4021              break;              break;
4022              case 0x000a:              case 0x000a:
4023                break;
4024    
4025              case 0x000b:              case 0x000b:
4026              case 0x000c:              case 0x000c:
4027              case 0x0085:              case 0x0085:
4028                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4029                break;
4030                }
4031              }
4032            break;
4033    
4034            case OP_NOT_HSPACE:
4035            for (i = 1; i <= min; i++)
4036              {
4037              if (eptr >= md->end_subject)
4038                {
4039                SCHECK_PARTIAL();
4040                MRRETURN(MATCH_NOMATCH);
4041                }
4042              switch(*eptr++)
4043                {
4044                default: break;
4045                case 0x09:      /* HT */
4046                case 0x20:      /* SPACE */
4047                case 0xa0:      /* NBSP */
4048                MRRETURN(MATCH_NOMATCH);
4049                }
4050              }
4051            break;
4052    
4053            case OP_HSPACE:
4054            for (i = 1; i <= min; i++)
4055              {
4056              if (eptr >= md->end_subject)
4057                {
4058                SCHECK_PARTIAL();
4059                MRRETURN(MATCH_NOMATCH);
4060                }
4061              switch(*eptr++)
4062                {
4063                default: MRRETURN(MATCH_NOMATCH);
4064                case 0x09:      /* HT */
4065                case 0x20:      /* SPACE */
4066                case 0xa0:      /* NBSP */
4067                break;
4068                }
4069              }
4070            break;
4071    
4072            case OP_NOT_VSPACE:
4073            for (i = 1; i <= min; i++)
4074              {
4075              if (eptr >= md->end_subject)
4076                {
4077                SCHECK_PARTIAL();
4078                MRRETURN(MATCH_NOMATCH);
4079                }
4080              switch(*eptr++)
4081                {
4082                default: break;
4083                case 0x0a:      /* LF */
4084                case 0x0b:      /* VT */
4085                case 0x0c:      /* FF */
4086                case 0x0d:      /* CR */
4087                case 0x85:      /* NEL */
4088                MRRETURN(MATCH_NOMATCH);
4089                }
4090              }
4091            break;
4092    
4093            case OP_VSPACE:
4094            for (i = 1; i <= min; i++)
4095              {
4096              if (eptr >= md->end_subject)
4097                {
4098                SCHECK_PARTIAL();
4099                MRRETURN(MATCH_NOMATCH);
4100                }
4101              switch(*eptr++)
4102                {
4103                default: MRRETURN(MATCH_NOMATCH);
4104                case 0x0a:      /* LF */
4105                case 0x0b:      /* VT */
4106                case 0x0c:      /* FF */
4107                case 0x0d:      /* CR */
4108                case 0x85:      /* NEL */
4109              break;              break;
4110              }              }
4111            }            }
# Line 2866  for (;;) Line 4113  for (;;)
4113    
4114          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4115          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4116            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4117              if (eptr >= md->end_subject)
4118                {
4119                SCHECK_PARTIAL();
4120                MRRETURN(MATCH_NOMATCH);
4121                }
4122              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4123              }
4124          break;          break;
4125    
4126          case OP_DIGIT:          case OP_DIGIT:
4127          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4128            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4129              if (eptr >= md->end_subject)
4130                {
4131                SCHECK_PARTIAL();
4132                MRRETURN(MATCH_NOMATCH);
4133                }
4134              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4135              }
4136          break;          break;
4137    
4138          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4139          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4140            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4141              if (eptr >= md->end_subject)
4142                {
4143                SCHECK_PARTIAL();
4144                MRRETURN(MATCH_NOMATCH);
4145                }
4146              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4147              }
4148          break;          break;
4149    
4150          case OP_WHITESPACE:          case OP_WHITESPACE:
4151          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4152            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4153              if (eptr >= md->end_subject)
4154                {
4155                SCHECK_PARTIAL();
4156                MRRETURN(MATCH_NOMATCH);
4157                }
4158              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4159              }
4160          break;          break;
4161    
4162          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4163          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4164              {
4165              if (eptr >= md->end_subject)
4166                {
4167                SCHECK_PARTIAL();
4168                MRRETURN(MATCH_NOMATCH);
4169                }
4170            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4171              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4172              }
4173          break;          break;
4174    
4175          case OP_WORDCHAR:          case OP_WORDCHAR:
4176          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4177              {
4178              if (eptr >= md->end_subject)
4179                {
4180                SCHECK_PARTIAL();
4181                MRRETURN(MATCH_NOMATCH);
4182                }
4183            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
4184              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4185              }
4186          break;          break;
4187    
4188          default:          default:
# Line 2919  for (;;) Line 4208  for (;;)
4208            case PT_ANY:            case PT_ANY:
4209            for (fi = min;; fi++)            for (fi = min;; fi++)
4210              {              {
4211              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4212              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4213              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4214              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4215              if (prop_fail_result) RRETURN(MATCH_NOMATCH);                {
4216                  SCHECK_PARTIAL();
4217                  MRRETURN(MATCH_NOMATCH);
4218                  }
4219                GETCHARINCTEST(c, eptr);
4220                if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4221              }              }
4222            /* Control never gets here */            /* Control never gets here */
4223    
4224            case PT_LAMP:            case PT_LAMP:
4225            for (fi = min;; fi++)            for (fi = min;; fi++)
4226              {              {
4227              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4228              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4229              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4230              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4231              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4232                  SCHECK_PARTIAL();
4233                  MRRETURN(MATCH_NOMATCH);
4234                  }
4235                GETCHARINCTEST(c, eptr);
4236                prop_chartype = UCD_CHARTYPE(c);
4237              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4238                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
4239                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
4240                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
4241              }              }
4242            /* Control never gets here */            /* Control never gets here */
4243    
4244            case PT_GC:            case PT_GC:
4245            for (fi = min;; fi++)            for (fi = min;; fi++)
4246              {              {
4247              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
4248              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4249              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4250              GETCHARINC(c, eptr);