/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 130 by ph10, Mon Mar 26 15:09:47 2007 UTC revision 551 by ph10, Sun Oct 10 17:33:07 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 78  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 130  match_ref(int offset, register USPTR ept Line 151  match_ref(int offset, register USPTR ept
151  {  {
152  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
153    
154  #ifdef DEBUG  #ifdef PCRE_DEBUG
155  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
156    printf("matching subject <null>");    printf("matching subject <null>");
157  else  else
# Line 147  printf("\n"); Line 168  printf("\n");
168    
169  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
170    
171  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
172    properly if Unicode properties are supported. Otherwise, we can check only
173    ASCII characters. */
174    
175  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
176    {    {
177    #ifdef SUPPORT_UTF8
178    #ifdef SUPPORT_UCP
179      if (md->utf8)
180        {
181        USPTR endptr = eptr + length;
182        while (eptr < endptr)
183          {
184          int c, d;
185          GETCHARINC(c, eptr);
186          GETCHARINC(d, p);
187          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
188          }
189        }
190      else
191    #endif
192    #endif
193    
194      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
195      is no UCP support. */
196    
197    while (length-- > 0)    while (length-- > 0)
198      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
199    }    }
200    
201    /* In the caseful case, we can just compare the bytes, whether or not we
202    are in UTF-8 mode. */
203    
204  else  else
205    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
206    
# Line 183  calls by keeping local variables that ne Line 230  calls by keeping local variables that ne
230  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
231  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
232  always used to.  always used to.
233    
234    The original heap-recursive code used longjmp(). However, it seems that this
235    can be very slow on some operating systems. Following a suggestion from Stan
236    Switzer, the use of longjmp() has been abolished, at the cost of having to
237    provide a unique number for each call to RMATCH. There is no way of generating
238    a sequence of numbers at compile time in C. I have given them names, to make
239    them stand out more clearly.
240    
241    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
242    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
243    tests. Furthermore, not using longjmp() means that local dynamic variables
244    don't have indeterminate values; this has meant that the frame size can be
245    reduced because the result can be "passed back" by straight setting of the
246    variable instead of being passed in the frame.
247  ****************************************************************************  ****************************************************************************
248  ***************************************************************************/  ***************************************************************************/
249    
250    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
251    below must be updated in sync.  */
252    
253    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
254           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
255           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
256           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
257           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
258           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
259           RM61,  RM62 };
260    
261  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
262  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
263    actually used in this definition. */
264    
265  #ifndef NO_RECURSE  #ifndef NO_RECURSE
266  #define REGISTER register  #define REGISTER register
267  #ifdef DEBUG  
268  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
269    #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
270    { \    { \
271    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
272    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
273    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
274    }    }
275  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 278  versions and production versions. */
278    return ra; \    return ra; \
279    }    }
280  #else  #else
281  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
282    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
283  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
284  #endif  #endif
285    
286  #else  #else
287    
288    
289  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
290  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
291  match(), which never changes. */  argument of match(), which never changes. */
292    
293  #define REGISTER  #define REGISTER
294    
295  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
296    {\    {\
297    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
298    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
299      {\    frame->Xwhere = rw; \
300      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
301      newframe->Xecode = rb;\    newframe->Xecode = rb;\
302      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
303      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
304      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
305      newframe->Xflags = rg;\    newframe->Xims = re;\
306      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xeptrb = rf;\
307      newframe->Xprevframe = frame;\    newframe->Xflags = rg;\
308      frame = newframe;\    newframe->Xrdepth = frame->Xrdepth + 1;\
309      DPRINTF(("restarting from line %d\n", __LINE__));\    newframe->Xprevframe = frame;\
310      goto HEAP_RECURSE;\    frame = newframe;\
311      }\    DPRINTF(("restarting from line %d\n", __LINE__));\
312    else\    goto HEAP_RECURSE;\
313      {\    L_##rw:\
314      DPRINTF(("longjumped back to line %d\n", __LINE__));\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
315    }    }
316    
317  #define RRETURN(ra)\  #define RRETURN(ra)\
318    {\    {\
319    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
320    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
321    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
322    if (frame != NULL)\    if (frame != NULL)\
323      {\      {\
324      frame->Xresult = ra;\      rrc = ra;\
325      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
326      }\      }\
327    return ra;\    return ra;\
328    }    }
# Line 266  typedef struct heapframe { Line 335  typedef struct heapframe {
335    
336    /* Function arguments that may change */    /* Function arguments that may change */
337    
338    const uschar *Xeptr;    USPTR Xeptr;
339    const uschar *Xecode;    const uschar *Xecode;
340      USPTR Xmstart;
341      USPTR Xmarkptr;
342    int Xoffset_top;    int Xoffset_top;
343    long int Xims;    long int Xims;
344    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 276  typedef struct heapframe { Line 347  typedef struct heapframe {
347    
348    /* Function local variables */    /* Function local variables */
349    
350    const uschar *Xcallpat;    USPTR Xcallpat;
351    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
352    const uschar *Xdata;    USPTR Xcharptr;
353    const uschar *Xnext;  #endif
354    const uschar *Xpp;    USPTR Xdata;
355    const uschar *Xprev;    USPTR Xnext;
356    const uschar *Xsaved_eptr;    USPTR Xpp;
357      USPTR Xprev;
358      USPTR Xsaved_eptr;
359    
360    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
361    
# Line 303  typedef struct heapframe { Line 376  typedef struct heapframe {
376    uschar Xocchars[8];    uschar Xocchars[8];
377  #endif  #endif
378    
379      int Xcodelink;
380    int Xctype;    int Xctype;
381    unsigned int Xfc;    unsigned int Xfc;
382    int Xfi;    int Xfi;
# Line 318  typedef struct heapframe { Line 392  typedef struct heapframe {
392    
393    eptrblock Xnewptrb;    eptrblock Xnewptrb;
394    
395    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
396    
397    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
398    
399  } heapframe;  } heapframe;
400    
# Line 339  typedef struct heapframe { Line 412  typedef struct heapframe {
412    
413  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
414  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
415  same response.  same response. */
416    
417  Performance note: It might be tempting to extract commonly used fields from the  /* These macros pack up tests that are used for partial matching, and which
418  md structure (e.g. utf8, end_subject) into individual variables to improve  appear several times in the code. We set the "hit end" flag if the pointer is
419    at the end of the subject and also past the start of the subject (i.e.
420    something has been matched). For hard partial matching, we then return
421    immediately. The second one is used when we already know we are past the end of
422    the subject. */
423    
424    #define CHECK_PARTIAL()\
425      if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
426        {\
427        md->hitend = TRUE;\
428        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
429        }
430    
431    #define SCHECK_PARTIAL()\
432      if (md->partial != 0 && eptr > mstart)\
433        {\
434        md->hitend = TRUE;\
435        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\
436        }
437    
438    
439    /* Performance note: It might be tempting to extract commonly used fields from
440    the md structure (e.g. utf8, end_subject) into individual variables to improve
441  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
442  made performance worse.  made performance worse.
443    
444  Arguments:  Arguments:
445     eptr        pointer to current character in subject     eptr        pointer to current character in subject
446     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
447       mstart      pointer to the current match start position (can be modified
448                     by encountering \K)
449       markptr     pointer to the most recent MARK name, or NULL
450     offset_top  current top pointer     offset_top  current top pointer
451     md          pointer to "static" info for the match     md          pointer to "static" info for the match
452     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 358  Arguments: Line 456  Arguments:
456                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
457                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
458                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
459     rdepth      the recursion depth     rdepth      the recursion depth
460    
461  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
462                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
463                   a negative MATCH_xxx value for PRUNE, SKIP, etc
464                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
465                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
466  */  */
467    
468  static int  static int
469  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
470    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, unsigned long int ims,
471    int flags, unsigned int rdepth)    eptrblock *eptrb, int flags, unsigned int rdepth)
472  {  {
473  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
474  so they can be ordinary variables in all cases. Mark some of them with  so they can be ordinary variables in all cases. Mark some of them with
# Line 382  register unsigned int c; /* Character Line 480  register unsigned int c; /* Character
480  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
481    
482  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
483    int condcode;
484    
485  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
486  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 390  heap whenever RMATCH() does a "recursion Line 489  heap whenever RMATCH() does a "recursion
489    
490  #ifdef NO_RECURSE  #ifdef NO_RECURSE
491  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));
492    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
493  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
494    
495  /* Copy in the original argument variables */  /* Copy in the original argument variables */
496    
497  frame->Xeptr = eptr;  frame->Xeptr = eptr;
498  frame->Xecode = ecode;  frame->Xecode = ecode;
499    frame->Xmstart = mstart;
500    frame->Xmarkptr = markptr;
501  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
502  frame->Xims = ims;  frame->Xims = ims;
503  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 410  HEAP_RECURSE: Line 512  HEAP_RECURSE:
512    
513  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
514  #define ecode              frame->Xecode  #define ecode              frame->Xecode
515    #define mstart             frame->Xmstart
516    #define markptr            frame->Xmarkptr
517  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
518  #define ims                frame->Xims  #define ims                frame->Xims
519  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 422  HEAP_RECURSE: Line 526  HEAP_RECURSE:
526  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
527  #endif  #endif
528  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
529    #define codelink           frame->Xcodelink
530  #define data               frame->Xdata  #define data               frame->Xdata
531  #define next               frame->Xnext  #define next               frame->Xnext
532  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 502  int oclength; Line 607  int oclength;
607  uschar occhars[8];  uschar occhars[8];
608  #endif  #endif
609    
610    int codelink;
611  int ctype;  int ctype;
612  int length;  int length;
613  int max;  int max;
# Line 535  TAIL_RECURSE: Line 641  TAIL_RECURSE:
641  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
642  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
643  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
644  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
645  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
646  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
647  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
648    
649    #ifdef SUPPORT_UTF8
650    utf8 = md->utf8;       /* Local copy of the flag */
651    #else
652    utf8 = FALSE;
653    #endif
654    
655  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
656  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
657    
# Line 548  if (rdepth >= md->match_limit_recursion) Line 660  if (rdepth >= md->match_limit_recursion)
660    
661  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
662    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
663  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
664  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
665  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
666  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
667  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
668  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
669  already used. */  block that is used is on the stack, so a new one may be required for each
670    match(). */
671    
672  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
673    {    {
674    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
675    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
676      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
677    }    }
678    
679  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 583  for (;;) Line 683  for (;;)
683    minimize = possessive = FALSE;    minimize = possessive = FALSE;
684    op = *ecode;    op = *ecode;
685    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
686    switch(op)    switch(op)
687      {      {
688        case OP_MARK:
689        markptr = ecode + 2;
690        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
691          ims, eptrb, flags, RM55);
692    
693        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
694        argument, and we must check whether that argument matches this MARK's
695        argument. It is passed back in md->start_match_ptr (an overloading of that
696        variable). If it does match, we reset that variable to the current subject
697        position and return MATCH_SKIP. Otherwise, pass back the return code
698        unaltered. */
699    
700        if (rrc == MATCH_SKIP_ARG &&
701            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
702          {
703          md->start_match_ptr = eptr;
704          RRETURN(MATCH_SKIP);
705          }
706    
707        if (md->mark == NULL) md->mark = markptr;
708        RRETURN(rrc);
709    
710        case OP_FAIL:
711        MRRETURN(MATCH_NOMATCH);
712    
713        /* COMMIT overrides PRUNE, SKIP, and THEN */
714    
715        case OP_COMMIT:
716        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
717          ims, eptrb, flags, RM52);
718        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
719            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
720            rrc != MATCH_THEN)
721          RRETURN(rrc);
722        MRRETURN(MATCH_COMMIT);
723    
724        /* PRUNE overrides THEN */
725    
726        case OP_PRUNE:
727        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
728          ims, eptrb, flags, RM51);
729        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
730        MRRETURN(MATCH_PRUNE);
731    
732        case OP_PRUNE_ARG:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
734          ims, eptrb, flags, RM56);
735        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736        md->mark = ecode + 2;
737        RRETURN(MATCH_PRUNE);
738    
739        /* SKIP overrides PRUNE and THEN */
740    
741        case OP_SKIP:
742        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
743          ims, eptrb, flags, RM53);
744        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
745          RRETURN(rrc);
746        md->start_match_ptr = eptr;   /* Pass back current position */
747        MRRETURN(MATCH_SKIP);
748    
749        case OP_SKIP_ARG:
750        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
751          ims, eptrb, flags, RM57);
752        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
753          RRETURN(rrc);
754    
755        /* Pass back the current skip name by overloading md->start_match_ptr and
756        returning the special MATCH_SKIP_ARG return code. This will either be
757        caught by a matching MARK, or get to the top, where it is treated the same
758        as PRUNE. */
759    
760        md->start_match_ptr = ecode + 2;
761        RRETURN(MATCH_SKIP_ARG);
762    
763        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
764        the alt that is at the start of the current branch. This makes it possible
765        to skip back past alternatives that precede the THEN within the current
766        branch. */
767    
768        case OP_THEN:
769        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
770          ims, eptrb, flags, RM54);
771        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
772        md->start_match_ptr = ecode - GET(ecode, 1);
773        MRRETURN(MATCH_THEN);
774    
775        case OP_THEN_ARG:
776        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
777          offset_top, md, ims, eptrb, flags, RM58);
778        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
779        md->start_match_ptr = ecode - GET(ecode, 1);
780        md->mark = ecode + LINK_SIZE + 2;
781        RRETURN(MATCH_THEN);
782    
783      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
784      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
785      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 612  for (;;) Line 799  for (;;)
799      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
800      offset = number << 1;      offset = number << 1;
801    
802  #ifdef DEBUG  #ifdef PCRE_DEBUG
803      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
804      printf("subject=");      printf("subject=");
805      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 627  for (;;) Line 814  for (;;)
814        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
815    
816        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
817        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
818            (int)(eptr - md->start_subject);
819    
820        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
821        do        do
822          {          {
823          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
824            ims, eptrb, flags);            ims, eptrb, flags, RM1);
825          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
826                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
827              RRETURN(rrc);
828          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
829          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
830          }          }
# Line 646  for (;;) Line 836  for (;;)
836        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
837        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
838    
839          if (rrc != MATCH_THEN) md->mark = markptr;
840        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
841        }        }
842    
843      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
844      bracket. */      as a non-capturing bracket. */
845    
846        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
847        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
848    
849      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
850    
851        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
852        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853    
854      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
855      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
856      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
857      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
858        is set. */
859    
860      case OP_BRA:      case OP_BRA:
861      case OP_SBRA:      case OP_SBRA:
# Line 665  for (;;) Line 863  for (;;)
863      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
864      for (;;)      for (;;)
865        {        {
866        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
867          {          {
868          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
869          flags |= match_tail_recursed;            {
870          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
871          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
872              goto TAIL_RECURSE;
873              }
874    
875            /* Possibly empty group; can't use tail recursion. */
876    
877            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
878              eptrb, flags, RM48);
879            if (rrc == MATCH_NOMATCH) md->mark = markptr;
880            RRETURN(rrc);
881          }          }
882    
883        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
884        otherwise return. */        otherwise return. */
885    
886        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
887          eptrb, flags);          eptrb, flags, RM2);
888        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
889              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
890            RRETURN(rrc);
891        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
892        }        }
893      /* Control never reaches here. */      /* Control never reaches here. */
# Line 691  for (;;) Line 900  for (;;)
900    
901      case OP_COND:      case OP_COND:
902      case OP_SCOND:      case OP_SCOND:
903      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
904    
905        /* Because of the way auto-callout works during compile, a callout item is
906        inserted between OP_COND and an assertion condition. */
907    
908        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
909        {        {
910        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
911        condition = md->recursive != NULL &&          {
912          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
913        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
914            cb.callout_number   = ecode[LINK_SIZE+2];
915            cb.offset_vector    = md->offset_vector;
916            cb.subject          = (PCRE_SPTR)md->start_subject;
917            cb.subject_length   = (int)(md->end_subject - md->start_subject);
918            cb.start_match      = (int)(mstart - md->start_subject);
919            cb.current_position = (int)(eptr - md->start_subject);
920            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
921            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
922            cb.capture_top      = offset_top/2;
923            cb.capture_last     = md->capture_last;
924            cb.callout_data     = md->callout_data;
925            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
926            if (rrc < 0) RRETURN(rrc);
927            }
928          ecode += _pcre_OP_lengths[OP_CALLOUT];
929          }
930    
931        condcode = ecode[LINK_SIZE+1];
932    
933        /* Now see what the actual condition is */
934    
935        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
936          {
937          if (md->recursive == NULL)                /* Not recursing => FALSE */
938            {
939            condition = FALSE;
940            ecode += GET(ecode, 1);
941            }
942          else
943            {
944            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
945            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
946    
947            /* If the test is for recursion into a specific subpattern, and it is
948            false, but the test was set up by name, scan the table to see if the
949            name refers to any other numbers, and test them. The condition is true
950            if any one is set. */
951    
952            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
953              {
954              uschar *slotA = md->name_table;
955              for (i = 0; i < md->name_count; i++)
956                {
957                if (GET2(slotA, 0) == recno) break;
958                slotA += md->name_entry_size;
959                }
960    
961              /* Found a name for the number - there can be only one; duplicate
962              names for different numbers are allowed, but not vice versa. First
963              scan down for duplicates. */
964    
965              if (i < md->name_count)
966                {
967                uschar *slotB = slotA;
968                while (slotB > md->name_table)
969                  {
970                  slotB -= md->name_entry_size;
971                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
972                    {
973                    condition = GET2(slotB, 0) == md->recursive->group_num;
974                    if (condition) break;
975                    }
976                  else break;
977                  }
978    
979                /* Scan up for duplicates */
980    
981                if (!condition)
982                  {
983                  slotB = slotA;
984                  for (i++; i < md->name_count; i++)
985                    {
986                    slotB += md->name_entry_size;
987                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
988                      {
989                      condition = GET2(slotB, 0) == md->recursive->group_num;
990                      if (condition) break;
991                      }
992                    else break;
993                    }
994                  }
995                }
996              }
997    
998            /* Choose branch according to the condition */
999    
1000            ecode += condition? 3 : GET(ecode, 1);
1001            }
1002        }        }
1003    
1004      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1005        {        {
1006        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1007        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1008    
1009          /* If the numbered capture is unset, but the reference was by name,
1010          scan the table to see if the name refers to any other numbers, and test
1011          them. The condition is true if any one is set. This is tediously similar
1012          to the code above, but not close enough to try to amalgamate. */
1013    
1014          if (!condition && condcode == OP_NCREF)
1015            {
1016            int refno = offset >> 1;
1017            uschar *slotA = md->name_table;
1018    
1019            for (i = 0; i < md->name_count; i++)
1020              {
1021              if (GET2(slotA, 0) == refno) break;
1022              slotA += md->name_entry_size;
1023              }
1024    
1025            /* Found a name for the number - there can be only one; duplicate names
1026            for different numbers are allowed, but not vice versa. First scan down
1027            for duplicates. */
1028    
1029            if (i < md->name_count)
1030              {
1031              uschar *slotB = slotA;
1032              while (slotB > md->name_table)
1033                {
1034                slotB -= md->name_entry_size;
1035                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1036                  {
1037                  offset = GET2(slotB, 0) << 1;
1038                  condition = offset < offset_top &&
1039                    md->offset_vector[offset] >= 0;
1040                  if (condition) break;
1041                  }
1042                else break;
1043                }
1044    
1045              /* Scan up for duplicates */
1046    
1047              if (!condition)
1048                {
1049                slotB = slotA;
1050                for (i++; i < md->name_count; i++)
1051                  {
1052                  slotB += md->name_entry_size;
1053                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1054                    {
1055                    offset = GET2(slotB, 0) << 1;
1056                    condition = offset < offset_top &&
1057                      md->offset_vector[offset] >= 0;
1058                    if (condition) break;
1059                    }
1060                  else break;
1061                  }
1062                }
1063              }
1064            }
1065    
1066          /* Choose branch according to the condition */
1067    
1068        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1069        }        }
1070    
1071      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1072        {        {
1073        condition = FALSE;        condition = FALSE;
1074        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 718  for (;;) Line 1080  for (;;)
1080    
1081      else      else
1082        {        {
1083        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
1084            match_condassert);            match_condassert, RM3);
1085        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1086          {          {
1087          condition = TRUE;          condition = TRUE;
1088          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1089          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1090          }          }
1091        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1092                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1093          {          {
1094          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1095          }          }
1096        else        else
1097          {          {
1098          condition = FALSE;          condition = FALSE;
1099          ecode += GET(ecode, 1);          ecode += codelink;
1100          }          }
1101        }        }
1102    
1103      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1104      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1105      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1106        group. If the second alternative doesn't exist, we can just plough on. */
1107    
1108      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1109        {        {
1110        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1111        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1112        goto TAIL_RECURSE;          {
1113            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
1114            RRETURN(rrc);
1115            }
1116          else                       /* Group must match something */
1117            {
1118            flags = 0;
1119            goto TAIL_RECURSE;
1120            }
1121        }        }
1122      else      else                         /* Condition false & no alternative */
1123        {        {
1124        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1125        }        }
1126      break;      break;
1127    
1128    
1129      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1130      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1131    
1132        case OP_CLOSE:
1133        number = GET2(ecode, 1);
1134        offset = number << 1;
1135    
1136    #ifdef PCRE_DEBUG
1137          printf("end bracket %d at *ACCEPT", number);
1138          printf("\n");
1139    #endif
1140    
1141        md->capture_last = number;
1142        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143          {
1144          md->offset_vector[offset] =
1145            md->offset_vector[md->offset_end - number];
1146          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1147          if (offset_top <= offset) offset_top = offset + 2;
1148          }
1149        ecode += 3;
1150        break;
1151    
1152    
1153        /* End of the pattern, either real or forced. If we are in a top-level
1154        recursion, we should restore the offsets appropriately and continue from
1155        after the call. */
1156    
1157        case OP_ACCEPT:
1158      case OP_END:      case OP_END:
1159      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1160        {        {
# Line 765  for (;;) Line 1163  for (;;)
1163        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1164        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1165          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1166        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
1167        ims = original_ims;        ims = original_ims;
1168        ecode = rec->after_call;        ecode = rec->after_call;
1169        break;        break;
1170        }        }
1171    
1172      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1173      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1174        the subject. In both cases, backtracking will then try other alternatives,
1175        if any. */
1176    
1177        if (eptr == mstart &&
1178            (md->notempty ||
1179              (md->notempty_atstart &&
1180                mstart == md->start_subject + md->start_offset)))
1181          MRRETURN(MATCH_NOMATCH);
1182    
1183        /* Otherwise, we have a match. */
1184    
1185        md->end_match_ptr = eptr;           /* Record where we ended */
1186        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1187        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1188    
1189        /* For some reason, the macros don't work properly if an expression is
1190        given as the argument to MRRETURN when the heap is in use. */
1191    
1192      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1193      md->end_match_ptr = eptr;          /* Record where we ended */      MRRETURN(rrc);
     md->end_offset_top = offset_top;   /* and how many extracts were taken */  
     RRETURN(MATCH_MATCH);  
1194    
1195      /* Change option settings */      /* Change option settings */
1196    
# Line 797  for (;;) Line 1210  for (;;)
1210      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1211      do      do
1212        {        {
1213        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1214        if (rrc == MATCH_MATCH) break;          RM4);
1215        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1216            {
1217            mstart = md->start_match_ptr;   /* In case \K reset it */
1218            break;
1219            }
1220          if (rrc != MATCH_NOMATCH &&
1221              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1222            RRETURN(rrc);
1223        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1224        }        }
1225      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1226      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1227    
1228      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1229    
# Line 817  for (;;) Line 1237  for (;;)
1237      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1238      continue;      continue;
1239    
1240      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1241        PRUNE, or COMMIT means we must assume failure without checking subsequent
1242        branches. */
1243    
1244      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1245      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1246      do      do
1247        {        {
1248        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
1249        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);          RM5);
1250        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1251          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1252            {
1253            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1254            break;
1255            }
1256          if (rrc != MATCH_NOMATCH &&
1257              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1258            RRETURN(rrc);
1259        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1260        }        }
1261      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 848  for (;;) Line 1278  for (;;)
1278        while (i-- > 0)        while (i-- > 0)
1279          {          {
1280          eptr--;          eptr--;
1281          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1282          BACKCHAR(eptr)          BACKCHAR(eptr);
1283          }          }
1284        }        }
1285      else      else
# Line 859  for (;;) Line 1289  for (;;)
1289    
1290        {        {
1291        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1292        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1293        }        }
1294    
1295      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1296    
1297        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1298      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1299      break;      break;
1300    
# Line 879  for (;;) Line 1310  for (;;)
1310        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1311        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1312        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1313        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1314        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1315        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1316        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1317        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1318        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1319        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1320        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1321        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1322        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1323        }        }
1324      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 942  for (;;) Line 1373  for (;;)
1373    
1374        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1375              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1376        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1377    
1378        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1379        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 952  for (;;) Line 1382  for (;;)
1382        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1383        do        do
1384          {          {
1385          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1386            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1387          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1388            {            {
1389            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1390            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1391            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1392              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1393            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1394            }            }
1395          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1396                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1397            {            {
1398            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1399              if (new_recursive.offset_save != stacksave)
1400                (pcre_free)(new_recursive.offset_save);
1401            RRETURN(rrc);            RRETURN(rrc);
1402            }            }
1403    
# Line 979  for (;;) Line 1412  for (;;)
1412        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1413        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1414          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1415        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1416        }        }
1417      /* Control never reaches here */      /* Control never reaches here */
1418    
# Line 988  for (;;) Line 1421  for (;;)
1421      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1422      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1423      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1424      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1425        the start-of-match value in case it was changed by \K. */
1426    
1427      case OP_ONCE:      case OP_ONCE:
1428      prev = ecode;      prev = ecode;
# Line 996  for (;;) Line 1430  for (;;)
1430    
1431      do      do
1432        {        {
1433        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
1434          eptrb, 0);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1435        if (rrc == MATCH_MATCH) break;          {
1436        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1437            break;
1438            }
1439          if (rrc != MATCH_NOMATCH &&
1440              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1441            RRETURN(rrc);
1442        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1443        }        }
1444      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1042  for (;;) Line 1481  for (;;)
1481    
1482      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1483        {        {
1484        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1485        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1486        ecode = prev;        ecode = prev;
1487        flags = match_tail_recursed;        flags = 0;
1488        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1489        }        }
1490      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1491        {        {
1492        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1493        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1494        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1495        flags = match_tail_recursed;        flags = 0;
1496        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1497        }        }
1498      /* Control never gets here */      /* Control never gets here */
# Line 1065  for (;;) Line 1504  for (;;)
1504      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1505      break;      break;
1506    
1507      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1508      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1509      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1510      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1511      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1512    
1513      case OP_BRAZERO:      case OP_BRAZERO:
1514        {        {
1515        next = ecode+1;        next = ecode+1;
1516        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1519        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1085  for (;;) Line 1524  for (;;)
1524        {        {
1525        next = ecode+1;        next = ecode+1;
1526        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1527        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1528        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1529        ecode++;        ecode++;
1530        }        }
1531      break;      break;
1532    
1533        case OP_SKIPZERO:
1534          {
1535          next = ecode+1;
1536          do next += GET(next,1); while (*next == OP_ALT);
1537          ecode = next + 1 + LINK_SIZE;
1538          }
1539        break;
1540    
1541      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1542    
1543      case OP_KET:      case OP_KET:
# Line 1109  for (;;) Line 1556  for (;;)
1556        }        }
1557      else saved_eptr = NULL;      else saved_eptr = NULL;
1558    
1559      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1560      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1561      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1562        it was changed by \K. */
1563    
1564      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1565          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1119  for (;;) Line 1567  for (;;)
1567        {        {
1568        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1569        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1570        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1571          MRRETURN(MATCH_MATCH);
1572        }        }
1573    
1574      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1133  for (;;) Line 1582  for (;;)
1582        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1583        offset = number << 1;        offset = number << 1;
1584    
1585  #ifdef DEBUG  #ifdef PCRE_DEBUG
1586        printf("end bracket %d", number);        printf("end bracket %d", number);
1587        printf("\n");        printf("\n");
1588  #endif  #endif
# Line 1143  for (;;) Line 1592  for (;;)
1592          {          {
1593          md->offset_vector[offset] =          md->offset_vector[offset] =
1594            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1595          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1596          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1597          }          }
1598    
# Line 1155  for (;;) Line 1604  for (;;)
1604          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1605          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1606          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1607          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1608            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1609            offset_top = rec->save_offset_top;
1610          ecode = rec->after_call;          ecode = rec->after_call;
1611          ims = original_ims;          ims = original_ims;
1612          break;          break;
# Line 1184  for (;;) Line 1633  for (;;)
1633    
1634      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1635      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1636      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1637        unlimited repeat of a group that can match an empty string. */
1638    
1639      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1640    
1641      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1642        {        {
1643        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1644        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1645          if (flags != 0)    /* Could match an empty string */
1646            {
1647            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1648            RRETURN(rrc);
1649            }
1650        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1651        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1652        }        }
1653      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1654        {        {
1655        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1656        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1657        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1658        flags = match_tail_recursed;        flags = 0;
1659        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1660        }        }
1661      /* Control never gets here */      /* Control never gets here */
# Line 1209  for (;;) Line 1663  for (;;)
1663      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1664    
1665      case OP_CIRC:      case OP_CIRC:
1666      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1667      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1668        {        {
1669        if (eptr != md->start_subject &&        if (eptr != md->start_subject &&
1670            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1671          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1672        ecode++;        ecode++;
1673        break;        break;
1674        }        }
# Line 1223  for (;;) Line 1677  for (;;)
1677      /* Start of subject assertion */      /* Start of subject assertion */
1678    
1679      case OP_SOD:      case OP_SOD:
1680      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1681      ecode++;      ecode++;
1682      break;      break;
1683    
1684      /* Start of match assertion */      /* Start of match assertion */
1685    
1686      case OP_SOM:      case OP_SOM:
1687      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1688        ecode++;
1689        break;
1690    
1691        /* Reset the start of match point */
1692    
1693        case OP_SET_SOM:
1694        mstart = eptr;
1695      ecode++;      ecode++;
1696      break;      break;
1697    
# Line 1241  for (;;) Line 1702  for (;;)
1702      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1703        {        {
1704        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1705          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1706        else        else
1707          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) MRRETURN(MATCH_NOMATCH); }
1708        ecode++;        ecode++;
1709        break;        break;
1710        }        }
1711      else      else
1712        {        {
1713        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1714        if (!md->endonly)        if (!md->endonly)
1715          {          {
1716          if (eptr != md->end_subject &&          if (eptr != md->end_subject &&
1717              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
1719          ecode++;          ecode++;
1720          break;          break;
1721          }          }
# Line 1264  for (;;) Line 1725  for (;;)
1725      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1726    
1727      case OP_EOD:      case OP_EOD:
1728      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1729      ecode++;      ecode++;
1730      break;      break;
1731    
# Line 1273  for (;;) Line 1734  for (;;)
1734      case OP_EODN:      case OP_EODN:
1735      if (eptr != md->end_subject &&      if (eptr != md->end_subject &&
1736          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1737        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1738      ecode++;      ecode++;
1739      break;      break;
1740    
# Line 1285  for (;;) Line 1746  for (;;)
1746    
1747        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1748        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1749        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1750          partial matching. */
1751    
1752  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1753        if (utf8)        if (utf8)
1754          {          {
1755            /* Get status of previous character */
1756    
1757          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1758            {            {
1759            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1760            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1761              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1762            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1763    #ifdef SUPPORT_UCP
1764              if (md->use_ucp)
1765                {
1766                if (c == '_') prev_is_word = TRUE; else
1767                  {
1768                  int cat = UCD_CATEGORY(c);
1769                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1770                  }
1771                }
1772              else
1773    #endif
1774            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1775            }            }
1776          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1777            /* Get status of next character */
1778    
1779            if (eptr >= md->end_subject)
1780              {
1781              SCHECK_PARTIAL();
1782              cur_is_word = FALSE;
1783              }
1784            else
1785            {            {
1786            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1787    #ifdef SUPPORT_UCP
1788              if (md->use_ucp)
1789                {
1790                if (c == '_') cur_is_word = TRUE; else
1791                  {
1792                  int cat = UCD_CATEGORY(c);
1793                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1794                  }
1795                }
1796              else
1797    #endif
1798            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1799            }            }
1800          }          }
1801        else        else
1802  #endif  #endif
1803    
1804        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1805          consistency with the behaviour of \w we do use it in this case. */
1806    
1807          {          {
1808          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
1809            ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
1810          cur_is_word = (eptr < md->end_subject) &&          if (eptr == md->start_subject) prev_is_word = FALSE; else
1811            ((md->ctypes[*eptr] & ctype_word) != 0);            {
1812              if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1813    #ifdef SUPPORT_UCP
1814              if (md->use_ucp)
1815                {
1816                c = eptr[-1];
1817                if (c == '_') prev_is_word = TRUE; else
1818                  {
1819                  int cat = UCD_CATEGORY(c);
1820                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1821                  }
1822                }
1823              else
1824    #endif
1825              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1826              }
1827    
1828            /* Get status of next character */
1829    
1830            if (eptr >= md->end_subject)
1831              {
1832              SCHECK_PARTIAL();
1833              cur_is_word = FALSE;
1834              }
1835            else
1836    #ifdef SUPPORT_UCP
1837            if (md->use_ucp)
1838              {
1839              c = *eptr;
1840              if (c == '_') cur_is_word = TRUE; else
1841                {
1842                int cat = UCD_CATEGORY(c);
1843                cur_is_word = (cat == ucp_L || cat == ucp_N);
1844                }
1845              }
1846            else
1847    #endif
1848            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1849          }          }
1850    
1851        /* Now see if the situation is what we want */        /* Now see if the situation is what we want */
1852    
1853        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
1854             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1855          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
1856        }        }
1857      break;      break;
1858    
1859      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1860    
1861      case OP_ANY:      case OP_ANY:
1862      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1863        /* Fall through */
1864    
1865        case OP_ALLANY:
1866        if (eptr++ >= md->end_subject)
1867        {        {
1868        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);        SCHECK_PARTIAL();
1869          MRRETURN(MATCH_NOMATCH);
1870        }        }
1871      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     if (utf8)  
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1872      ecode++;      ecode++;
1873      break;      break;
1874    
# Line 1340  for (;;) Line 1876  for (;;)
1876      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1877    
1878      case OP_ANYBYTE:      case OP_ANYBYTE:
1879      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1880          {
1881          SCHECK_PARTIAL();
1882          MRRETURN(MATCH_NOMATCH);
1883          }
1884      ecode++;      ecode++;
1885      break;      break;
1886    
1887      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1888      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1889          {
1890          SCHECK_PARTIAL();
1891          MRRETURN(MATCH_NOMATCH);
1892          }
1893      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1894      if (      if (
1895  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1353  for (;;) Line 1897  for (;;)
1897  #endif  #endif
1898         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1899         )         )
1900        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1901      ecode++;      ecode++;
1902      break;      break;
1903    
1904      case OP_DIGIT:      case OP_DIGIT:
1905      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1906          {
1907          SCHECK_PARTIAL();
1908          MRRETURN(MATCH_NOMATCH);
1909          }
1910      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1911      if (      if (
1912  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1366  for (;;) Line 1914  for (;;)
1914  #endif  #endif
1915         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1916         )         )
1917        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1918      ecode++;      ecode++;
1919      break;      break;
1920    
1921      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1922      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1923          {
1924          SCHECK_PARTIAL();
1925          MRRETURN(MATCH_NOMATCH);
1926          }
1927      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1928      if (      if (
1929  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1379  for (;;) Line 1931  for (;;)
1931  #endif  #endif
1932         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1933         )         )
1934        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1935      ecode++;      ecode++;
1936      break;      break;
1937    
1938      case OP_WHITESPACE:      case OP_WHITESPACE:
1939      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1940          {
1941          SCHECK_PARTIAL();
1942          MRRETURN(MATCH_NOMATCH);
1943          }
1944      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1945      if (      if (
1946  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1392  for (;;) Line 1948  for (;;)
1948  #endif  #endif
1949         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1950         )         )
1951        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1952      ecode++;      ecode++;
1953      break;      break;
1954    
1955      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1956      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1957          {
1958          SCHECK_PARTIAL();
1959          MRRETURN(MATCH_NOMATCH);
1960          }
1961      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1962      if (      if (
1963  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1405  for (;;) Line 1965  for (;;)
1965  #endif  #endif
1966         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1967         )         )
1968        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1969      ecode++;      ecode++;
1970      break;      break;
1971    
1972      case OP_WORDCHAR:      case OP_WORDCHAR:
1973      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1974          {
1975          SCHECK_PARTIAL();
1976          MRRETURN(MATCH_NOMATCH);
1977          }
1978      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1979      if (      if (
1980  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1418  for (;;) Line 1982  for (;;)
1982  #endif  #endif
1983         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1984         )         )
1985        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1986      ecode++;      ecode++;
1987      break;      break;
1988    
1989      case OP_ANYNL:      case OP_ANYNL:
1990      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1991          {
1992          SCHECK_PARTIAL();
1993          MRRETURN(MATCH_NOMATCH);
1994          }
1995      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1996      switch(c)      switch(c)
1997        {        {
1998        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1999        case 0x000d:        case 0x000d:
2000        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2001        break;        break;
2002    
2003        case 0x000a:        case 0x000a:
2004          break;
2005    
2006        case 0x000b:        case 0x000b:
2007        case 0x000c:        case 0x000c:
2008        case 0x0085:        case 0x0085:
2009        case 0x2028:        case 0x2028:
2010        case 0x2029:        case 0x2029:
2011          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
2012        break;        break;
2013        }        }
2014      ecode++;      ecode++;
2015      break;      break;
2016    
2017  #ifdef SUPPORT_UCP      case OP_NOT_HSPACE:
2018      /* Check the next character by Unicode property. We will get here only      if (eptr >= md->end_subject)
2019      if the support is in the binary; otherwise a compile-time error occurs. */        {
2020          SCHECK_PARTIAL();
2021          MRRETURN(MATCH_NOMATCH);
2022          }
2023        GETCHARINCTEST(c, eptr);
2024        switch(c)
2025          {
2026          default: break;
2027          case 0x09:      /* HT */
2028          case 0x20:      /* SPACE */
2029          case 0xa0:      /* NBSP */
2030          case 0x1680:    /* OGHAM SPACE MARK */
2031          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2032          case 0x2000:    /* EN QUAD */
2033          case 0x2001:    /* EM QUAD */
2034          case 0x2002:    /* EN SPACE */
2035          case 0x2003:    /* EM SPACE */
2036          case 0x2004:    /* THREE-PER-EM SPACE */
2037          case 0x2005:    /* FOUR-PER-EM SPACE */
2038          case 0x2006:    /* SIX-PER-EM SPACE */
2039          case 0x2007:    /* FIGURE SPACE */
2040          case 0x2008:    /* PUNCTUATION SPACE */
2041          case 0x2009:    /* THIN SPACE */
2042          case 0x200A:    /* HAIR SPACE */
2043          case 0x202f:    /* NARROW NO-BREAK SPACE */
2044          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2045          case 0x3000:    /* IDEOGRAPHIC SPACE */
2046          MRRETURN(MATCH_NOMATCH);
2047          }
2048        ecode++;
2049        break;
2050    
2051        case OP_HSPACE:
2052        if (eptr >= md->end_subject)
2053          {
2054          SCHECK_PARTIAL();
2055          MRRETURN(MATCH_NOMATCH);
2056          }
2057        GETCHARINCTEST(c, eptr);
2058        switch(c)
2059          {
2060          default: MRRETURN(MATCH_NOMATCH);
2061          case 0x09:      /* HT */
2062          case 0x20:      /* SPACE */
2063          case 0xa0:      /* NBSP */
2064          case 0x1680:    /* OGHAM SPACE MARK */
2065          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2066          case 0x2000:    /* EN QUAD */
2067          case 0x2001:    /* EM QUAD */
2068          case 0x2002:    /* EN SPACE */
2069          case 0x2003:    /* EM SPACE */
2070          case 0x2004:    /* THREE-PER-EM SPACE */
2071          case 0x2005:    /* FOUR-PER-EM SPACE */
2072          case 0x2006:    /* SIX-PER-EM SPACE */
2073          case 0x2007:    /* FIGURE SPACE */
2074          case 0x2008:    /* PUNCTUATION SPACE */
2075          case 0x2009:    /* THIN SPACE */
2076          case 0x200A:    /* HAIR SPACE */
2077          case 0x202f:    /* NARROW NO-BREAK SPACE */
2078          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2079          case 0x3000:    /* IDEOGRAPHIC SPACE */
2080          break;
2081          }
2082        ecode++;
2083        break;
2084    
2085        case OP_NOT_VSPACE:
2086        if (eptr >= md->end_subject)
2087          {
2088          SCHECK_PARTIAL();
2089          MRRETURN(MATCH_NOMATCH);
2090          }
2091        GETCHARINCTEST(c, eptr);
2092        switch(c)
2093          {
2094          default: break;
2095          case 0x0a:      /* LF */
2096          case 0x0b:      /* VT */
2097          case 0x0c:      /* FF */
2098          case 0x0d:      /* CR */
2099          case 0x85:      /* NEL */
2100          case 0x2028:    /* LINE SEPARATOR */
2101          case 0x2029:    /* PARAGRAPH SEPARATOR */
2102          MRRETURN(MATCH_NOMATCH);
2103          }
2104        ecode++;
2105        break;
2106    
2107        case OP_VSPACE:
2108        if (eptr >= md->end_subject)
2109          {
2110          SCHECK_PARTIAL();
2111          MRRETURN(MATCH_NOMATCH);
2112          }
2113        GETCHARINCTEST(c, eptr);
2114        switch(c)
2115          {
2116          default: MRRETURN(MATCH_NOMATCH);
2117          case 0x0a:      /* LF */
2118          case 0x0b:      /* VT */
2119          case 0x0c:      /* FF */
2120          case 0x0d:      /* CR */
2121          case 0x85:      /* NEL */
2122          case 0x2028:    /* LINE SEPARATOR */
2123          case 0x2029:    /* PARAGRAPH SEPARATOR */
2124          break;
2125          }
2126        ecode++;
2127        break;
2128    
2129    #ifdef SUPPORT_UCP
2130        /* Check the next character by Unicode property. We will get here only
2131        if the support is in the binary; otherwise a compile-time error occurs. */
2132    
2133      case OP_PROP:      case OP_PROP:
2134      case OP_NOTPROP:      case OP_NOTPROP:
2135      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2136          {
2137          SCHECK_PARTIAL();
2138          MRRETURN(MATCH_NOMATCH);
2139          }
2140      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2141        {        {
2142        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2143    
2144        switch(ecode[1])        switch(ecode[1])
2145          {          {
2146          case PT_ANY:          case PT_ANY:
2147          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2148          break;          break;
2149    
2150          case PT_LAMP:          case PT_LAMP:
2151          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2152               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2153               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2154            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2155           break;          break;
2156    
2157          case PT_GC:          case PT_GC:
2158          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2159            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2160          break;          break;
2161    
2162          case PT_PC:          case PT_PC:
2163          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2164            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2165          break;          break;
2166    
2167          case PT_SC:          case PT_SC:
2168          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2169            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2170            break;
2171    
2172            /* These are specials */
2173    
2174            case PT_ALNUM:
2175            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2176                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2177              MRRETURN(MATCH_NOMATCH);
2178          break;          break;
2179    
2180            case PT_SPACE:    /* Perl space */
2181            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2182                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2183                   == (op == OP_NOTPROP))
2184              MRRETURN(MATCH_NOMATCH);
2185            break;
2186    
2187            case PT_PXSPACE:  /* POSIX space */
2188            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2189                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2190                 c == CHAR_FF || c == CHAR_CR)
2191                   == (op == OP_NOTPROP))
2192              MRRETURN(MATCH_NOMATCH);
2193            break;
2194    
2195            case PT_WORD:
2196            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2197                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2198                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2199              MRRETURN(MATCH_NOMATCH);
2200            break;
2201    
2202            /* This should never occur */
2203    
2204          default:          default:
2205          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2206          }          }
# Line 1494  for (;;) Line 2213  for (;;)
2213      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2214    
2215      case OP_EXTUNI:      case OP_EXTUNI:
2216      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2217          {
2218          SCHECK_PARTIAL();
2219          MRRETURN(MATCH_NOMATCH);
2220          }
2221      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2222        {        {
2223        int chartype, script;        int category = UCD_CATEGORY(c);
2224        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2225        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2226          {          {
2227          int len = 1;          int len = 1;
# Line 1507  for (;;) Line 2229  for (;;)
2229            {            {
2230            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2231            }            }
2232          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2233          if (category != ucp_M) break;          if (category != ucp_M) break;
2234          eptr += len;          eptr += len;
2235          }          }
# Line 1528  for (;;) Line 2250  for (;;)
2250      case OP_REF:      case OP_REF:
2251        {        {
2252        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2253        ecode += 3;                                 /* Advance past item */        ecode += 3;
2254    
2255          /* If the reference is unset, there are two possibilities:
2256    
2257          (a) In the default, Perl-compatible state, set the length to be longer
2258          than the amount of subject left; this ensures that every attempt at a
2259          match fails. We can't just fail here, because of the possibility of
2260          quantifiers with zero minima.
2261    
2262        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
2263        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
2264        can't just fail here, because of the possibility of quantifiers with zero  
2265        minima. */        Otherwise, set the length to the length of what was matched by the
2266          referenced subpattern. */
2267        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
2268          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
2269          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1);
2270          else
2271            length = md->offset_vector[offset+1] - md->offset_vector[offset];
2272    
2273        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
2274    
# Line 1566  for (;;) Line 2297  for (;;)
2297          break;          break;
2298    
2299          default:               /* No repeat follows */          default:               /* No repeat follows */
2300          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2301              {
2302              CHECK_PARTIAL();
2303              MRRETURN(MATCH_NOMATCH);
2304              }
2305          eptr += length;          eptr += length;
2306          continue;              /* With the main loop */          continue;              /* With the main loop */
2307          }          }
# Line 1582  for (;;) Line 2317  for (;;)
2317    
2318        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2319          {          {
2320          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          if (!match_ref(offset, eptr, length, md, ims))
2321              {
2322              CHECK_PARTIAL();
2323              MRRETURN(MATCH_NOMATCH);
2324              }
2325          eptr += length;          eptr += length;
2326          }          }
2327    
# Line 1597  for (;;) Line 2336  for (;;)
2336          {          {
2337          for (fi = min;; fi++)          for (fi = min;; fi++)
2338            {            {
2339            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
2340            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2341            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2342              RRETURN(MATCH_NOMATCH);            if (!match_ref(offset, eptr, length, md, ims))
2343                {
2344                CHECK_PARTIAL();
2345                MRRETURN(MATCH_NOMATCH);
2346                }
2347            eptr += length;            eptr += length;
2348            }            }
2349          /* Control never gets here */          /* Control never gets here */
# Line 1613  for (;;) Line 2356  for (;;)
2356          pp = eptr;          pp = eptr;
2357          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2358            {            {
2359            if (!match_ref(offset, eptr, length, md, ims)) break;            if (!match_ref(offset, eptr, length, md, ims))
2360                {
2361                CHECK_PARTIAL();
2362                break;
2363                }
2364            eptr += length;            eptr += length;
2365            }            }
2366          while (eptr >= pp)          while (eptr >= pp)
2367            {            {
2368            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
2369            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2370            eptr -= length;            eptr -= length;
2371            }            }
2372          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2373          }          }
2374        }        }
2375      /* Control never gets here */      /* Control never gets here */
2376    
   
   
2377      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2378      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2379      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1683  for (;;) Line 2428  for (;;)
2428          {          {
2429          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2430            {            {
2431            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2432                {
2433                SCHECK_PARTIAL();
2434                MRRETURN(MATCH_NOMATCH);
2435                }
2436            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2437            if (c > 255)            if (c > 255)
2438              {              {
2439              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2440              }              }
2441            else            else
2442              {              {
2443              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2444              }              }
2445            }            }
2446          }          }
# Line 1701  for (;;) Line 2450  for (;;)
2450          {          {
2451          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2452            {            {
2453            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2454                {
2455                SCHECK_PARTIAL();
2456                MRRETURN(MATCH_NOMATCH);
2457                }
2458            c = *eptr++;            c = *eptr++;
2459            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2460            }            }
2461          }          }
2462    
# Line 1723  for (;;) Line 2476  for (;;)
2476            {            {
2477            for (fi = min;; fi++)            for (fi = min;; fi++)
2478              {              {
2479              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
2480              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2481              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2482                if (eptr >= md->end_subject)
2483                  {
2484                  SCHECK_PARTIAL();
2485                  MRRETURN(MATCH_NOMATCH);
2486                  }
2487              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2488              if (c > 255)              if (c > 255)
2489                {                {
2490                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2491                }                }
2492              else              else
2493                {                {
2494                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2495                }                }
2496              }              }
2497            }            }
# Line 1743  for (;;) Line 2501  for (;;)
2501            {            {
2502            for (fi = min;; fi++)            for (fi = min;; fi++)
2503              {              {
2504              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
2505              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2506              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2507                if (eptr >= md->end_subject)
2508                  {
2509                  SCHECK_PARTIAL();
2510                  MRRETURN(MATCH_NOMATCH);
2511                  }
2512              c = *eptr++;              c = *eptr++;
2513              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2514              }              }
2515            }            }
2516          /* Control never gets here */          /* Control never gets here */
# Line 1766  for (;;) Line 2529  for (;;)
2529            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2530              {              {
2531              int len = 1;              int len = 1;
2532              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2533                  {
2534                  SCHECK_PARTIAL();
2535                  break;
2536                  }
2537              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2538              if (c > 255)              if (c > 255)
2539                {                {
# Line 1780  for (;;) Line 2547  for (;;)
2547              }              }
2548            for (;;)            for (;;)
2549              {              {
2550              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
2551              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2552              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2553              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1792  for (;;) Line 2559  for (;;)
2559            {            {
2560            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2561              {              {
2562              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2563                  {
2564                  SCHECK_PARTIAL();
2565                  break;
2566                  }
2567              c = *eptr;              c = *eptr;
2568              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2569              eptr++;              eptr++;
2570              }              }
2571            while (eptr >= pp)            while (eptr >= pp)
2572              {              {
2573              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
2574              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2575              eptr--;              eptr--;
2576              }              }
2577            }            }
2578    
2579          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2580          }          }
2581        }        }
2582      /* Control never gets here */      /* Control never gets here */
2583    
2584    
2585      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2586      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2587        mode, because Unicode properties are supported in non-UTF-8 mode. */
2588    
2589  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2590      case OP_XCLASS:      case OP_XCLASS:
# Line 1853  for (;;) Line 2625  for (;;)
2625    
2626        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2627          {          {
2628          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2629          GETCHARINC(c, eptr);            {
2630          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2631              MRRETURN(MATCH_NOMATCH);
2632              }
2633            GETCHARINCTEST(c, eptr);
2634            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2635          }          }
2636    
2637        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1870  for (;;) Line 2646  for (;;)
2646          {          {
2647          for (fi = min;; fi++)          for (fi = min;; fi++)
2648            {            {
2649            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2650            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2651            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2652            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2653            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2654                SCHECK_PARTIAL();
2655                MRRETURN(MATCH_NOMATCH);
2656                }
2657              GETCHARINCTEST(c, eptr);
2658              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2659            }            }
2660          /* Control never gets here */          /* Control never gets here */
2661          }          }
# Line 1887  for (;;) Line 2668  for (;;)
2668          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2669            {            {
2670            int len = 1;            int len = 1;
2671            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2672            GETCHARLEN(c, eptr, len);              {
2673                SCHECK_PARTIAL();
2674                break;
2675                }
2676              GETCHARLENTEST(c, eptr, len);
2677            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2678            eptr += len;            eptr += len;
2679            }            }
2680          for(;;)          for(;;)
2681            {            {
2682            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2683            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2684            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2685            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2686            }            }
2687          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2688          }          }
2689    
2690        /* Control never gets here */        /* Control never gets here */
# Line 1915  for (;;) Line 2700  for (;;)
2700        length = 1;        length = 1;
2701        ecode++;        ecode++;
2702        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2703        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2704        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2705            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2706            MRRETURN(MATCH_NOMATCH);
2707            }
2708          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2709        }        }
2710      else      else
2711  #endif  #endif
2712    
2713      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2714        {        {
2715        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2716        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2717            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2718            MRRETURN(MATCH_NOMATCH);
2719            }
2720          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2721        ecode += 2;        ecode += 2;
2722        }        }
2723      break;      break;
# Line 1939  for (;;) Line 2732  for (;;)
2732        ecode++;        ecode++;
2733        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2734    
2735        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2736            {
2737            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2738            MRRETURN(MATCH_NOMATCH);
2739            }
2740    
2741        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2742        can use the fast lookup table. */        can use the fast lookup table. */
2743    
2744        if (fc < 128)        if (fc < 128)
2745          {          {
2746          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2747          }          }
2748    
2749        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1963  for (;;) Line 2760  for (;;)
2760          if (fc != dc)          if (fc != dc)
2761            {            {
2762  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2763            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2764  #endif  #endif
2765              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2766            }            }
2767          }          }
2768        }        }
# Line 1974  for (;;) Line 2771  for (;;)
2771    
2772      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2773        {        {
2774        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2775        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2776            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2777            MRRETURN(MATCH_NOMATCH);
2778            }
2779          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2780        ecode += 2;        ecode += 2;
2781        }        }
2782      break;      break;
# Line 2028  for (;;) Line 2829  for (;;)
2829      case OP_MINQUERY:      case OP_MINQUERY:
2830      c = *ecode++ - OP_STAR;      c = *ecode++ - OP_STAR;
2831      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2832    
2833      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2834      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2835      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2836    
2837      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2838    
2839      REPEATCHAR:      REPEATCHAR:
2840  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2043  for (;;) Line 2843  for (;;)
2843        length = 1;        length = 1;
2844        charptr = ecode;        charptr = ecode;
2845        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2846        ecode += length;        ecode += length;
2847    
2848        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2054  for (;;) Line 2853  for (;;)
2853  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2854          unsigned int othercase;          unsigned int othercase;
2855          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2856              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2857            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2858          else oclength = 0;          else oclength = 0;
2859  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2860    
2861          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2862            {            {
2863            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2864                memcmp(eptr, charptr, length) == 0) eptr += length;
2865  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2866            /* Need braces because of following else */            else if (oclength > 0 &&
2867            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                     eptr <= md->end_subject - oclength &&
2868                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869    #endif  /* SUPPORT_UCP */
2870            else            else
2871              {              {
2872              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2873              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2874              }              }
 #else   /* without SUPPORT_UCP */  
           else { RRETURN(MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2875            }            }
2876    
2877          if (min == max) continue;          if (min == max) continue;
# Line 2081  for (;;) Line 2880  for (;;)
2880            {            {
2881            for (fi = min;; fi++)            for (fi = min;; fi++)
2882              {              {
2883              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2887                  memcmp(eptr, charptr, length) == 0) eptr += length;
2888  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2889              /* Need braces because of following else */              else if (oclength > 0 &&
2890              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }                       eptr <= md->end_subject - oclength &&
2891                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892    #endif  /* SUPPORT_UCP */
2893              else              else
2894                {                {
2895                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2896                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2897                }                }
 #else   /* without SUPPORT_UCP */  
             else { RRETURN (MATCH_NOMATCH); }  
 #endif  /* SUPPORT_UCP */  
2898              }              }
2899            /* Control never gets here */            /* Control never gets here */
2900            }            }
# Line 2105  for (;;) Line 2904  for (;;)
2904            pp = eptr;            pp = eptr;
2905            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2906              {              {
2907              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2908              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2909  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2910              else if (oclength == 0) break;              else if (oclength > 0 &&
2911                         eptr <= md->end_subject - oclength &&
2912                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913    #endif  /* SUPPORT_UCP */
2914              else              else
2915                {                {
2916                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2917                eptr += oclength;                break;
2918                }                }
 #else   /* without SUPPORT_UCP */  
             else break;  
 #endif  /* SUPPORT_UCP */  
2919              }              }
2920    
2921            if (possessive) continue;            if (possessive) continue;
2922    
2923            for(;;)            for(;;)
2924             {              {
2925             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2926             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927             if (eptr == pp) RRETURN(MATCH_NOMATCH);              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2929             eptr--;              eptr--;
2930             BACKCHAR(eptr);              BACKCHAR(eptr);
2931  #else   /* without SUPPORT_UCP */  #else   /* without SUPPORT_UCP */
2932             eptr -= length;              eptr -= length;
2933  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2934             }              }
2935            }            }
2936          /* Control never gets here */          /* Control never gets here */
2937          }          }
# Line 2144  for (;;) Line 2944  for (;;)
2944  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2945    
2946      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2947        {  
2948        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2949    
2950      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2951      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2165  for (;;) Line 2963  for (;;)
2963        {        {
2964        fc = md->lcc[fc];        fc = md->lcc[fc];
2965        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2966          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2967            if (eptr >= md->end_subject)
2968              {
2969              SCHECK_PARTIAL();
2970              MRRETURN(MATCH_NOMATCH);
2971              }
2972            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973            }
2974        if (min == max) continue;        if (min == max) continue;
2975        if (minimize)        if (minimize)
2976          {          {
2977          for (fi = min;; fi++)          for (fi = min;; fi++)
2978            {            {
2979            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2983              RRETURN(MATCH_NOMATCH);              {
2984                SCHECK_PARTIAL();
2985                MRRETURN(MATCH_NOMATCH);
2986                }
2987              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }            }
2989          /* Control never gets here */          /* Control never gets here */
2990          }          }
# Line 2184  for (;;) Line 2993  for (;;)
2993          pp = eptr;          pp = eptr;
2994          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2995            {            {
2996            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2997                {
2998                SCHECK_PARTIAL();
2999                break;
3000                }
3001              if (fc != md->lcc[*eptr]) break;
3002            eptr++;            eptr++;
3003            }            }
3004    
3005          if (possessive) continue;          if (possessive) continue;
3006    
3007          while (eptr >= pp)          while (eptr >= pp)
3008            {            {
3009            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
3010            eptr--;            eptr--;
3011            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012            }            }
3013          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3014          }          }
3015        /* Control never gets here */        /* Control never gets here */
3016        }        }
# Line 2203  for (;;) Line 3019  for (;;)
3019    
3020      else      else
3021        {        {
3022        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3023            {
3024            if (eptr >= md->end_subject)
3025              {
3026              SCHECK_PARTIAL();
3027              MRRETURN(MATCH_NOMATCH);
3028              }
3029            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030            }
3031    
3032        if (min == max) continue;        if (min == max) continue;
3033    
3034        if (minimize)        if (minimize)
3035          {          {
3036          for (fi = min;; fi++)          for (fi = min;; fi++)
3037            {            {
3038            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3042                {
3043                SCHECK_PARTIAL();
3044                MRRETURN(MATCH_NOMATCH);
3045                }
3046              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047            }            }
3048          /* Control never gets here */          /* Control never gets here */
3049          }          }
# Line 2221  for (;;) Line 3052  for (;;)
3052          pp = eptr;          pp = eptr;
3053          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3054            {            {
3055            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3056                {
3057                SCHECK_PARTIAL();
3058                break;
3059                }
3060              if (fc != *eptr) break;
3061            eptr++;            eptr++;
3062            }            }
3063          if (possessive) continue;          if (possessive) continue;
3064    
3065          while (eptr >= pp)          while (eptr >= pp)
3066            {            {
3067            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
3068            eptr--;            eptr--;
3069            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070            }            }
3071          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3072          }          }
3073        }        }
3074      /* Control never gets here */      /* Control never gets here */
# Line 2240  for (;;) Line 3077  for (;;)
3077      checking can be multibyte. */      checking can be multibyte. */
3078    
3079      case OP_NOT:      case OP_NOT:
3080      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
3081          {
3082          SCHECK_PARTIAL();
3083          MRRETURN(MATCH_NOMATCH);
3084          }
3085      ecode++;      ecode++;
3086      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3087      if ((ims & PCRE_CASELESS) != 0)      if ((ims & PCRE_CASELESS) != 0)
# Line 2249  for (;;) Line 3090  for (;;)
3090        if (c < 256)        if (c < 256)
3091  #endif  #endif
3092        c = md->lcc[c];        c = md->lcc[c];
3093        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3094        }        }
3095      else      else
3096        {        {
3097        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3098        }        }
3099      break;      break;
3100    
# Line 2317  for (;;) Line 3158  for (;;)
3158      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3159      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3160    
3161      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3162    
3163      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3164      fc = *ecode++;      fc = *ecode++;
3165    
3166      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2347  for (;;) Line 3185  for (;;)
3185          register unsigned int d;          register unsigned int d;
3186          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3187            {            {
3188              if (eptr >= md->end_subject)
3189                {
3190                SCHECK_PARTIAL();
3191                MRRETURN(MATCH_NOMATCH);
3192                }
3193            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3194            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3195            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3196            }            }
3197          }          }
3198        else        else
# Line 2358  for (;;) Line 3201  for (;;)
3201        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3202          {          {
3203          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3204            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3205              if (eptr >= md->end_subject)
3206                {
3207                SCHECK_PARTIAL();
3208                MRRETURN(MATCH_NOMATCH);
3209                }
3210              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3211              }
3212          }          }
3213    
3214        if (min == max) continue;        if (min == max) continue;
# Line 2372  for (;;) Line 3222  for (;;)
3222            register unsigned int d;            register unsigned int d;
3223            for (fi = min;; fi++)            for (fi = min;; fi++)
3224              {              {
3225              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
3226              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3227                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3228                if (eptr >= md->end_subject)
3229                  {
3230                  SCHECK_PARTIAL();
3231                  MRRETURN(MATCH_NOMATCH);
3232                  }
3233              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3234              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3235              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3236              }              }
3237            }            }
3238          else          else
# Line 2386  for (;;) Line 3241  for (;;)
3241            {            {
3242            for (fi = min;; fi++)            for (fi = min;; fi++)
3243              {              {
3244              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
3245              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3246              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3247                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3248                  {
3249                  SCHECK_PARTIAL();
3250                  MRRETURN(MATCH_NOMATCH);
3251                  }
3252                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3253              }              }
3254            }            }
3255          /* Control never gets here */          /* Control never gets here */
# Line 2409  for (;;) Line 3269  for (;;)
3269            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3270              {              {
3271              int len = 1;              int len = 1;
3272              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3273                  {
3274                  SCHECK_PARTIAL();
3275                  break;
3276                  }
3277              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3278              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3279              if (fc == d) break;              if (fc == d) break;
# Line 2418  for (;;) Line 3282  for (;;)
3282          if (possessive) continue;          if (possessive) continue;
3283          for(;;)          for(;;)
3284              {              {
3285              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
3286              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3287              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3288              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2430  for (;;) Line 3294  for (;;)
3294            {            {
3295            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3296              {              {
3297              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3298                  {
3299                  SCHECK_PARTIAL();
3300                  break;
3301                  }
3302                if (fc == md->lcc[*eptr]) break;
3303              eptr++;              eptr++;
3304              }              }
3305            if (possessive) continue;            if (possessive) continue;
3306            while (eptr >= pp)            while (eptr >= pp)
3307              {              {
3308              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
3309              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3310              eptr--;              eptr--;
3311              }              }
3312            }            }
3313    
3314          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3315          }          }
3316        /* Control never gets here */        /* Control never gets here */
3317        }        }
# Line 2458  for (;;) Line 3327  for (;;)
3327          register unsigned int d;          register unsigned int d;
3328          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3329            {            {
3330              if (eptr >= md->end_subject)
3331                {
3332                SCHECK_PARTIAL();
3333                MRRETURN(MATCH_NOMATCH);
3334                }
3335            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3336            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3337            }            }
3338          }          }
3339        else        else
# Line 2467  for (;;) Line 3341  for (;;)
3341        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3342          {          {
3343          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3344            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3345              if (eptr >= md->end_subject)
3346                {
3347                SCHECK_PARTIAL();
3348                MRRETURN(MATCH_NOMATCH);
3349                }
3350              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3351              }
3352          }          }
3353    
3354        if (min == max) continue;        if (min == max) continue;
# Line 2481  for (;;) Line 3362  for (;;)
3362            register unsigned int d;            register unsigned int d;
3363            for (fi = min;; fi++)            for (fi = min;; fi++)
3364              {              {
3365              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
3366              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3367                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3368                if (eptr >= md->end_subject)
3369                  {
3370                  SCHECK_PARTIAL();
3371                  MRRETURN(MATCH_NOMATCH);
3372                  }
3373              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3374              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3375              }              }
3376            }            }
3377          else          else
# Line 2494  for (;;) Line 3380  for (;;)
3380            {            {
3381            for (fi = min;; fi++)            for (fi = min;; fi++)
3382              {              {
3383              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
3384              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3385              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3386                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3387                  {
3388                  SCHECK_PARTIAL();
3389                  MRRETURN(MATCH_NOMATCH);
3390                  }
3391                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3392              }              }
3393            }            }
3394          /* Control never gets here */          /* Control never gets here */
# Line 2517  for (;;) Line 3408  for (;;)
3408            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3409              {              {
3410              int len = 1;              int len = 1;
3411              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3412                  {
3413                  SCHECK_PARTIAL();
3414                  break;
3415                  }
3416              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3417              if (fc == d) break;              if (fc == d) break;
3418              eptr += len;              eptr += len;
# Line 2525  for (;;) Line 3420  for (;;)
3420            if (possessive) continue;            if (possessive) continue;
3421            for(;;)            for(;;)
3422              {              {
3423              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
3424              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3425              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3426              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2537  for (;;) Line 3432  for (;;)
3432            {            {
3433            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3434              {              {
3435              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3436                  {
3437                  SCHECK_PARTIAL();
3438                  break;
3439                  }
3440                if (fc == *eptr) break;
3441              eptr++;              eptr++;
3442              }              }
3443            if (possessive) continue;            if (possessive) continue;
3444            while (eptr >= pp)            while (eptr >= pp)
3445              {              {
3446              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
3447              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3448              eptr--;              eptr--;
3449              }              }
3450            }            }
3451    
3452          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3453          }          }
3454        }        }
3455      /* Control never gets here */      /* Control never gets here */
# Line 2631  for (;;) Line 3531  for (;;)
3531    
3532      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3533      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3534      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3535      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3536      and single-bytes. */      and single-bytes. */
3537    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3538      if (min > 0)      if (min > 0)
3539        {        {
3540  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2646  for (;;) Line 3543  for (;;)
3543          switch(prop_type)          switch(prop_type)
3544            {            {
3545            case PT_ANY:            case PT_ANY:
3546            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3547            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3548              {              {
3549              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3550              GETCHARINC(c, eptr);                {
3551                  SCHECK_PARTIAL();
3552                  MRRETURN(MATCH_NOMATCH);
3553                  }
3554                GETCHARINCTEST(c, eptr);
3555              }              }
3556            break;            break;
3557    
3558            case PT_LAMP:            case PT_LAMP:
3559            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3560              {              {
3561              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3562              GETCHARINC(c, eptr);                {
3563              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3564                  MRRETURN(MATCH_NOMATCH);
3565                  }
3566                GETCHARINCTEST(c, eptr);
3567                prop_chartype = UCD_CHARTYPE(c);
3568              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3569                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3570                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3571                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3572              }              }
3573            break;            break;
3574    
3575            case PT_GC:            case PT_GC:
3576            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3577              {              {
3578              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3579              GETCHARINC(c, eptr);                {
3580              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3581                  MRRETURN(MATCH_NOMATCH);
3582                  }
3583                GETCHARINCTEST(c, eptr);
3584                prop_category = UCD_CATEGORY(c);
3585              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3586                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3587              }              }
3588            break;            break;
3589    
3590            case PT_PC:            case PT_PC:
3591            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3592              {              {
3593              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3594              GETCHARINC(c, eptr);                {
3595              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3596                  MRRETURN(MATCH_NOMATCH);
3597                  }
3598                GETCHARINCTEST(c, eptr);
3599                prop_chartype = UCD_CHARTYPE(c);
3600              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3601                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3602              }              }
3603            break;            break;
3604    
3605            case PT_SC:            case PT_SC:
3606            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3607              {              {
3608              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3609              GETCHARINC(c, eptr);                {
3610              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3611                  MRRETURN(MATCH_NOMATCH);
3612                  }
3613                GETCHARINCTEST(c, eptr);
3614                prop_script = UCD_SCRIPT(c);
3615              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3616                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3617                }
3618              break;
3619    
3620              case PT_ALNUM:
3621              for (i = 1; i <= min; i++)
3622                {
3623                if (eptr >= md->end_subject)
3624                  {
3625                  SCHECK_PARTIAL();
3626                  MRRETURN(MATCH_NOMATCH);
3627                  }
3628                GETCHARINCTEST(c, eptr);
3629                prop_category = UCD_CATEGORY(c);
3630                if ((prop_category == ucp_L || prop_category == ucp_N)
3631                       == prop_fail_result)
3632                  MRRETURN(MATCH_NOMATCH);
3633                }
3634              break;
3635    
3636              case PT_SPACE:    /* Perl space */
3637              for (i = 1; i <= min; i++)
3638                {
3639                if (eptr >= md->end_subject)
3640                  {
3641                  SCHECK_PARTIAL();
3642                  MRRETURN(MATCH_NOMATCH);
3643                  }
3644                GETCHARINCTEST(c, eptr);
3645                prop_category = UCD_CATEGORY(c);
3646                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3647                     c == CHAR_FF || c == CHAR_CR)
3648                       == prop_fail_result)
3649                  MRRETURN(MATCH_NOMATCH);
3650                }
3651              break;
3652    
3653              case PT_PXSPACE:  /* POSIX space */
3654              for (i = 1; i <= min; i++)
3655                {
3656                if (eptr >= md->end_subject)
3657                  {
3658                  SCHECK_PARTIAL();
3659                  MRRETURN(MATCH_NOMATCH);
3660                  }
3661                GETCHARINCTEST(c, eptr);
3662                prop_category = UCD_CATEGORY(c);
3663                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3664                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3665                       == prop_fail_result)
3666                  MRRETURN(MATCH_NOMATCH);
3667                }
3668              break;
3669    
3670              case PT_WORD:
3671              for (i = 1; i <= min; i++)
3672                {
3673                if (eptr >= md->end_subject)
3674                  {
3675                  SCHECK_PARTIAL();
3676                  MRRETURN(MATCH_NOMATCH);
3677                  }
3678                GETCHARINCTEST(c, eptr);
3679                prop_category = UCD_CATEGORY(c);
3680                if ((prop_category == ucp_L || prop_category == ucp_N ||
3681                     c == CHAR_UNDERSCORE)
3682                       == prop_fail_result)
3683                  MRRETURN(MATCH_NOMATCH);
3684              }              }
3685            break;            break;
3686    
3687              /* This should not occur */
3688    
3689            default:            default:
3690            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3691            }            }
# Line 2712  for (;;) Line 3698  for (;;)
3698          {          {
3699          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3700            {            {
3701              if (eptr >= md->end_subject)
3702                {
3703                SCHECK_PARTIAL();
3704                MRRETURN(MATCH_NOMATCH);
3705                }
3706            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3707            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3708            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3709            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3710              {              {
3711              int len = 1;              int len = 1;
3712              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3713                {                else { GETCHARLEN(c, eptr, len); }
3714                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3715              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3716              eptr += len;              eptr += len;
3717              }              }
# Line 2740  for (;;) Line 3729  for (;;)
3729          case OP_ANY:          case OP_ANY:
3730          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3731            {            {
3732            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3733                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3734              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3735                MRRETURN(MATCH_NOMATCH);
3736                }
3737              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3738              eptr++;
3739              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3740              }
3741            break;
3742    
3743            case OP_ALLANY:
3744            for (i = 1; i <= min; i++)
3745              {
3746              if (eptr >= md->end_subject)
3747                {
3748                SCHECK_PARTIAL();
3749                MRRETURN(MATCH_NOMATCH);
3750                }
3751            eptr++;            eptr++;
3752            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3753            }            }
3754          break;          break;
3755    
3756          case OP_ANYBYTE:          case OP_ANYBYTE:
3757            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3758          eptr += min;          eptr += min;
3759          break;          break;
3760    
3761          case OP_ANYNL:          case OP_ANYNL:
3762          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3763            {            {
3764            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3765                {
3766                SCHECK_PARTIAL();
3767                MRRETURN(MATCH_NOMATCH);
3768                }
3769            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3770            switch(c)            switch(c)
3771              {              {
3772              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3773              case 0x000d:              case 0x000d:
3774              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3775              break;              break;
3776    
3777              case 0x000a:              case 0x000a:
3778                break;
3779    
3780              case 0x000b:              case 0x000b:
3781              case 0x000c:              case 0x000c:
3782              case 0x0085:              case 0x0085:
3783              case 0x2028:              case 0x2028:
3784              case 0x2029:              case 0x2029:
3785                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3786              break;              break;
3787              }              }
3788            }            }
3789          break;          break;
3790    
3791          case OP_NOT_DIGIT:          case OP_NOT_HSPACE:
3792          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3793            {            {
3794            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3795                {
3796                SCHECK_PARTIAL();
3797                MRRETURN(MATCH_NOMATCH);
3798                }
3799            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3800            if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)            switch(c)
3801              RRETURN(MATCH_NOMATCH);              {
3802                default: break;
3803                case 0x09:      /* HT */
3804                case 0x20:      /* SPACE */
3805                case 0xa0:      /* NBSP */
3806                case 0x1680:    /* OGHAM SPACE MARK */
3807                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3808                case 0x2000:    /* EN QUAD */
3809                case 0x2001:    /* EM QUAD */
3810                case 0x2002:    /* EN SPACE */
3811                case 0x2003:    /* EM SPACE */
3812                case 0x2004:    /* THREE-PER-EM SPACE */
3813                case 0x2005:    /* FOUR-PER-EM SPACE */
3814                case 0x2006:    /* SIX-PER-EM SPACE */
3815                case 0x2007:    /* FIGURE SPACE */
3816                case 0x2008:    /* PUNCTUATION SPACE */
3817                case 0x2009:    /* THIN SPACE */
3818                case 0x200A:    /* HAIR SPACE */
3819                case 0x202f:    /* NARROW NO-BREAK SPACE */
3820                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3821                case 0x3000:    /* IDEOGRAPHIC SPACE */
3822                MRRETURN(MATCH_NOMATCH);
3823                }
3824              }
3825            break;
3826    
3827            case OP_HSPACE:
3828            for (i = 1; i <= min; i++)
3829              {
3830              if (eptr >= md->end_subject)
3831                {
3832                SCHECK_PARTIAL();
3833                MRRETURN(MATCH_NOMATCH);
3834                }
3835              GETCHARINC(c, eptr);
3836              switch(c)
3837                {
3838                default: MRRETURN(MATCH_NOMATCH);
3839                case 0x09:      /* HT */
3840                case 0x20:      /* SPACE */
3841                case 0xa0:      /* NBSP */
3842                case 0x1680:    /* OGHAM SPACE MARK */
3843                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3844                case 0x2000:    /* EN QUAD */
3845                case 0x2001:    /* EM QUAD */
3846                case 0x2002:    /* EN SPACE */
3847                case 0x2003:    /* EM SPACE */
3848                case 0x2004:    /* THREE-PER-EM SPACE */
3849                case 0x2005:    /* FOUR-PER-EM SPACE */
3850                case 0x2006:    /* SIX-PER-EM SPACE */
3851                case 0x2007:    /* FIGURE SPACE */
3852                case 0x2008:    /* PUNCTUATION SPACE */
3853                case 0x2009:    /* THIN SPACE */
3854                case 0x200A:    /* HAIR SPACE */
3855                case 0x202f:    /* NARROW NO-BREAK SPACE */
3856                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3857                case 0x3000:    /* IDEOGRAPHIC SPACE */
3858                break;
3859                }
3860              }
3861            break;
3862    
3863            case OP_NOT_VSPACE:
3864            for (i = 1; i <= min; i++)
3865              {
3866              if (eptr >= md->end_subject)
3867                {
3868                SCHECK_PARTIAL();
3869                MRRETURN(MATCH_NOMATCH);
3870                }
3871              GETCHARINC(c, eptr);
3872              switch(c)
3873                {
3874                default: break;
3875                case 0x0a:      /* LF */
3876                case 0x0b:      /* VT */
3877                case 0x0c:      /* FF */
3878                case 0x0d:      /* CR */
3879                case 0x85:      /* NEL */
3880                case 0x2028:    /* LINE SEPARATOR */
3881                case 0x2029:    /* PARAGRAPH SEPARATOR */
3882                MRRETURN(MATCH_NOMATCH);
3883                }
3884              }
3885            break;
3886    
3887            case OP_VSPACE:
3888            for (i = 1; i <= min; i++)
3889              {
3890              if (eptr >= md->end_subject)
3891                {
3892                SCHECK_PARTIAL();
3893                MRRETURN(MATCH_NOMATCH);
3894                }
3895              GETCHARINC(c, eptr);
3896              switch(c)
3897                {
3898                default: MRRETURN(MATCH_NOMATCH);
3899                case 0x0a:      /* LF */
3900                case 0x0b:      /* VT */
3901                case 0x0c:      /* FF */
3902                case 0x0d:      /* CR */
3903                case 0x85:      /* NEL */
3904                case 0x2028:    /* LINE SEPARATOR */
3905                case 0x2029:    /* PARAGRAPH SEPARATOR */
3906                break;
3907                }
3908              }
3909            break;
3910    
3911            case OP_NOT_DIGIT:
3912            for (i = 1; i <= min; i++)
3913              {
3914              if (eptr >= md->end_subject)
3915                {
3916                SCHECK_PARTIAL();
3917                MRRETURN(MATCH_NOMATCH);
3918                }
3919              GETCHARINC(c, eptr);
3920              if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
3921                MRRETURN(MATCH_NOMATCH);
3922            }            }
3923          break;          break;
3924    
3925          case OP_DIGIT:          case OP_DIGIT:
3926          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3927            {            {
3928            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3929               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)              {
3930              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3931                MRRETURN(MATCH_NOMATCH);
3932                }
3933              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
3934                MRRETURN(MATCH_NOMATCH);
3935            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3936            }            }
3937          break;          break;
# Line 2797  for (;;) Line 3939  for (;;)
3939          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
3940          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3941            {            {
3942            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3943               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))              {
3944              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3945            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3946                }
3947              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
3948                MRRETURN(MATCH_NOMATCH);
3949              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3950            }            }
3951          break;          break;
3952    
3953          case OP_WHITESPACE:          case OP_WHITESPACE:
3954          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3955            {            {
3956            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3957               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)              {
3958              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3959                MRRETURN(MATCH_NOMATCH);
3960                }
3961              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
3962                MRRETURN(MATCH_NOMATCH);
3963            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3964            }            }
3965          break;          break;
# Line 2817  for (;;) Line 3967  for (;;)
3967          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
3968          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3969            {            {
3970            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3971               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))              {
3972              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3973            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              MRRETURN(MATCH_NOMATCH);
3974                }
3975              if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
3976                MRRETURN(MATCH_NOMATCH);
3977              while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3978            }            }
3979          break;          break;
3980    
3981          case OP_WORDCHAR:          case OP_WORDCHAR:
3982          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3983            {            {
3984            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3985               *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)              {
3986              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3987                MRRETURN(MATCH_NOMATCH);
3988                }
3989              if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
3990                MRRETURN(MATCH_NOMATCH);
3991            /* No need to skip more bytes - we know it's a 1-byte character */            /* No need to skip more bytes - we know it's a 1-byte character */
3992            }            }
3993          break;          break;
# Line 2842  for (;;) Line 4000  for (;;)
4000  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
4001    
4002        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
4003        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum        than OP_PROP and OP_NOTPROP. */
       number of bytes present, as this was tested above. */  
4004    
4005        switch(ctype)        switch(ctype)
4006          {          {
4007          case OP_ANY:          case OP_ANY:
4008          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
4009            {            {
4010            for (i = 1; i <= min; i++)            if (eptr >= md->end_subject)
4011              {              {
4012              if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
4013              eptr++;              MRRETURN(MATCH_NOMATCH);
4014              }              }
4015              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
4016              eptr++;
4017            }            }
         else eptr += min;  
4018          break;          break;
4019    
4020          case OP_ANYBYTE:          case OP_ALLANY:
4021            if (eptr > md->end_subject - min)
4022              {
4023              SCHECK_PARTIAL();
4024              MRRETURN(MATCH_NOMATCH);
4025              }
4026          eptr += min;          eptr += min;
4027          break;          break;
4028    
4029          /* Because of the CRLF case, we can't assume the minimum number of          case OP_ANYBYTE:
4030          bytes are present in this case. */          if (eptr > md->end_subject - min)
4031              {
4032              SCHECK_PARTIAL();
4033              MRRETURN(MATCH_NOMATCH);
4034              }
4035            eptr += min;
4036            break;
4037    
4038          case OP_ANYNL:          case OP_ANYNL:
4039          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4040            {            {
4041            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
4042                {
4043                SCHECK_PARTIAL();
4044                MRRETURN(MATCH_NOMATCH);
4045                }
4046            switch(*eptr++)            switch(*eptr++)
4047              {              {
4048              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
4049              case 0x000d:              case 0x000d:
4050              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
4051              break;              break;
4052              case 0x000a:              case 0x000a:
4053                break;
4054    
4055              case 0x000b:              case 0x000b:
4056              case 0x000c:              case 0x000c:
4057              case 0x0085:              case 0x0085:
4058                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
4059                break;
4060                }
4061              }
4062            break;
4063    
4064            case OP_NOT_HSPACE:
4065            for (i = 1; i <= min; i++)
4066              {
4067              if (eptr >= md->end_subject)
4068                {
4069                SCHECK_PARTIAL();
4070                MRRETURN(MATCH_NOMATCH);
4071                }
4072              switch(*eptr++)
4073                {
4074                default: break;
4075                case 0x09:      /* HT */
4076                case 0x20:      /* SPACE */
4077                case 0xa0:      /* NBSP */
4078                MRRETURN(MATCH_NOMATCH);
4079                }
4080              }
4081            break;
4082    
4083            case OP_HSPACE:
4084            for (i = 1; i <= min; i++)
4085              {
4086              if (eptr >= md->end_subject)
4087                {
4088                SCHECK_PARTIAL();
4089                MRRETURN(MATCH_NOMATCH);
4090                }
4091              switch(*eptr++)
4092                {
4093                default: MRRETURN(MATCH_NOMATCH);
4094                case 0x09:      /* HT */
4095                case 0x20:      /* SPACE */
4096                case 0xa0:      /* NBSP */
4097                break;
4098                }
4099              }
4100            break;
4101    
4102            case OP_NOT_VSPACE:
4103            for (i = 1; i <= min; i++)
4104              {
4105              if (eptr >= md->end_subject)
4106                {
4107                SCHECK_PARTIAL();
4108                MRRETURN(MATCH_NOMATCH);
4109                }
4110              switch(*eptr++)
4111                {
4112                default: break;
4113                case 0x0a:      /* LF */
4114                case 0x0b:      /* VT */
4115                case 0x0c:      /* FF */
4116                case 0x0d:      /* CR */
4117                case 0x85:      /* NEL */
4118                MRRETURN(MATCH_NOMATCH);
4119                }
4120              }
4121            break;
4122    
4123            case OP_VSPACE:
4124            for (i = 1; i <= min; i++)
4125              {
4126              if (eptr >= md->end_subject)
4127                {
4128                SCHECK_PARTIAL();
4129                MRRETURN(MATCH_NOMATCH);
4130                }
4131              switch(*eptr++)
4132                {
4133                default: MRRETURN(MATCH_NOMATCH);
4134                case 0x0a:      /* LF */
4135                case 0x0b:      /* VT */
4136                case 0x0c:      /* FF */
4137                case 0x0d:      /* CR */
4138                case 0x85:      /* NEL */
4139              break;              break;
4140              }              }
4141            }            }
# Line 2887  for (;;) Line 4143  for (;;)
4143    
4144          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4145          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4146            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            {
4147              if (eptr >= md->end_subject)
4148                {
4149                SCHECK_PARTIAL();
4150                MRRETURN(MATCH_NOMATCH);
4151                }
4152              if ((md->ctypes[*eptr++] & ctype_digit) != 0) MRRETURN(MATCH_NOMATCH);
4153              }
4154          break;          break;
4155    
4156          case OP_DIGIT:          case OP_DIGIT:
4157          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4158            if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);            {
4159              if (eptr >= md->end_subject)
4160                {
4161                SCHECK_PARTIAL();
4162                MRRETURN(MATCH_NOMATCH);
4163                }
4164              if ((md->ctypes[*eptr++] & ctype_digit) == 0) MRRETURN(MATCH_NOMATCH);
4165              }
4166          break;          break;
4167    
4168          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4169          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4170            if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);            {
4171              if (eptr >= md->end_subject)
4172                {
4173                SCHECK_PARTIAL();
4174                MRRETURN(MATCH_NOMATCH);
4175                }
4176              if ((md->ctypes[*eptr++] & ctype_space) != 0) MRRETURN(MATCH_NOMATCH);
4177              }
4178          break;          break;
4179    
4180          case OP_WHITESPACE:          case OP_WHITESPACE:
4181          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4182            if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);            {
4183              if (eptr >= md->end_subject)
4184                {
4185                SCHECK_PARTIAL();
4186                MRRETURN(MATCH_NOMATCH);
4187                }
4188              if ((md->ctypes[*eptr++] & ctype_space) == 0) MRRETURN(MATCH_NOMATCH);
4189              }
4190          break;          break;
4191    
4192          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4193          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4194              {
4195              if (eptr >= md->end_subject)
4196                {
4197                SCHECK_PARTIAL();
4198                MRRETURN(MATCH_NOMATCH);
4199                }
4200            if ((md->ctypes[*eptr++] & ctype_word) != 0)            if ((md->ctypes[*eptr++] & ctype_word) != 0)
4201              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4202              }
4203          break;          break;
4204    
4205          case OP_WORDCHAR:          case OP_WORDCHAR:
4206          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4207              {
4208              if (eptr >= md->end_subject)
4209                {
4210                SCHECK_PARTIAL();
4211                MRRETURN(MATCH_NOMATCH);
4212                }
4213            if ((md->ctypes[*eptr++] & ctype_word) == 0)            if ((md->ctypes[*eptr++] & ctype_word) == 0)
4214              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
4215              }
4216          break;          break;
4217    
4218          default:          default:
# Line 2940  for (;;) Line 4238  for (;;)
4238            case PT_ANY:            case PT_ANY:
4239            for (fi = min;; fi++)            for (fi = min;; fi++)
4240              {              {
4241              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
4242              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4243              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4244              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4245              if (prop_fail_result) RRETURN(MATCH_NOMATCH);                {
4246                  SCHECK_PARTIAL();
4247                  MRRETURN(MATCH_NOMATCH);
4248                  }
4249                GETCHARINCTEST(c, eptr);
4250                if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
4251              }              }
4252            /* Control never gets here */            /* Control never gets here */
4253    
4254            case PT_LAMP:            case PT_LAMP:
4255            for (fi = min;; fi++)            for (fi = min;; fi++)
4256              {              {
4257              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
4258              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4259              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
4260              GETCHARINC(c, eptr);              if (eptr >= md->end_subject)
4261              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                {
4262                  SCHECK_PARTIAL();
4263                  MRRETURN(MATCH_NOMATCH);
4264                  }
4265                GETCHARINCTEST(c, eptr);
4266                prop_chartype = UCD_CHARTYPE(c);
4267              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
4268                   prop_chartype == ucp_Ll ||</