/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC revision 602 by ph10, Wed May 25 08:29:03 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2011 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include "config.h"
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_ACCEPT       (-999)
75    #define MATCH_COMMIT       (-998)
76    #define MATCH_PRUNE        (-997)
77    #define MATCH_SKIP         (-996)
78    #define MATCH_SKIP_ARG     (-995)
79    #define MATCH_THEN         (-994)
80    
81    /* This is a convenience macro for code that occurs many times. */
82    
83    #define MRRETURN(ra) \
84      { \
85      md->mark = markptr; \
86      RRETURN(ra); \
87      }
88    
89  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
90  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
91  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 78  static const char rep_max[] = { 0, 0, 0, Line 99  static const char rep_max[] = { 0, 0, 0,
99    
100    
101    
102  #ifdef DEBUG  #ifdef PCRE_DEBUG
103  /*************************************************  /*************************************************
104  *        Debugging function to print chars       *  *        Debugging function to print chars       *
105  *************************************************/  *************************************************/
# Line 111  while (length-- > 0) Line 132  while (length-- > 0)
132  *          Match a back-reference                *  *          Match a back-reference                *
133  *************************************************/  *************************************************/
134    
135  /* If a back reference hasn't been set, the length that is passed is greater  /* Normally, if a back reference hasn't been set, the length that is passed is
136  than the number of characters left in the string, so the match fails.  negative, so the match always fails. However, in JavaScript compatibility mode,
137    the length passed is zero. Note that in caseless UTF-8 mode, the number of
138    subject bytes matched may be different to the number of reference bytes.
139    
140  Arguments:  Arguments:
141    offset      index into the offset vector    offset      index into the offset vector
142    eptr        points into the subject    eptr        pointer into the subject
143    length      length to be matched    length      length of reference to be matched (number of bytes)
144    md          points to match data block    md          points to match data block
145    ims         the ims flags    caseless    TRUE if caseless
146    
147  Returns:      TRUE if matched  Returns:      < 0 if not matched, otherwise the number of subject bytes matched
148  */  */
149    
150  static BOOL  static int
151  match_ref(int offset, register USPTR eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
152    unsigned long int ims)    BOOL caseless)
153  {  {
154  USPTR p = md->start_subject + md->offset_vector[offset];  USPTR eptr_start = eptr;
155    register USPTR p = md->start_subject + md->offset_vector[offset];
156    
157  #ifdef DEBUG  #ifdef PCRE_DEBUG
158  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
159    printf("matching subject <null>");    printf("matching subject <null>");
160  else  else
# Line 143  pchars(p, length, FALSE, md); Line 167  pchars(p, length, FALSE, md);
167  printf("\n");  printf("\n");
168  #endif  #endif
169    
170  /* Always fail if not enough characters left */  /* Always fail if reference not set (and not JavaScript compatible). */
171    
172  if (length > md->end_subject - eptr) return FALSE;  if (length < 0) return -1;
173    
174  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
175    properly if Unicode properties are supported. Otherwise, we can check only
176    ASCII characters. */
177    
178  if ((ims & PCRE_CASELESS) != 0)  if (caseless)
179    {    {
180    while (length-- > 0)  #ifdef SUPPORT_UTF8
181      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;  #ifdef SUPPORT_UCP
182      if (md->utf8)
183        {
184        /* Match characters up to the end of the reference. NOTE: the number of
185        bytes matched may differ, because there are some characters whose upper and
186        lower case versions code as different numbers of bytes. For example, U+023A
187        (2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8);
188        a sequence of 3 of the former uses 6 bytes, as does a sequence of two of
189        the latter. It is important, therefore, to check the length along the
190        reference, not along the subject (earlier code did this wrong). */
191    
192        USPTR endptr = p + length;
193        while (p < endptr)
194          {
195          int c, d;
196          if (eptr >= md->end_subject) return -1;
197          GETCHARINC(c, eptr);
198          GETCHARINC(d, p);
199          if (c != d && c != UCD_OTHERCASE(d)) return -1;
200          }
201        }
202      else
203    #endif
204    #endif
205    
206      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
207      is no UCP support. */
208        {
209        if (eptr + length > md->end_subject) return -1;
210        while (length-- > 0)
211          { if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; }
212        }
213    }    }
214    
215    /* In the caseful case, we can just compare the bytes, whether or not we
216    are in UTF-8 mode. */
217    
218  else  else
219    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    {
220      if (eptr + length > md->end_subject) return -1;
221      while (length-- > 0) if (*p++ != *eptr++) return -1;
222      }
223    
224  return TRUE;  return eptr - eptr_start;
225  }  }
226    
227    
# Line 183  calls by keeping local variables that ne Line 247  calls by keeping local variables that ne
247  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
248  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
249  always used to.  always used to.
250    
251    The original heap-recursive code used longjmp(). However, it seems that this
252    can be very slow on some operating systems. Following a suggestion from Stan
253    Switzer, the use of longjmp() has been abolished, at the cost of having to
254    provide a unique number for each call to RMATCH. There is no way of generating
255    a sequence of numbers at compile time in C. I have given them names, to make
256    them stand out more clearly.
257    
258    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
259    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
260    tests. Furthermore, not using longjmp() means that local dynamic variables
261    don't have indeterminate values; this has meant that the frame size can be
262    reduced because the result can be "passed back" by straight setting of the
263    variable instead of being passed in the frame.
264  ****************************************************************************  ****************************************************************************
265  ***************************************************************************/  ***************************************************************************/
266    
267    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
268    below must be updated in sync.  */
269    
270    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
271           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
272           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
273           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
274           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
275           RM51,  RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60,
276           RM61,  RM62 };
277    
278  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
279  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
280    actually used in this definition. */
281    
282  #ifndef NO_RECURSE  #ifndef NO_RECURSE
283  #define REGISTER register  #define REGISTER register
284  #ifdef DEBUG  
285  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #ifdef PCRE_DEBUG
286    #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
287    { \    { \
288    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
289    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1); \
290    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
291    }    }
292  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 295  versions and production versions. */
295    return ra; \    return ra; \
296    }    }
297  #else  #else
298  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rw) \
299    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rdepth+1)
300  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
301  #endif  #endif
302    
303  #else  #else
304    
305    
306  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
307  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
308  match(), which never changes. */  argument of match(), which never changes. */
309    
310  #define REGISTER  #define REGISTER
311    
312  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rw)\
313    {\    {\
314    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\
315    if (setjmp(frame->Xwhere) == 0)\    if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
316      {\    frame->Xwhere = rw; \
317      newframe->Xeptr = ra;\    newframe->Xeptr = ra;\
318      newframe->Xecode = rb;\    newframe->Xecode = rb;\
319      newframe->Xoffset_top = rc;\    newframe->Xmstart = mstart;\
320      newframe->Xims = re;\    newframe->Xmarkptr = markptr;\
321      newframe->Xeptrb = rf;\    newframe->Xoffset_top = rc;\
322      newframe->Xflags = rg;\    newframe->Xeptrb = re;\
323      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xflags = rf;\
324      newframe->Xprevframe = frame;\    newframe->Xrdepth = frame->Xrdepth + 1;\
325      frame = newframe;\    newframe->Xprevframe = frame;\
326      DPRINTF(("restarting from line %d\n", __LINE__));\    frame = newframe;\
327      goto HEAP_RECURSE;\    DPRINTF(("restarting from line %d\n", __LINE__));\
328      }\    goto HEAP_RECURSE;\
329    else\    L_##rw:\
330      {\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
331    }    }
332    
333  #define RRETURN(ra)\  #define RRETURN(ra)\
334    {\    {\
335    heapframe *newframe = frame;\    heapframe *oldframe = frame;\
336    frame = newframe->Xprevframe;\    frame = oldframe->Xprevframe;\
337    (pcre_stack_free)(newframe);\    (pcre_stack_free)(oldframe);\
338    if (frame != NULL)\    if (frame != NULL)\
339      {\      {\
340      frame->Xresult = ra;\      rrc = ra;\
341      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
342      }\      }\
343    return ra;\    return ra;\
344    }    }
# Line 266  typedef struct heapframe { Line 351  typedef struct heapframe {
351    
352    /* Function arguments that may change */    /* Function arguments that may change */
353    
354    const uschar *Xeptr;    USPTR Xeptr;
355    const uschar *Xecode;    const uschar *Xecode;
356      USPTR Xmstart;
357      USPTR Xmarkptr;
358    int Xoffset_top;    int Xoffset_top;
   long int Xims;  
359    eptrblock *Xeptrb;    eptrblock *Xeptrb;
360    int Xflags;    int Xflags;
361    unsigned int Xrdepth;    unsigned int Xrdepth;
362    
363    /* Function local variables */    /* Function local variables */
364    
365    const uschar *Xcallpat;    USPTR Xcallpat;
366    const uschar *Xcharptr;  #ifdef SUPPORT_UTF8
367    const uschar *Xdata;    USPTR Xcharptr;
368    const uschar *Xnext;  #endif
369    const uschar *Xpp;    USPTR Xdata;
370    const uschar *Xprev;    USPTR Xnext;
371    const uschar *Xsaved_eptr;    USPTR Xpp;
372      USPTR Xprev;
373      USPTR Xsaved_eptr;
374    
375    recursion_info Xnew_recursive;    recursion_info Xnew_recursive;
376    
# Line 290  typedef struct heapframe { Line 378  typedef struct heapframe {
378    BOOL Xcondition;    BOOL Xcondition;
379    BOOL Xprev_is_word;    BOOL Xprev_is_word;
380    
   unsigned long int Xoriginal_ims;  
   
381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
382    int Xprop_type;    int Xprop_type;
383    int Xprop_value;    int Xprop_value;
# Line 299  typedef struct heapframe { Line 385  typedef struct heapframe {
385    int Xprop_category;    int Xprop_category;
386    int Xprop_chartype;    int Xprop_chartype;
387    int Xprop_script;    int Xprop_script;
388      int Xoclength;
389      uschar Xocchars[8];
390  #endif  #endif
391    
392      int Xcodelink;
393    int Xctype;    int Xctype;
394    unsigned int Xfc;    unsigned int Xfc;
395    int Xfi;    int Xfi;
# Line 316  typedef struct heapframe { Line 405  typedef struct heapframe {
405    
406    eptrblock Xnewptrb;    eptrblock Xnewptrb;
407    
408    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
409    
410    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
411    
412  } heapframe;  } heapframe;
413    
# Line 337  typedef struct heapframe { Line 425  typedef struct heapframe {
425    
426  /* This function is called recursively in many circumstances. Whenever it  /* This function is called recursively in many circumstances. Whenever it
427  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
428  same response.  same response. */
429    
430    /* These macros pack up tests that are used for partial matching, and which
431    appears several times in the code. We set the "hit end" flag if the pointer is
432    at the end of the subject and also past the start of the subject (i.e.
433    something has been matched). For hard partial matching, we then return
434    immediately. The second one is used when we already know we are past the end of
435    the subject. */
436    
437    #define CHECK_PARTIAL()\
438      if (md->partial != 0 && eptr >= md->end_subject && \
439          eptr > md->start_used_ptr) \
440        { \
441        md->hitend = TRUE; \
442        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
443        }
444    
445  Performance note: It might be tempting to extract commonly used fields from the  #define SCHECK_PARTIAL()\
446  md structure (e.g. utf8, end_subject) into individual variables to improve    if (md->partial != 0 && eptr > md->start_used_ptr) \
447        { \
448        md->hitend = TRUE; \
449        if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \
450        }
451    
452    
453    /* Performance note: It might be tempting to extract commonly used fields from
454    the md structure (e.g. utf8, end_subject) into individual variables to improve
455  performance. Tests using gcc on a SPARC disproved this; in the first case, it  performance. Tests using gcc on a SPARC disproved this; in the first case, it
456  made performance worse.  made performance worse.
457    
458  Arguments:  Arguments:
459     eptr        pointer to current character in subject     eptr        pointer to current character in subject
460     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
461       mstart      pointer to the current match start position (can be modified
462                     by encountering \K)
463       markptr     pointer to the most recent MARK name, or NULL
464     offset_top  current top pointer     offset_top  current top pointer
465     md          pointer to "static" info for the match     md          pointer to "static" info for the match
    ims         current /i, /m, and /s options  
466     eptrb       pointer to chain of blocks containing eptr at start of     eptrb       pointer to chain of blocks containing eptr at start of
467                   brackets - for testing for empty matches                   brackets - for testing for empty matches
468     flags       can contain     flags       can contain
469                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
470                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
471                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
472     rdepth      the recursion depth     rdepth      the recursion depth
473    
474  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
475                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
476                   a negative MATCH_xxx value for PRUNE, SKIP, etc
477                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
478                   (e.g. stopped by repeated call or recursion limit)                   (e.g. stopped by repeated call or recursion limit)
479  */  */
480    
481  static int  static int
482  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart,
483    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    const uschar *markptr, int offset_top, match_data *md, eptrblock *eptrb,
484    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
485  {  {
486  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
# Line 380  register unsigned int c; /* Character Line 493  register unsigned int c; /* Character
493  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
494    
495  BOOL minimize, possessive; /* Quantifier options */  BOOL minimize, possessive; /* Quantifier options */
496    BOOL caseless;
497    int condcode;
498    
499  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
500  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 387  heap storage. Set up the top-level frame Line 502  heap storage. Set up the top-level frame
502  heap whenever RMATCH() does a "recursion". See the macro definitions above. */  heap whenever RMATCH() does a "recursion". See the macro definitions above. */
503    
504  #ifdef NO_RECURSE  #ifdef NO_RECURSE
505  heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));  heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));
506    if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
507  frame->Xprevframe = NULL;            /* Marks the top level */  frame->Xprevframe = NULL;            /* Marks the top level */
508    
509  /* Copy in the original argument variables */  /* Copy in the original argument variables */
510    
511  frame->Xeptr = eptr;  frame->Xeptr = eptr;
512  frame->Xecode = ecode;  frame->Xecode = ecode;
513    frame->Xmstart = mstart;
514    frame->Xmarkptr = markptr;
515  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
 frame->Xims = ims;  
516  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
517  frame->Xflags = flags;  frame->Xflags = flags;
518  frame->Xrdepth = rdepth;  frame->Xrdepth = rdepth;
# Line 408  HEAP_RECURSE: Line 525  HEAP_RECURSE:
525    
526  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
527  #define ecode              frame->Xecode  #define ecode              frame->Xecode
528    #define mstart             frame->Xmstart
529    #define markptr            frame->Xmarkptr
530  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims  
531  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
532  #define flags              frame->Xflags  #define flags              frame->Xflags
533  #define rdepth             frame->Xrdepth  #define rdepth             frame->Xrdepth
# Line 420  HEAP_RECURSE: Line 538  HEAP_RECURSE:
538  #define charptr            frame->Xcharptr  #define charptr            frame->Xcharptr
539  #endif  #endif
540  #define callpat            frame->Xcallpat  #define callpat            frame->Xcallpat
541    #define codelink           frame->Xcodelink
542  #define data               frame->Xdata  #define data               frame->Xdata
543  #define next               frame->Xnext  #define next               frame->Xnext
544  #define pp                 frame->Xpp  #define pp                 frame->Xpp
# Line 432  HEAP_RECURSE: Line 551  HEAP_RECURSE:
551  #define condition          frame->Xcondition  #define condition          frame->Xcondition
552  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
553    
 #define original_ims       frame->Xoriginal_ims  
   
554  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
555  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
556  #define prop_value         frame->Xprop_value  #define prop_value         frame->Xprop_value
# Line 441  HEAP_RECURSE: Line 558  HEAP_RECURSE:
558  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
559  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
560  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
561    #define oclength           frame->Xoclength
562    #define occhars            frame->Xocchars
563  #endif  #endif
564    
565  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 485  BOOL cur_is_word; /* a Line 604  BOOL cur_is_word; /* a
604  BOOL condition;  BOOL condition;
605  BOOL prev_is_word;  BOOL prev_is_word;
606    
 unsigned long int original_ims;  
   
607  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
608  int prop_type;  int prop_type;
609  int prop_value;  int prop_value;
# Line 494  int prop_fail_result; Line 611  int prop_fail_result;
611  int prop_category;  int prop_category;
612  int prop_chartype;  int prop_chartype;
613  int prop_script;  int prop_script;
614    int oclength;
615    uschar occhars[8];
616  #endif  #endif
617    
618    int codelink;
619  int ctype;  int ctype;
620  int length;  int length;
621  int max;  int max;
# Line 529  TAIL_RECURSE: Line 649  TAIL_RECURSE:
649  /* OK, now we can get on with the real code of the function. Recursive calls  /* OK, now we can get on with the real code of the function. Recursive calls
650  are specified by the macro RMATCH and RRETURN is used to return. When  are specified by the macro RMATCH and RRETURN is used to return. When
651  NO_RECURSE is *not* defined, these just turn into a recursive call to match()  NO_RECURSE is *not* defined, these just turn into a recursive call to match()
652  and a "return", respectively (possibly with some debugging if DEBUG is  and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
653  defined). However, RMATCH isn't like a function call because it's quite a  defined). However, RMATCH isn't like a function call because it's quite a
654  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
655  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
656    
 /* First check that we haven't called match() too many times, or that we  
 haven't exceeded the recursive call limit. */  
   
 if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  
 if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);  
   
 original_ims = ims;    /* Save for resetting on ')' */  
   
657  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
658  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
659  #else  #else
660  utf8 = FALSE;  utf8 = FALSE;
661  #endif  #endif
662    
663    /* First check that we haven't called match() too many times, or that we
664    haven't exceeded the recursive call limit. */
665    
666    if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
667    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
668    
669  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
670  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
671  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
672  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
673  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
674  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
675  already used. */  block that is used is on the stack, so a new one may be required for each
676    match(). */
677    
678  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
679    {    {
680    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
681    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
682      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
683    }    }
684    
685  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 577  for (;;) Line 689  for (;;)
689    minimize = possessive = FALSE;    minimize = possessive = FALSE;
690    op = *ecode;    op = *ecode;
691    
   /* For partial matching, remember if we ever hit the end of the subject after  
   matching at least one subject character. */  
   
   if (md->partial &&  
       eptr >= md->end_subject &&  
       eptr > md->start_match)  
     md->hitend = TRUE;  
   
692    switch(op)    switch(op)
693      {      {
694        case OP_MARK:
695        markptr = ecode + 2;
696        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
697          eptrb, flags, RM55);
698    
699        /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
700        argument, and we must check whether that argument matches this MARK's
701        argument. It is passed back in md->start_match_ptr (an overloading of that
702        variable). If it does match, we reset that variable to the current subject
703        position and return MATCH_SKIP. Otherwise, pass back the return code
704        unaltered. */
705    
706        if (rrc == MATCH_SKIP_ARG &&
707            strcmp((char *)markptr, (char *)(md->start_match_ptr)) == 0)
708          {
709          md->start_match_ptr = eptr;
710          RRETURN(MATCH_SKIP);
711          }
712    
713        if (md->mark == NULL) md->mark = markptr;
714        RRETURN(rrc);
715    
716        case OP_FAIL:
717        MRRETURN(MATCH_NOMATCH);
718    
719        /* COMMIT overrides PRUNE, SKIP, and THEN */
720    
721        case OP_COMMIT:
722        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
723          eptrb, flags, RM52);
724        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE &&
725            rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG &&
726            rrc != MATCH_THEN)
727          RRETURN(rrc);
728        MRRETURN(MATCH_COMMIT);
729    
730        /* PRUNE overrides THEN */
731    
732        case OP_PRUNE:
733        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
734          eptrb, flags, RM51);
735        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
736        MRRETURN(MATCH_PRUNE);
737    
738        case OP_PRUNE_ARG:
739        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
740          eptrb, flags, RM56);
741        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
742        md->mark = ecode + 2;
743        RRETURN(MATCH_PRUNE);
744    
745        /* SKIP overrides PRUNE and THEN */
746    
747        case OP_SKIP:
748        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
749          eptrb, flags, RM53);
750        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
751          RRETURN(rrc);
752        md->start_match_ptr = eptr;   /* Pass back current position */
753        MRRETURN(MATCH_SKIP);
754    
755        case OP_SKIP_ARG:
756        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md,
757          eptrb, flags, RM57);
758        if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN)
759          RRETURN(rrc);
760    
761        /* Pass back the current skip name by overloading md->start_match_ptr and
762        returning the special MATCH_SKIP_ARG return code. This will either be
763        caught by a matching MARK, or get to the top, where it is treated the same
764        as PRUNE. */
765    
766        md->start_match_ptr = ecode + 2;
767        RRETURN(MATCH_SKIP_ARG);
768    
769        /* For THEN (and THEN_ARG) we pass back the address of the bracket or
770        the alt that is at the start of the current branch. This makes it possible
771        to skip back past alternatives that precede the THEN within the current
772        branch. */
773    
774        case OP_THEN:
775        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
776          eptrb, flags, RM54);
777        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
778        md->start_match_ptr = ecode - GET(ecode, 1);
779        MRRETURN(MATCH_THEN);
780    
781        case OP_THEN_ARG:
782        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE],
783          offset_top, md, eptrb, flags, RM58);
784        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
785        md->start_match_ptr = ecode - GET(ecode, 1);
786        md->mark = ecode + LINK_SIZE + 2;
787        RRETURN(MATCH_THEN);
788    
789      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
790      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
791      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 606  for (;;) Line 805  for (;;)
805      number = GET2(ecode, 1+LINK_SIZE);      number = GET2(ecode, 1+LINK_SIZE);
806      offset = number << 1;      offset = number << 1;
807    
808  #ifdef DEBUG  #ifdef PCRE_DEBUG
809      printf("start bracket %d\n", number);      printf("start bracket %d\n", number);
810      printf("subject=");      printf("subject=");
811      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
# Line 621  for (;;) Line 820  for (;;)
820        save_capture_last = md->capture_last;        save_capture_last = md->capture_last;
821    
822        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
823        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] =
824            (int)(eptr - md->start_subject);
825    
826        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
827        do        do
828          {          {
829          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
830            ims, eptrb, flags);            eptrb, flags, RM1);
831          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH &&
832                (rrc != MATCH_THEN || md->start_match_ptr != ecode))
833              RRETURN(rrc);
834          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
835          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
836          }          }
# Line 640  for (;;) Line 842  for (;;)
842        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
843        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
844    
845          if (rrc != MATCH_THEN) md->mark = markptr;
846        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
847        }        }
848    
849      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
850      bracket. */      as a non-capturing bracket. */
851    
852        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
853        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
854    
855      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
856    
857        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
858        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
859    
860      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
861      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
862      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
863      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
864        is set. */
865    
866      case OP_BRA:      case OP_BRA:
867      case OP_SBRA:      case OP_SBRA:
# Line 659  for (;;) Line 869  for (;;)
869      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
870      for (;;)      for (;;)
871        {        {
872        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
873          {          {
874          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
875          flags |= match_tail_recursed;            {
876          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
877          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
878              goto TAIL_RECURSE;
879              }
880    
881            /* Possibly empty group; can't use tail recursion. */
882    
883            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
884              flags, RM48);
885            if (rrc == MATCH_NOMATCH) md->mark = markptr;
886            RRETURN(rrc);
887          }          }
888    
889        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
890        otherwise return. */        otherwise return. */
891    
892        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb,
893          eptrb, flags);          flags, RM2);
894        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH &&
895              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
896            RRETURN(rrc);
897        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
898        }        }
899      /* Control never reaches here. */      /* Control never reaches here. */
# Line 685  for (;;) Line 906  for (;;)
906    
907      case OP_COND:      case OP_COND:
908      case OP_SCOND:      case OP_SCOND:
909      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      codelink= GET(ecode, 1);
910    
911        /* Because of the way auto-callout works during compile, a callout item is
912        inserted between OP_COND and an assertion condition. */
913    
914        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
915        {        {
916        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        if (pcre_callout != NULL)
917        condition = md->recursive != NULL &&          {
918          (offset == RREF_ANY || offset == md->recursive->group_num);          pcre_callout_block cb;
919        ecode += condition? 3 : GET(ecode, 1);          cb.version          = 1;   /* Version 1 of the callout block */
920            cb.callout_number   = ecode[LINK_SIZE+2];
921            cb.offset_vector    = md->offset_vector;
922            cb.subject          = (PCRE_SPTR)md->start_subject;
923            cb.subject_length   = (int)(md->end_subject - md->start_subject);
924            cb.start_match      = (int)(mstart - md->start_subject);
925            cb.current_position = (int)(eptr - md->start_subject);
926            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
927            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
928            cb.capture_top      = offset_top/2;
929            cb.capture_last     = md->capture_last;
930            cb.callout_data     = md->callout_data;
931            if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
932            if (rrc < 0) RRETURN(rrc);
933            }
934          ecode += _pcre_OP_lengths[OP_CALLOUT];
935          }
936    
937        condcode = ecode[LINK_SIZE+1];
938    
939        /* Now see what the actual condition is */
940    
941        if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
942          {
943          if (md->recursive == NULL)                /* Not recursing => FALSE */
944            {
945            condition = FALSE;
946            ecode += GET(ecode, 1);
947            }
948          else
949            {
950            int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
951            condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
952    
953            /* If the test is for recursion into a specific subpattern, and it is
954            false, but the test was set up by name, scan the table to see if the
955            name refers to any other numbers, and test them. The condition is true
956            if any one is set. */
957    
958            if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
959              {
960              uschar *slotA = md->name_table;
961              for (i = 0; i < md->name_count; i++)
962                {
963                if (GET2(slotA, 0) == recno) break;
964                slotA += md->name_entry_size;
965                }
966    
967              /* Found a name for the number - there can be only one; duplicate
968              names for different numbers are allowed, but not vice versa. First
969              scan down for duplicates. */
970    
971              if (i < md->name_count)
972                {
973                uschar *slotB = slotA;
974                while (slotB > md->name_table)
975                  {
976                  slotB -= md->name_entry_size;
977                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
978                    {
979                    condition = GET2(slotB, 0) == md->recursive->group_num;
980                    if (condition) break;
981                    }
982                  else break;
983                  }
984    
985                /* Scan up for duplicates */
986    
987                if (!condition)
988                  {
989                  slotB = slotA;
990                  for (i++; i < md->name_count; i++)
991                    {
992                    slotB += md->name_entry_size;
993                    if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
994                      {
995                      condition = GET2(slotB, 0) == md->recursive->group_num;
996                      if (condition) break;
997                      }
998                    else break;
999                    }
1000                  }
1001                }
1002              }
1003    
1004            /* Choose branch according to the condition */
1005    
1006            ecode += condition? 3 : GET(ecode, 1);
1007            }
1008        }        }
1009    
1010      else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */      else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
1011        {        {
1012        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
1013        condition = offset < offset_top && md->offset_vector[offset] >= 0;        condition = offset < offset_top && md->offset_vector[offset] >= 0;
1014    
1015          /* If the numbered capture is unset, but the reference was by name,
1016          scan the table to see if the name refers to any other numbers, and test
1017          them. The condition is true if any one is set. This is tediously similar
1018          to the code above, but not close enough to try to amalgamate. */
1019    
1020          if (!condition && condcode == OP_NCREF)
1021            {
1022            int refno = offset >> 1;
1023            uschar *slotA = md->name_table;
1024    
1025            for (i = 0; i < md->name_count; i++)
1026              {
1027              if (GET2(slotA, 0) == refno) break;
1028              slotA += md->name_entry_size;
1029              }
1030    
1031            /* Found a name for the number - there can be only one; duplicate names
1032            for different numbers are allowed, but not vice versa. First scan down
1033            for duplicates. */
1034    
1035            if (i < md->name_count)
1036              {
1037              uschar *slotB = slotA;
1038              while (slotB > md->name_table)
1039                {
1040                slotB -= md->name_entry_size;
1041                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1042                  {
1043                  offset = GET2(slotB, 0) << 1;
1044                  condition = offset < offset_top &&
1045                    md->offset_vector[offset] >= 0;
1046                  if (condition) break;
1047                  }
1048                else break;
1049                }
1050    
1051              /* Scan up for duplicates */
1052    
1053              if (!condition)
1054                {
1055                slotB = slotA;
1056                for (i++; i < md->name_count; i++)
1057                  {
1058                  slotB += md->name_entry_size;
1059                  if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
1060                    {
1061                    offset = GET2(slotB, 0) << 1;
1062                    condition = offset < offset_top &&
1063                      md->offset_vector[offset] >= 0;
1064                    if (condition) break;
1065                    }
1066                  else break;
1067                  }
1068                }
1069              }
1070            }
1071    
1072          /* Choose branch according to the condition */
1073    
1074        ecode += condition? 3 : GET(ecode, 1);        ecode += condition? 3 : GET(ecode, 1);
1075        }        }
1076    
1077      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */      else if (condcode == OP_DEF)     /* DEFINE - always false */
1078        {        {
1079        condition = FALSE;        condition = FALSE;
1080        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 712  for (;;) Line 1086  for (;;)
1086    
1087      else      else
1088        {        {
1089        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL,
1090            match_condassert);            match_condassert, RM3);
1091        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
1092          {          {
1093          condition = TRUE;          condition = TRUE;
1094          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
1095          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1096          }          }
1097        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH &&
1098                  (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1099          {          {
1100          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
1101          }          }
1102        else        else
1103          {          {
1104          condition = FALSE;          condition = FALSE;
1105          ecode += GET(ecode, 1);          ecode += codelink;
1106          }          }
1107        }        }
1108    
1109      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
1110      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
1111      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
1112        group. If the second alternative doesn't exist, we can just plough on. */
1113    
1114      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
1115        {        {
1116        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1117        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
1118        goto TAIL_RECURSE;          {
1119            RMATCH(eptr, ecode, offset_top, md, eptrb, match_cbegroup, RM49);
1120            RRETURN(rrc);
1121            }
1122          else                       /* Group must match something */
1123            {
1124            flags = 0;
1125            goto TAIL_RECURSE;
1126            }
1127        }        }
1128      else      else                         /* Condition false & no alternative */
1129        {        {
1130        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1131        }        }
1132      break;      break;
1133    
1134    
1135      /* End of the pattern. If we are in a top-level recursion, we should      /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1136      restore the offsets appropriately and continue from after the call. */      to close any currently open capturing brackets. */
1137    
1138        case OP_CLOSE:
1139        number = GET2(ecode, 1);
1140        offset = number << 1;
1141    
1142    #ifdef PCRE_DEBUG
1143          printf("end bracket %d at *ACCEPT", number);
1144          printf("\n");
1145    #endif
1146    
1147        md->capture_last = number;
1148        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1149          {
1150          md->offset_vector[offset] =
1151            md->offset_vector[md->offset_end - number];
1152          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1153          if (offset_top <= offset) offset_top = offset + 2;
1154          }
1155        ecode += 3;
1156        break;
1157    
1158    
1159        /* End of the pattern, either real or forced. If we are in a top-level
1160        recursion, we should restore the offsets appropriately and continue from
1161        after the call. */
1162    
1163        case OP_ACCEPT:
1164      case OP_END:      case OP_END:
1165      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
1166        {        {
# Line 759  for (;;) Line 1169  for (;;)
1169        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
1170        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
1171          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
1172        md->start_match = rec->save_start;        offset_top = rec->save_offset_top;
       ims = original_ims;  
1173        ecode = rec->after_call;        ecode = rec->after_call;
1174        break;        break;
1175        }        }
1176    
1177      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
1178      string - backtracking will then try other alternatives, if any. */      set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
1179        the subject. In both cases, backtracking will then try other alternatives,
1180        if any. */
1181    
1182        if (eptr == mstart &&
1183            (md->notempty ||
1184              (md->notempty_atstart &&
1185                mstart == md->start_subject + md->start_offset)))
1186          MRRETURN(MATCH_NOMATCH);
1187    
1188        /* Otherwise, we have a match. */
1189    
1190        md->end_match_ptr = eptr;           /* Record where we ended */
1191        md->end_offset_top = offset_top;    /* and how many extracts were taken */
1192        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
1193    
1194      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      /* For some reason, the macros don't work properly if an expression is
1195      md->end_match_ptr = eptr;          /* Record where we ended */      given as the argument to MRRETURN when the heap is in use. */
1196      md->end_offset_top = offset_top;   /* and how many extracts were taken */  
1197      RRETURN(MATCH_MATCH);      rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1198        MRRETURN(rrc);
     /* Change option settings */  
   
     case OP_OPT:  
     ims = ecode[1];  
     ecode += 2;  
     DPRINTF(("ims set to %02lx\n", ims));  
     break;  
1199    
1200      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
1201      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
# Line 791  for (;;) Line 1207  for (;;)
1207      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1208      do      do
1209        {        {
1210        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1211        if (rrc == MATCH_MATCH) break;          RM4);
1212        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1213            {
1214            mstart = md->start_match_ptr;   /* In case \K reset it */
1215            break;
1216            }
1217          if (rrc != MATCH_NOMATCH &&
1218              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1219            RRETURN(rrc);
1220        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
1221        }        }
1222      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1223      if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KET) MRRETURN(MATCH_NOMATCH);
1224    
1225      /* If checking an assertion for a condition, return MATCH_MATCH. */      /* If checking an assertion for a condition, return MATCH_MATCH. */
1226    
# Line 811  for (;;) Line 1234  for (;;)
1234      offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1235      continue;      continue;
1236    
1237      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match. Encountering SKIP,
1238        PRUNE, or COMMIT means we must assume failure without checking subsequent
1239        branches. */
1240    
1241      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
1242      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1243      do      do
1244        {        {
1245        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, 0,
1246        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);          RM5);
1247        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) MRRETURN(MATCH_NOMATCH);
1248          if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
1249            {
1250            do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1251            break;
1252            }
1253          if (rrc != MATCH_NOMATCH &&
1254              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1255            RRETURN(rrc);
1256        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1257        }        }
1258      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 842  for (;;) Line 1275  for (;;)
1275        while (i-- > 0)        while (i-- > 0)
1276          {          {
1277          eptr--;          eptr--;
1278          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1279          BACKCHAR(eptr)          BACKCHAR(eptr);
1280          }          }
1281        }        }
1282      else      else
# Line 853  for (;;) Line 1286  for (;;)
1286    
1287        {        {
1288        eptr -= GET(ecode, 1);        eptr -= GET(ecode, 1);
1289        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) MRRETURN(MATCH_NOMATCH);
1290        }        }
1291    
1292      /* Skip to next op code */      /* Save the earliest consulted character, then skip to next op code */
1293    
1294        if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1295      ecode += 1 + LINK_SIZE;      ecode += 1 + LINK_SIZE;
1296      break;      break;
1297    
# Line 873  for (;;) Line 1307  for (;;)
1307        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
1308        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
1309        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
1310        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = (int)(md->end_subject - md->start_subject);
1311        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = (int)(mstart - md->start_subject);
1312        cb.current_position = eptr - md->start_subject;        cb.current_position = (int)(eptr - md->start_subject);
1313        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
1314        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1315        cb.capture_top      = offset_top/2;        cb.capture_top      = offset_top/2;
1316        cb.capture_last     = md->capture_last;        cb.capture_last     = md->capture_last;
1317        cb.callout_data     = md->callout_data;        cb.callout_data     = md->callout_data;
1318        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);        if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH);
1319        if (rrc < 0) RRETURN(rrc);        if (rrc < 0) RRETURN(rrc);
1320        }        }
1321      ecode += 2 + 2*LINK_SIZE;      ecode += 2 + 2*LINK_SIZE;
# Line 936  for (;;) Line 1370  for (;;)
1370    
1371        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1372              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1373        new_recursive.save_start = md->start_match;        new_recursive.save_offset_top = offset_top;
       md->start_match = eptr;  
1374    
1375        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1376        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 946  for (;;) Line 1379  for (;;)
1379        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1380        do        do
1381          {          {
1382          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1383            md, ims, eptrb, flags);            md, eptrb, flags, RM6);
1384          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
1385            {            {
1386            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
1387            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
1388            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
1389              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1390            RRETURN(MATCH_MATCH);            MRRETURN(MATCH_MATCH);
1391            }            }
1392          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH &&
1393                    (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1394            {            {
1395            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1396              if (new_recursive.offset_save != stacksave)
1397                (pcre_free)(new_recursive.offset_save);
1398            RRETURN(rrc);            RRETURN(rrc);
1399            }            }
1400    
# Line 973  for (;;) Line 1409  for (;;)
1409        md->recursive = new_recursive.prevrec;        md->recursive = new_recursive.prevrec;
1410        if (new_recursive.offset_save != stacksave)        if (new_recursive.offset_save != stacksave)
1411          (pcre_free)(new_recursive.offset_save);          (pcre_free)(new_recursive.offset_save);
1412        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1413        }        }
1414      /* Control never reaches here */      /* Control never reaches here */
1415    
# Line 982  for (;;) Line 1418  for (;;)
1418      a move back into the brackets. Friedl calls these "atomic" subpatterns.      a move back into the brackets. Friedl calls these "atomic" subpatterns.
1419      Check the alternative branches in turn - the matching won't pass the KET      Check the alternative branches in turn - the matching won't pass the KET
1420      for this kind of subpattern. If any one branch matches, we carry on as at      for this kind of subpattern. If any one branch matches, we carry on as at
1421      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer, but resetting
1422        the start-of-match value in case it was changed by \K. */
1423    
1424      case OP_ONCE:      case OP_ONCE:
1425      prev = ecode;      prev = ecode;
# Line 990  for (;;) Line 1427  for (;;)
1427    
1428      do      do
1429        {        {
1430        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM7);
1431          eptrb, 0);        if (rrc == MATCH_MATCH)  /* Note: _not_ MATCH_ACCEPT */
1432        if (rrc == MATCH_MATCH) break;          {
1433        if (rrc != MATCH_NOMATCH) RRETURN(rrc);          mstart = md->start_match_ptr;
1434            break;
1435            }
1436          if (rrc != MATCH_NOMATCH &&
1437              (rrc != MATCH_THEN || md->start_match_ptr != ecode))
1438            RRETURN(rrc);
1439        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1440        }        }
1441      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1024  for (;;) Line 1466  for (;;)
1466    
1467      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1468      preceding bracket, in the appropriate order. The second "call" of match()      preceding bracket, in the appropriate order. The second "call" of match()
1469      uses tail recursion, to avoid using another stack frame. We need to reset      uses tail recursion, to avoid using another stack frame. */
     any options that changed within the bracket before re-running it, so  
     check the next opcode. */  
   
     if (ecode[1+LINK_SIZE] == OP_OPT)  
       {  
       ims = (ims & ~PCRE_IMS) | ecode[4];  
       DPRINTF(("ims set to %02lx at group repeat\n", ims));  
       }  
1470    
1471      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1472        {        {
1473        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM8);
1474        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1475        ecode = prev;        ecode = prev;
1476        flags = match_tail_recursed;        flags = 0;
1477        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1478        }        }
1479      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1480        {        {
1481        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, eptrb, match_cbegroup, RM9);
1482        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1483        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1484        flags = match_tail_recursed;        flags = 0;
1485        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1486        }        }
1487      /* Control never gets here */      /* Control never gets here */
# Line 1059  for (;;) Line 1493  for (;;)
1493      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1494      break;      break;
1495    
1496      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1497      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1498      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1499      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1500      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1501    
1502      case OP_BRAZERO:      case OP_BRAZERO:
1503        {        {
1504        next = ecode+1;        next = ecode+1;
1505        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, eptrb, 0, RM10);
1506        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1507        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1508        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1079  for (;;) Line 1513  for (;;)
1513        {        {
1514        next = ecode+1;        next = ecode+1;
1515        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1516        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, 0, RM11);
1517        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1518        ecode++;        ecode++;
1519        }        }
1520      break;      break;
1521    
1522        case OP_SKIPZERO:
1523          {
1524          next = ecode+1;
1525          do next += GET(next,1); while (*next == OP_ALT);
1526          ecode = next + 1 + LINK_SIZE;
1527          }
1528        break;
1529    
1530      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1531    
1532      case OP_KET:      case OP_KET:
# Line 1103  for (;;) Line 1545  for (;;)
1545        }        }
1546      else saved_eptr = NULL;      else saved_eptr = NULL;
1547    
1548      /* If we are at the end of an assertion group, stop matching and return      /* If we are at the end of an assertion group or an atomic group, stop
1549      MATCH_MATCH, but record the current high water mark for use by positive      matching and return MATCH_MATCH, but record the current high water mark for
1550      assertions. Do this also for the "once" (atomic) groups. */      use by positive assertions. We also need to record the match start in case
1551        it was changed by \K. */
1552    
1553      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1554          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
# Line 1113  for (;;) Line 1556  for (;;)
1556        {        {
1557        md->end_match_ptr = eptr;      /* For ONCE */        md->end_match_ptr = eptr;      /* For ONCE */
1558        md->end_offset_top = offset_top;        md->end_offset_top = offset_top;
1559        RRETURN(MATCH_MATCH);        md->start_match_ptr = mstart;
1560          MRRETURN(MATCH_MATCH);
1561        }        }
1562    
1563      /* For capturing groups we have to check the group number back at the start      /* For capturing groups we have to check the group number back at the start
# Line 1127  for (;;) Line 1571  for (;;)
1571        number = GET2(prev, 1+LINK_SIZE);        number = GET2(prev, 1+LINK_SIZE);
1572        offset = number << 1;        offset = number << 1;
1573    
1574  #ifdef DEBUG  #ifdef PCRE_DEBUG
1575        printf("end bracket %d", number);        printf("end bracket %d", number);
1576        printf("\n");        printf("\n");
1577  #endif  #endif
# Line 1137  for (;;) Line 1581  for (;;)
1581          {          {
1582          md->offset_vector[offset] =          md->offset_vector[offset] =
1583            md->offset_vector[md->offset_end - number];            md->offset_vector[md->offset_end - number];
1584          md->offset_vector[offset+1] = eptr - md->start_subject;          md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1585          if (offset_top <= offset) offset_top = offset + 2;          if (offset_top <= offset) offset_top = offset + 2;
1586          }          }
1587    
# Line 1149  for (;;) Line 1593  for (;;)
1593          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1594          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1595          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
         md->start_match = rec->save_start;  
1596          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1597            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1598            offset_top = rec->save_offset_top;
1599          ecode = rec->after_call;          ecode = rec->after_call;
         ims = original_ims;  
1600          break;          break;
1601          }          }
1602        }        }
1603    
     /* For both capturing and non-capturing groups, reset the value of the ims  
     flags, in case they got changed during the group. */  
   
     ims = original_ims;  
     DPRINTF(("ims reset to %02lx\n", ims));  
   
1604      /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1605      happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1606      This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
# Line 1178  for (;;) Line 1615  for (;;)
1615    
1616      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1617      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1618      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1619        unlimited repeat of a group that can match an empty string. */
1620    
1621      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1622    
1623      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1624        {        {
1625        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, 0, RM12);
1626        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1627          if (flags != 0)    /* Could match an empty string */
1628            {
1629            RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM50);
1630            RRETURN(rrc);
1631            }
1632        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1633        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1634        }        }
1635      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1636        {        {
1637        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, eptrb, flags, RM13);
1638        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1639        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1640        flags = match_tail_recursed;        flags = 0;
1641        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1642        }        }
1643      /* Control never gets here */      /* Control never gets here */
1644    
1645      /* Start of subject unless notbol, or after internal newline if multiline */      /* Not multiline mode: start of subject assertion, unless notbol. */
1646    
1647      case OP_CIRC:      case OP_CIRC:
1648      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1649      if ((ims & PCRE_MULTILINE) != 0)  
       {  
       if (eptr != md->start_subject &&  
           (eptr == md->end_subject || !WAS_NEWLINE(eptr)))  
         RRETURN(MATCH_NOMATCH);  
       ecode++;  
       break;  
       }  
     /* ... else fall through */  
   
1650      /* Start of subject assertion */      /* Start of subject assertion */
1651    
1652      case OP_SOD:      case OP_SOD:
1653      if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject) MRRETURN(MATCH_NOMATCH);
1654        ecode++;
1655        break;
1656    
1657        /* Multiline mode: start of subject unless notbol, or after any newline. */
1658    
1659        case OP_CIRCM:
1660        if (md->notbol && eptr == md->start_subject) MRRETURN(MATCH_NOMATCH);
1661        if (eptr != md->start_subject &&
1662            (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1663          MRRETURN(MATCH_NOMATCH);
1664      ecode++;      ecode++;
1665      break;      break;
1666    
1667      /* Start of match assertion */      /* Start of match assertion */
1668    
1669      case OP_SOM:      case OP_SOM:
1670      if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);      if (eptr != md->start_subject + md->start_offset) MRRETURN(MATCH_NOMATCH);
1671      ecode++;      ecode++;
1672      break;      break;
1673    
1674      /* Assert before internal newline if multiline, or before a terminating      /* Reset the start of match point */
     newline unless endonly is set, else end of subject unless noteol is set. */  
1675    
1676      case OP_DOLL:      case OP_SET_SOM:
1677      if ((ims & PCRE_MULTILINE) != 0)      mstart = eptr;
1678        {      ecode++;
1679        if (eptr < md->end_subject)      break;
1680          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }  
1681        else      /* Multiline mode: assert before any newline, or before end of subject
1682          { if (md->noteol) RRETURN(MATCH_NOMATCH); }      unless noteol is set. */
1683        ecode++;  
1684        break;      case OP_DOLLM:
1685        }      if (eptr < md->end_subject)
1686          { if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); }
1687      else      else
1688        {        {
1689        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1690        if (!md->endonly)        SCHECK_PARTIAL();
         {  
         if (eptr != md->end_subject &&  
             (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))  
           RRETURN(MATCH_NOMATCH);  
         ecode++;  
         break;  
         }  
1691        }        }
1692        ecode++;
1693        break;
1694    
1695        /* Not multiline mode: assert before a terminating newline or before end of
1696        subject unless noteol is set. */
1697    
1698        case OP_DOLL:
1699        if (md->noteol) MRRETURN(MATCH_NOMATCH);
1700        if (!md->endonly) goto ASSERT_NL_OR_EOS;
1701    
1702      /* ... else fall through for endonly */      /* ... else fall through for endonly */
1703    
1704      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1705    
1706      case OP_EOD:      case OP_EOD:
1707      if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH);
1708        SCHECK_PARTIAL();
1709      ecode++;      ecode++;
1710      break;      break;
1711    
1712      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1713    
1714      case OP_EODN:      case OP_EODN:
1715      if (eptr != md->end_subject &&      ASSERT_NL_OR_EOS:
1716        if (eptr < md->end_subject &&
1717          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1718        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1719    
1720        /* Either at end of string or \n before end. */
1721    
1722        SCHECK_PARTIAL();
1723      ecode++;      ecode++;
1724      break;      break;
1725    
# Line 1279  for (;;) Line 1731  for (;;)
1731    
1732        /* Find out if the previous and current characters are "word" characters.        /* Find out if the previous and current characters are "word" characters.
1733        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to        It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
1734        be "non-word" characters. */        be "non-word" characters. Remember the earliest consulted character for
1735          partial matching. */
1736    
1737  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1738        if (utf8)        if (utf8)
1739          {          {
1740            /* Get status of previous character */
1741    
1742          if (eptr == md->start_subject) prev_is_word = FALSE; else          if (eptr == md->start_subject) prev_is_word = FALSE; else
1743            {            {
1744            const uschar *lastptr = eptr - 1;            USPTR lastptr = eptr - 1;
1745            while((*lastptr & 0xc0) == 0x80) lastptr--;            while((*lastptr & 0xc0) == 0x80) lastptr--;
1746              if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
1747            GETCHAR(c, lastptr);            GETCHAR(c, lastptr);
1748    #ifdef SUPPORT_UCP
1749              if (md->use_ucp)
1750                {
1751                if (c == '_') prev_is_word = TRUE; else
1752                  {
1753                  int cat = UCD_CATEGORY(c);
1754                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1755                  }
1756                }
1757              else
1758    #endif
1759            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1760            }            }
1761          if (eptr >= md->end_subject) cur_is_word = FALSE; else  
1762            /* Get status of next character */
1763    
1764            if (eptr >= md->end_subject)
1765              {
1766              SCHECK_PARTIAL();
1767              cur_is_word = FALSE;
1768              }
1769            else
1770            {            {
1771            GETCHAR(c, eptr);            GETCHAR(c, eptr);
1772    #ifdef SUPPORT_UCP
1773              if (md->use_ucp)
1774                {
1775                if (c == '_') cur_is_word = TRUE; else
1776                  {
1777                  int cat = UCD_CATEGORY(c);
1778                  cur_is_word = (cat == ucp_L || cat == ucp_N);
1779                  }
1780                }
1781              else
1782    #endif
1783            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;            cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
1784            }            }
1785          }          }
1786        else        else
1787  #endif  #endif
1788    
1789        /* More streamlined when not in UTF-8 mode */        /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
1790          consistency with the behaviour of \w we do use it in this case. */
1791    
1792          {          {
1793          prev_is_word = (eptr != md->start_subject) &&          /* Get status of previous character */
           ((md->ctypes[eptr[-1]] & ctype_word) != 0);  
         cur_is_word = (eptr < md->end_subject) &&  
           ((md->ctypes[*eptr] & ctype_word) != 0);  
         }  
   
       /* Now see if the situation is what we want */  
1794    
1795        if ((*ecode++ == OP_WORD_BOUNDARY)?          if (eptr == md->start_subject) prev_is_word = FALSE; else
1796             cur_is_word == prev_is_word : cur_is_word != prev_is_word)            {
1797          RRETURN(MATCH_NOMATCH);            if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
1798        }  #ifdef SUPPORT_UCP
1799      break;            if (md->use_ucp)
1800                {
1801                c = eptr[-1];
1802                if (c == '_') prev_is_word = TRUE; else
1803                  {
1804                  int cat = UCD_CATEGORY(c);
1805                  prev_is_word = (cat == ucp_L || cat == ucp_N);
1806                  }
1807                }
1808              else
1809    #endif
1810              prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
1811              }
1812    
1813      /* Match a single character type; inline for speed */          /* Get status of next character */
1814    
1815      case OP_ANY:          if (eptr >= md->end_subject)
1816      if ((ims & PCRE_DOTALL) == 0)            {
1817        {            SCHECK_PARTIAL();
1818        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);            cur_is_word = FALSE;
1819        }            }
1820      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);          else
1821      if (utf8)  #ifdef SUPPORT_UCP
1822        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;          if (md->use_ucp)
1823              {
1824              c = *eptr;
1825              if (c == '_') cur_is_word = TRUE; else
1826                {
1827                int cat = UCD_CATEGORY(c);
1828                cur_is_word = (cat == ucp_L || cat == ucp_N);
1829                }
1830              }
1831            else
1832    #endif
1833            cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
1834            }
1835    
1836          /* Now see if the situation is what we want */
1837    
1838          if ((*ecode++ == OP_WORD_BOUNDARY)?
1839               cur_is_word == prev_is_word : cur_is_word != prev_is_word)
1840            MRRETURN(MATCH_NOMATCH);
1841          }
1842        break;
1843    
1844        /* Match a single character type; inline for speed */
1845    
1846        case OP_ANY:
1847        if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
1848        /* Fall through */
1849    
1850        case OP_ALLANY:
1851        if (eptr++ >= md->end_subject)
1852          {
1853          SCHECK_PARTIAL();
1854          MRRETURN(MATCH_NOMATCH);
1855          }
1856        if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
1857      ecode++;      ecode++;
1858      break;      break;
1859    
# Line 1334  for (;;) Line 1861  for (;;)
1861      any byte, even newline, independent of the setting of PCRE_DOTALL. */      any byte, even newline, independent of the setting of PCRE_DOTALL. */
1862    
1863      case OP_ANYBYTE:      case OP_ANYBYTE:
1864      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject)
1865          {
1866          SCHECK_PARTIAL();
1867          MRRETURN(MATCH_NOMATCH);
1868          }
1869      ecode++;      ecode++;
1870      break;      break;
1871    
1872      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
1873      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1874          {
1875          SCHECK_PARTIAL();
1876          MRRETURN(MATCH_NOMATCH);
1877          }
1878      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1879      if (      if (
1880  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1347  for (;;) Line 1882  for (;;)
1882  #endif  #endif
1883         (md->ctypes[c] & ctype_digit) != 0         (md->ctypes[c] & ctype_digit) != 0
1884         )         )
1885        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1886      ecode++;      ecode++;
1887      break;      break;
1888    
1889      case OP_DIGIT:      case OP_DIGIT:
1890      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1891          {
1892          SCHECK_PARTIAL();
1893          MRRETURN(MATCH_NOMATCH);
1894          }
1895      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1896      if (      if (
1897  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1360  for (;;) Line 1899  for (;;)
1899  #endif  #endif
1900         (md->ctypes[c] & ctype_digit) == 0         (md->ctypes[c] & ctype_digit) == 0
1901         )         )
1902        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1903      ecode++;      ecode++;
1904      break;      break;
1905    
1906      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
1907      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1908          {
1909          SCHECK_PARTIAL();
1910          MRRETURN(MATCH_NOMATCH);
1911          }
1912      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1913      if (      if (
1914  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1373  for (;;) Line 1916  for (;;)
1916  #endif  #endif
1917         (md->ctypes[c] & ctype_space) != 0         (md->ctypes[c] & ctype_space) != 0
1918         )         )
1919        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1920      ecode++;      ecode++;
1921      break;      break;
1922    
1923      case OP_WHITESPACE:      case OP_WHITESPACE:
1924      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1925          {
1926          SCHECK_PARTIAL();
1927          MRRETURN(MATCH_NOMATCH);
1928          }
1929      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1930      if (      if (
1931  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1386  for (;;) Line 1933  for (;;)
1933  #endif  #endif
1934         (md->ctypes[c] & ctype_space) == 0         (md->ctypes[c] & ctype_space) == 0
1935         )         )
1936        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1937      ecode++;      ecode++;
1938      break;      break;
1939    
1940      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
1941      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1942          {
1943          SCHECK_PARTIAL();
1944          MRRETURN(MATCH_NOMATCH);
1945          }
1946      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1947      if (      if (
1948  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1399  for (;;) Line 1950  for (;;)
1950  #endif  #endif
1951         (md->ctypes[c] & ctype_word) != 0         (md->ctypes[c] & ctype_word) != 0
1952         )         )
1953        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1954      ecode++;      ecode++;
1955      break;      break;
1956    
1957      case OP_WORDCHAR:      case OP_WORDCHAR:
1958      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1959          {
1960          SCHECK_PARTIAL();
1961          MRRETURN(MATCH_NOMATCH);
1962          }
1963      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1964      if (      if (
1965  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1412  for (;;) Line 1967  for (;;)
1967  #endif  #endif
1968         (md->ctypes[c] & ctype_word) == 0         (md->ctypes[c] & ctype_word) == 0
1969         )         )
1970        RRETURN(MATCH_NOMATCH);        MRRETURN(MATCH_NOMATCH);
1971      ecode++;      ecode++;
1972      break;      break;
1973    
1974      case OP_ANYNL:      case OP_ANYNL:
1975      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
1976          {
1977          SCHECK_PARTIAL();
1978          MRRETURN(MATCH_NOMATCH);
1979          }
1980      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1981      switch(c)      switch(c)
1982        {        {
1983        default: RRETURN(MATCH_NOMATCH);        default: MRRETURN(MATCH_NOMATCH);
1984    
1985        case 0x000d:        case 0x000d:
1986        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1987        break;        break;
1988    
1989        case 0x000a:        case 0x000a:
1990          break;
1991    
1992        case 0x000b:        case 0x000b:
1993        case 0x000c:        case 0x000c:
1994        case 0x0085:        case 0x0085:
1995        case 0x2028:        case 0x2028:
1996        case 0x2029:        case 0x2029:
1997          if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
1998          break;
1999          }
2000        ecode++;
2001        break;
2002    
2003        case OP_NOT_HSPACE:
2004        if (eptr >= md->end_subject)
2005          {
2006          SCHECK_PARTIAL();
2007          MRRETURN(MATCH_NOMATCH);
2008          }
2009        GETCHARINCTEST(c, eptr);
2010        switch(c)
2011          {
2012          default: break;
2013          case 0x09:      /* HT */
2014          case 0x20:      /* SPACE */
2015          case 0xa0:      /* NBSP */
2016          case 0x1680:    /* OGHAM SPACE MARK */
2017          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2018          case 0x2000:    /* EN QUAD */
2019          case 0x2001:    /* EM QUAD */
2020          case 0x2002:    /* EN SPACE */
2021          case 0x2003:    /* EM SPACE */
2022          case 0x2004:    /* THREE-PER-EM SPACE */
2023          case 0x2005:    /* FOUR-PER-EM SPACE */
2024          case 0x2006:    /* SIX-PER-EM SPACE */
2025          case 0x2007:    /* FIGURE SPACE */
2026          case 0x2008:    /* PUNCTUATION SPACE */
2027          case 0x2009:    /* THIN SPACE */
2028          case 0x200A:    /* HAIR SPACE */
2029          case 0x202f:    /* NARROW NO-BREAK SPACE */
2030          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2031          case 0x3000:    /* IDEOGRAPHIC SPACE */
2032          MRRETURN(MATCH_NOMATCH);
2033          }
2034        ecode++;
2035        break;
2036    
2037        case OP_HSPACE:
2038        if (eptr >= md->end_subject)
2039          {
2040          SCHECK_PARTIAL();
2041          MRRETURN(MATCH_NOMATCH);
2042          }
2043        GETCHARINCTEST(c, eptr);
2044        switch(c)
2045          {
2046          default: MRRETURN(MATCH_NOMATCH);
2047          case 0x09:      /* HT */
2048          case 0x20:      /* SPACE */
2049          case 0xa0:      /* NBSP */
2050          case 0x1680:    /* OGHAM SPACE MARK */
2051          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2052          case 0x2000:    /* EN QUAD */
2053          case 0x2001:    /* EM QUAD */
2054          case 0x2002:    /* EN SPACE */
2055          case 0x2003:    /* EM SPACE */
2056          case 0x2004:    /* THREE-PER-EM SPACE */
2057          case 0x2005:    /* FOUR-PER-EM SPACE */
2058          case 0x2006:    /* SIX-PER-EM SPACE */
2059          case 0x2007:    /* FIGURE SPACE */
2060          case 0x2008:    /* PUNCTUATION SPACE */
2061          case 0x2009:    /* THIN SPACE */
2062          case 0x200A:    /* HAIR SPACE */
2063          case 0x202f:    /* NARROW NO-BREAK SPACE */
2064          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2065          case 0x3000:    /* IDEOGRAPHIC SPACE */
2066          break;
2067          }
2068        ecode++;
2069        break;
2070    
2071        case OP_NOT_VSPACE:
2072        if (eptr >= md->end_subject)
2073          {
2074          SCHECK_PARTIAL();
2075          MRRETURN(MATCH_NOMATCH);
2076          }
2077        GETCHARINCTEST(c, eptr);
2078        switch(c)
2079          {
2080          default: break;
2081          case 0x0a:      /* LF */
2082          case 0x0b:      /* VT */
2083          case 0x0c:      /* FF */
2084          case 0x0d:      /* CR */
2085          case 0x85:      /* NEL */
2086          case 0x2028:    /* LINE SEPARATOR */
2087          case 0x2029:    /* PARAGRAPH SEPARATOR */
2088          MRRETURN(MATCH_NOMATCH);
2089          }
2090        ecode++;
2091        break;
2092    
2093        case OP_VSPACE:
2094        if (eptr >= md->end_subject)
2095          {
2096          SCHECK_PARTIAL();
2097          MRRETURN(MATCH_NOMATCH);
2098          }
2099        GETCHARINCTEST(c, eptr);
2100        switch(c)
2101          {
2102          default: MRRETURN(MATCH_NOMATCH);
2103          case 0x0a:      /* LF */
2104          case 0x0b:      /* VT */
2105          case 0x0c:      /* FF */
2106          case 0x0d:      /* CR */
2107          case 0x85:      /* NEL */
2108          case 0x2028:    /* LINE SEPARATOR */
2109          case 0x2029:    /* PARAGRAPH SEPARATOR */
2110        break;        break;
2111        }        }
2112      ecode++;      ecode++;
# Line 1442  for (;;) Line 2118  for (;;)
2118    
2119      case OP_PROP:      case OP_PROP:
2120      case OP_NOTPROP:      case OP_NOTPROP:
2121      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2122          {
2123          SCHECK_PARTIAL();
2124          MRRETURN(MATCH_NOMATCH);
2125          }
2126      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2127        {        {
2128        int chartype, script;        const ucd_record *prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
2129    
2130        switch(ecode[1])        switch(ecode[1])
2131          {          {
2132          case PT_ANY:          case PT_ANY:
2133          if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);          if (op == OP_NOTPROP) MRRETURN(MATCH_NOMATCH);
2134          break;          break;
2135    
2136          case PT_LAMP:          case PT_LAMP:
2137          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
2138               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
2139               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2140            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2141           break;          break;
2142    
2143          case PT_GC:          case PT_GC:
2144          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
2145            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2146          break;          break;
2147    
2148          case PT_PC:          case PT_PC:
2149          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2150            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2151          break;          break;
2152    
2153          case PT_SC:          case PT_SC:
2154          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
2155            RRETURN(MATCH_NOMATCH);            MRRETURN(MATCH_NOMATCH);
2156            break;
2157    
2158            /* These are specials */
2159    
2160            case PT_ALNUM:
2161            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2162                 _pcre_ucp_gentype[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2163              MRRETURN(MATCH_NOMATCH);
2164            break;
2165    
2166            case PT_SPACE:    /* Perl space */
2167            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2168                 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2169                   == (op == OP_NOTPROP))
2170              MRRETURN(MATCH_NOMATCH);
2171            break;
2172    
2173            case PT_PXSPACE:  /* POSIX space */
2174            if ((_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2175                 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2176                 c == CHAR_FF || c == CHAR_CR)
2177                   == (op == OP_NOTPROP))
2178              MRRETURN(MATCH_NOMATCH);
2179            break;
2180    
2181            case PT_WORD:
2182            if ((_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2183                 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2184                 c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2185              MRRETURN(MATCH_NOMATCH);
2186          break;          break;
2187    
2188            /* This should never occur */
2189    
2190          default:          default:
2191          RRETURN(PCRE_ERROR_INTERNAL);          RRETURN(PCRE_ERROR_INTERNAL);
2192          }          }
# Line 1488  for (;;) Line 2199  for (;;)
2199      is in the binary; otherwise a compile-time error occurs. */      is in the binary; otherwise a compile-time error occurs. */
2200    
2201      case OP_EXTUNI:      case OP_EXTUNI:
2202      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject)
2203          {
2204          SCHECK_PARTIAL();
2205          MRRETURN(MATCH_NOMATCH);
2206          }
2207      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
2208        {        {
2209        int chartype, script;        int category = UCD_CATEGORY(c);
2210        int category = _pcre_ucp_findprop(c, &chartype, &script);        if (category == ucp_M) MRRETURN(MATCH_NOMATCH);
       if (category == ucp_M) RRETURN(MATCH_NOMATCH);  
2211        while (eptr < md->end_subject)        while (eptr < md->end_subject)
2212          {          {
2213          int len = 1;          int len = 1;
# Line 1501  for (;;) Line 2215  for (;;)
2215            {            {
2216            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
2217            }            }
2218          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
2219          if (category != ucp_M) break;          if (category != ucp_M) break;
2220          eptr += len;          eptr += len;
2221          }          }
# Line 1520  for (;;) Line 2234  for (;;)
2234      loops). */      loops). */
2235    
2236      case OP_REF:      case OP_REF:
2237        {      case OP_REFI:
2238        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */      caseless = op == OP_REFI;
2239        ecode += 3;                                 /* Advance past item */      offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
2240        ecode += 3;
2241    
2242        /* If the reference is unset, set the length to be longer than the amount      /* If the reference is unset, there are two possibilities:
       of subject left; this ensures that every attempt at a match fails. We  
       can't just fail here, because of the possibility of quantifiers with zero  
       minima. */  
   
       length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
         md->end_subject - eptr + 1 :  
         md->offset_vector[offset+1] - md->offset_vector[offset];  
2243    
2244        /* Set up for repetition, or handle the non-repeated case */      (a) In the default, Perl-compatible state, set the length negative;
2245        this ensures that every attempt at a match fails. We can't just fail
2246        here, because of the possibility of quantifiers with zero minima.
2247    
2248        switch (*ecode)      (b) If the JavaScript compatibility flag is set, set the length to zero
2249          {      so that the back reference matches an empty string.
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         c = *ecode++ - OP_CRSTAR;  
         minimize = (c & 1) != 0;  
         min = rep_min[c];                 /* Pick up values from tables; */  
         max = rep_max[c];                 /* zero for max => infinity */  
         if (max == 0) max = INT_MAX;  
         break;  
2250    
2251          case OP_CRRANGE:      Otherwise, set the length to the length of what was matched by the
2252          case OP_CRMINRANGE:      referenced subpattern. */
         minimize = (*ecode == OP_CRMINRANGE);  
         min = GET2(ecode, 1);  
         max = GET2(ecode, 3);  
         if (max == 0) max = INT_MAX;  
         ecode += 5;  
         break;  
2253    
2254          default:               /* No repeat follows */      if (offset >= offset_top || md->offset_vector[offset] < 0)
2255          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);        length = (md->jscript_compat)? 0 : -1;
2256          eptr += length;      else
2257          continue;              /* With the main loop */        length = md->offset_vector[offset+1] - md->offset_vector[offset];
2258    
2259        /* Set up for repetition, or handle the non-repeated case */
2260    
2261        switch (*ecode)
2262          {
2263          case OP_CRSTAR:
2264          case OP_CRMINSTAR:
2265          case OP_CRPLUS:
2266          case OP_CRMINPLUS:
2267          case OP_CRQUERY:
2268          case OP_CRMINQUERY:
2269          c = *ecode++ - OP_CRSTAR;
2270          minimize = (c & 1) != 0;
2271          min = rep_min[c];                 /* Pick up values from tables; */
2272          max = rep_max[c];                 /* zero for max => infinity */
2273          if (max == 0) max = INT_MAX;
2274          break;
2275    
2276          case OP_CRRANGE:
2277          case OP_CRMINRANGE:
2278          minimize = (*ecode == OP_CRMINRANGE);
2279          min = GET2(ecode, 1);
2280          max = GET2(ecode, 3);
2281          if (max == 0) max = INT_MAX;
2282          ecode += 5;
2283          break;
2284    
2285          default:               /* No repeat follows */
2286          if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2287            {
2288            CHECK_PARTIAL();
2289            MRRETURN(MATCH_NOMATCH);
2290          }          }
2291          eptr += length;
2292          continue;              /* With the main loop */
2293          }
2294    
2295        /* If the length of the reference is zero, just continue with the      /* Handle repeated back references. If the length of the reference is
2296        main loop. */      zero, just continue with the main loop. */
2297    
2298        if (length == 0) continue;      if (length == 0) continue;
2299    
2300        /* First, ensure the minimum number of matches are present. We get back      /* First, ensure the minimum number of matches are present. We get back
2301        the length of the reference string explicitly rather than passing the      the length of the reference string explicitly rather than passing the
2302        address of eptr, so that eptr can be a register variable. */      address of eptr, so that eptr can be a register variable. */
2303    
2304        for (i = 1; i <= min; i++)      for (i = 1; i <= min; i++)
2305          {
2306          int slength;
2307          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2308          {          {
2309          if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);          CHECK_PARTIAL();
2310          eptr += length;          MRRETURN(MATCH_NOMATCH);
2311          }          }
2312          eptr += slength;
2313          }
2314    
2315        /* If min = max, continue at the same level without recursion.      /* If min = max, continue at the same level without recursion.
2316        They are not both allowed to be zero. */      They are not both allowed to be zero. */
2317    
2318        if (min == max) continue;      if (min == max) continue;
2319    
2320        /* If minimizing, keep trying and advancing the pointer */      /* If minimizing, keep trying and advancing the pointer */
2321    
2322        if (minimize)      if (minimize)
2323          {
2324          for (fi = min;; fi++)
2325          {          {
2326          for (fi = min;; fi++)          int slength;
2327            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM14);
2328            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2330            if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2331            {            {
2332            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            CHECK_PARTIAL();
2333            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            MRRETURN(MATCH_NOMATCH);
           if (fi >= max || !match_ref(offset, eptr, length, md, ims))  
             RRETURN(MATCH_NOMATCH);  
           eptr += length;  
2334            }            }
2335          /* Control never gets here */          eptr += slength;
2336          }          }
2337          /* Control never gets here */
2338          }
2339    
2340        /* If maximizing, find the longest string and work backwards */      /* If maximizing, find the longest string and work backwards */
2341    
2342        else      else
2343          {
2344          pp = eptr;
2345          for (i = min; i < max; i++)
2346          {          {
2347          pp = eptr;          int slength;
2348          for (i = min; i < max; i++)          if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
           {  
           if (!match_ref(offset, eptr, length, md, ims)) break;  
           eptr += length;  
           }  
         while (eptr >= pp)  
2349            {            {
2350            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            CHECK_PARTIAL();
2351            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            break;
           eptr -= length;  
2352            }            }
2353          RRETURN(MATCH_NOMATCH);          eptr += slength;
2354            }
2355          while (eptr >= pp)
2356            {
2357            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM15);
2358            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2359            eptr -= length;
2360          }          }
2361          MRRETURN(MATCH_NOMATCH);
2362        }        }
2363      /* Control never gets here */      /* Control never gets here */
2364    
   
   
2365      /* Match a bit-mapped character class, possibly repeatedly. This op code is      /* Match a bit-mapped character class, possibly repeatedly. This op code is
2366      used when all the characters in the class have values in the range 0-255,      used when all the characters in the class have values in the range 0-255,
2367      and either the matching is caseful, or the characters are in the range      and either the matching is caseful, or the characters are in the range
# Line 1677  for (;;) Line 2416  for (;;)
2416          {          {
2417          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2418            {            {
2419            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2420                {
2421                SCHECK_PARTIAL();
2422                MRRETURN(MATCH_NOMATCH);
2423                }
2424            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
2425            if (c > 255)            if (c > 255)
2426              {              {
2427              if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);              if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2428              }              }
2429            else            else
2430              {              {
2431              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2432              }              }
2433            }            }
2434          }          }
# Line 1695  for (;;) Line 2438  for (;;)
2438          {          {
2439          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2440            {            {
2441            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
2442                {
2443                SCHECK_PARTIAL();
2444                MRRETURN(MATCH_NOMATCH);
2445                }
2446            c = *eptr++;            c = *eptr++;
2447            if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);            if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2448            }            }
2449          }          }
2450    
# Line 1717  for (;;) Line 2464  for (;;)
2464            {            {
2465            for (fi = min;; fi++)            for (fi = min;; fi++)
2466              {              {
2467              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM16);
2468              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2469              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2470                if (eptr >= md->end_subject)
2471                  {
2472                  SCHECK_PARTIAL();
2473                  MRRETURN(MATCH_NOMATCH);
2474                  }
2475              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
2476              if (c > 255)              if (c > 255)
2477                {                {
2478                if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);                if (op == OP_CLASS) MRRETURN(MATCH_NOMATCH);
2479                }                }
2480              else              else
2481                {                {
2482                if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);                if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2483                }                }
2484              }              }
2485            }            }
# Line 1737  for (;;) Line 2489  for (;;)
2489            {            {
2490            for (fi = min;; fi++)            for (fi = min;; fi++)
2491              {              {
2492              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM17);
2493              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2494              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2495                if (eptr >= md->end_subject)
2496                  {
2497                  SCHECK_PARTIAL();
2498                  MRRETURN(MATCH_NOMATCH);
2499                  }
2500              c = *eptr++;              c = *eptr++;
2501              if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);              if ((data[c/8] & (1 << (c&7))) == 0) MRRETURN(MATCH_NOMATCH);
2502              }              }
2503            }            }
2504          /* Control never gets here */          /* Control never gets here */
# Line 1760  for (;;) Line 2517  for (;;)
2517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2518              {              {
2519              int len = 1;              int len = 1;
2520              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2521                  {
2522                  SCHECK_PARTIAL();
2523                  break;
2524                  }
2525              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
2526              if (c > 255)              if (c > 255)
2527                {                {
# Line 1774  for (;;) Line 2535  for (;;)
2535              }              }
2536            for (;;)            for (;;)
2537              {              {
2538              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM18);
2539              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2540              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2541              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1786  for (;;) Line 2547  for (;;)
2547            {            {
2548            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2549              {              {
2550              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
2551                  {
2552                  SCHECK_PARTIAL();
2553                  break;
2554                  }
2555              c = *eptr;              c = *eptr;
2556              if ((data[c/8] & (1 << (c&7))) == 0) break;              if ((data[c/8] & (1 << (c&7))) == 0) break;
2557              eptr++;              eptr++;
2558              }              }
2559            while (eptr >= pp)            while (eptr >= pp)
2560              {              {
2561              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM19);
2562              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2563              eptr--;              eptr--;
2564              }              }
2565            }            }
2566    
2567          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2568          }          }
2569        }        }
2570      /* Control never gets here */      /* Control never gets here */
2571    
2572    
2573      /* Match an extended character class. This opcode is encountered only      /* Match an extended character class. This opcode is encountered only
2574      in UTF-8 mode, because that's the only time it is compiled. */      when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
2575        mode, because Unicode properties are supported in non-UTF-8 mode. */
2576    
2577  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2578      case OP_XCLASS:      case OP_XCLASS:
# Line 1847  for (;;) Line 2613  for (;;)
2613    
2614        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2615          {          {
2616          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);          if (eptr >= md->end_subject)
2617          GETCHARINC(c, eptr);            {
2618          if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);            SCHECK_PARTIAL();
2619              MRRETURN(MATCH_NOMATCH);
2620              }
2621            GETCHARINCTEST(c, eptr);
2622            if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2623          }          }
2624    
2625        /* If max == min we can continue with the main loop without the        /* If max == min we can continue with the main loop without the
# Line 1864  for (;;) Line 2634  for (;;)
2634          {          {
2635          for (fi = min;; fi++)          for (fi = min;; fi++)
2636            {            {
2637            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM20);
2638            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2640            GETCHARINC(c, eptr);            if (eptr >= md->end_subject)
2641            if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);              {
2642                SCHECK_PARTIAL();
2643                MRRETURN(MATCH_NOMATCH);
2644                }
2645              GETCHARINCTEST(c, eptr);
2646              if (!_pcre_xclass(c, data)) MRRETURN(MATCH_NOMATCH);
2647            }            }
2648          /* Control never gets here */          /* Control never gets here */
2649          }          }
# Line 1881  for (;;) Line 2656  for (;;)
2656          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2657            {            {
2658            int len = 1;            int len = 1;
2659            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject)
2660            GETCHARLEN(c, eptr, len);              {
2661                SCHECK_PARTIAL();
2662                break;
2663                }
2664              GETCHARLENTEST(c, eptr, len);
2665            if (!_pcre_xclass(c, data)) break;            if (!_pcre_xclass(c, data)) break;
2666            eptr += len;            eptr += len;
2667            }            }
2668          for(;;)          for(;;)
2669            {            {
2670            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM21);
2671            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2672            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2673            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2674            }            }
2675          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
2676          }          }
2677    
2678        /* Control never gets here */        /* Control never gets here */
# Line 1909  for (;;) Line 2688  for (;;)
2688        length = 1;        length = 1;
2689        ecode++;        ecode++;
2690        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2691        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2692        while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);          {
2693            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2694            MRRETURN(MATCH_NOMATCH);
2695            }
2696          while (length-- > 0) if (*ecode++ != *eptr++) MRRETURN(MATCH_NOMATCH);
2697        }        }
2698      else      else
2699  #endif  #endif
2700    
2701      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2702        {        {
2703        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2704        if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);          {
2705            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2706            MRRETURN(MATCH_NOMATCH);
2707            }
2708          if (ecode[1] != *eptr++) MRRETURN(MATCH_NOMATCH);
2709        ecode += 2;        ecode += 2;
2710        }        }
2711      break;      break;
2712    
2713      /* Match a single character, caselessly */      /* Match a single character, caselessly */
2714    
2715      case OP_CHARNC:      case OP_CHARI:
2716  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2717      if (utf8)      if (utf8)
2718        {        {
# Line 1933  for (;;) Line 2720  for (;;)
2720        ecode++;        ecode++;
2721        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
2722    
2723        if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);        if (length > md->end_subject - eptr)
2724            {
2725            CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
2726            MRRETURN(MATCH_NOMATCH);
2727            }
2728    
2729        /* If the pattern character's value is < 128, we have only one byte, and        /* If the pattern character's value is < 128, we have only one byte, and
2730        can use the fast lookup table. */        can use the fast lookup table. */
2731    
2732        if (fc < 128)        if (fc < 128)
2733          {          {
2734          if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          if (md->lcc[*ecode++] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2735          }          }
2736    
2737        /* Otherwise we must pick up the subject character */        /* Otherwise we must pick up the subject character */
# Line 1957  for (;;) Line 2748  for (;;)
2748          if (fc != dc)          if (fc != dc)
2749            {            {
2750  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2751            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2752  #endif  #endif
2753              RRETURN(MATCH_NOMATCH);              MRRETURN(MATCH_NOMATCH);
2754            }            }
2755          }          }
2756        }        }
# Line 1968  for (;;) Line 2759  for (;;)
2759    
2760      /* Non-UTF-8 mode */      /* Non-UTF-8 mode */
2761        {        {
2762        if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);        if (md->end_subject - eptr < 1)
2763        if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2764            SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
2765            MRRETURN(MATCH_NOMATCH);
2766            }
2767          if (md->lcc[ecode[1]] != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2768        ecode += 2;        ecode += 2;
2769        }        }
2770      break;      break;
# Line 1977  for (;;) Line 2772  for (;;)
2772      /* Match a single character repeatedly. */      /* Match a single character repeatedly. */
2773    
2774      case OP_EXACT:      case OP_EXACT:
2775        case OP_EXACTI:
2776      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
2777      ecode += 3;      ecode += 3;
2778      goto REPEATCHAR;      goto REPEATCHAR;
2779    
2780      case OP_POSUPTO:      case OP_POSUPTO:
2781        case OP_POSUPTOI:
2782      possessive = TRUE;      possessive = TRUE;
2783      /* Fall through */      /* Fall through */
2784    
2785      case OP_UPTO:      case OP_UPTO:
2786        case OP_UPTOI:
2787      case OP_MINUPTO:      case OP_MINUPTO:
2788        case OP_MINUPTOI:
2789      min = 0;      min = 0;
2790      max = GET2(ecode, 1);      max = GET2(ecode, 1);
2791      minimize = *ecode == OP_MINUPTO;      minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
2792      ecode += 3;      ecode += 3;
2793      goto REPEATCHAR;      goto REPEATCHAR;
2794    
2795      case OP_POSSTAR:      case OP_POSSTAR:
2796        case OP_POSSTARI:
2797      possessive = TRUE;      possessive = TRUE;
2798      min = 0;      min = 0;
2799      max = INT_MAX;      max = INT_MAX;
# Line 2001  for (;;) Line 2801  for (;;)
2801      goto REPEATCHAR;      goto REPEATCHAR;
2802    
2803      case OP_POSPLUS:      case OP_POSPLUS:
2804        case OP_POSPLUSI:
2805      possessive = TRUE;      possessive = TRUE;
2806      min = 1;      min = 1;
2807      max = INT_MAX;      max = INT_MAX;
# Line 2008  for (;;) Line 2809  for (;;)
2809      goto REPEATCHAR;      goto REPEATCHAR;
2810    
2811      case OP_POSQUERY:      case OP_POSQUERY:
2812        case OP_POSQUERYI:
2813      possessive = TRUE;      possessive = TRUE;
2814      min = 0;      min = 0;
2815      max = 1;      max = 1;
# Line 2015  for (;;) Line 2817  for (;;)
2817      goto REPEATCHAR;      goto REPEATCHAR;
2818    
2819      case OP_STAR:      case OP_STAR:
2820        case OP_STARI:
2821      case OP_MINSTAR:      case OP_MINSTAR:
2822        case OP_MINSTARI:
2823      case OP_PLUS:      case OP_PLUS:
2824        case OP_PLUSI:
2825      case OP_MINPLUS:      case OP_MINPLUS:
2826        case OP_MINPLUSI:
2827      case OP_QUERY:      case OP_QUERY:
2828        case OP_QUERYI:
2829      case OP_MINQUERY:      case OP_MINQUERY:
2830      c = *ecode++ - OP_STAR;      case OP_MINQUERYI:
2831        c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
2832      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
2833      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
2834      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
2835      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
2836    
2837      /* Common code for all repeated single-character matches. We can give      /* Common code for all repeated single-character matches. */
     up quickly if there are fewer than the minimum number of characters left in  
     the subject. */  
2838    
2839      REPEATCHAR:      REPEATCHAR:
2840  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 2037  for (;;) Line 2843  for (;;)
2843        length = 1;        length = 1;
2844        charptr = ecode;        charptr = ecode;
2845        GETCHARLEN(fc, ecode, length);        GETCHARLEN(fc, ecode, length);
       if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
2846        ecode += length;        ecode += length;
2847    
2848        /* Handle multibyte character matching specially here. There is        /* Handle multibyte character matching specially here. There is
# Line 2045  for (;;) Line 2850  for (;;)
2850    
2851        if (length > 1)        if (length > 1)
2852          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2853  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2854          unsigned int othercase;          unsigned int othercase;
2855          if ((ims & PCRE_CASELESS) != 0 &&          if (op >= OP_STARI &&     /* Caseless */
2856              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2857            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2858            else oclength = 0;
2859  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2860    
2861          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2862            {            {
2863            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (eptr <= md->end_subject - length &&
2864            /* Need braces because of following else */              memcmp(eptr, charptr, length) == 0) eptr += length;
2865            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2866              else if (oclength > 0 &&
2867                       eptr <= md->end_subject - oclength &&
2868                       memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2869    #endif  /* SUPPORT_UCP */
2870            else            else
2871              {              {
2872              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              CHECK_PARTIAL();
2873              eptr += oclength;              MRRETURN(MATCH_NOMATCH);
2874              }              }
2875            }            }
2876    
# Line 2073  for (;;) Line 2880  for (;;)
2880            {            {
2881            for (fi = min;; fi++)            for (fi = min;; fi++)
2882              {              {
2883              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM22);
2884              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2885              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
2886              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (eptr <= md->end_subject - length &&
2887              /* Need braces because of following else */                memcmp(eptr, charptr, length) == 0) eptr += length;
2888              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }  #ifdef SUPPORT_UCP
2889                else if (oclength > 0 &&
2890                         eptr <= md->end_subject - oclength &&
2891                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2892    #endif  /* SUPPORT_UCP */
2893              else              else
2894                {                {
2895                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                CHECK_PARTIAL();
2896                eptr += oclength;                MRRETURN(MATCH_NOMATCH);
2897                }                }
2898              }              }
2899            /* Control never gets here */            /* Control never gets here */
# Line 2093  for (;;) Line 2904  for (;;)
2904            pp = eptr;            pp = eptr;
2905            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2906              {              {
2907              if (eptr > md->end_subject - length) break;              if (eptr <= md->end_subject - length &&
2908              if (memcmp(eptr, charptr, length) == 0) eptr += length;                  memcmp(eptr, charptr, length) == 0) eptr += length;
2909              else if (oclength == 0) break;  #ifdef SUPPORT_UCP
2910                else if (oclength > 0 &&
2911                         eptr <= md->end_subject - oclength &&
2912                         memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
2913    #endif  /* SUPPORT_UCP */
2914              else              else
2915                {                {
2916                if (memcmp(eptr, occhars, oclength) != 0) break;                CHECK_PARTIAL();
2917                eptr += oclength;                break;
2918                }                }
2919              }              }
2920    
2921            if (possessive) continue;            if (possessive) continue;
2922            while (eptr >= pp)  
2923             {            for(;;)
2924             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              {
2925             if (rrc != MATCH_NOMATCH) RRETURN(rrc);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM23);
2926             eptr -= length;              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2927             }              if (eptr == pp) { MRRETURN(MATCH_NOMATCH); }
2928            RRETURN(MATCH_NOMATCH);  #ifdef SUPPORT_UCP
2929                eptr--;
2930                BACKCHAR(eptr);
2931    #else   /* without SUPPORT_UCP */
2932                eptr -= length;
2933    #endif  /* SUPPORT_UCP */
2934                }
2935            }            }
2936          /* Control never gets here */          /* Control never gets here */
2937          }          }
# Line 2123  for (;;) Line 2944  for (;;)
2944  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2945    
2946      /* When not in UTF-8 mode, load a single-byte character. */      /* When not in UTF-8 mode, load a single-byte character. */
2947        {  
2948        if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);      fc = *ecode++;
       fc = *ecode++;  
       }  
2949    
2950      /* The value of fc at this point is always less than 256, though we may or      /* The value of fc at this point is always less than 256, though we may or
2951      may not be in UTF-8 mode. The code is duplicated for the caseless and      may not be in UTF-8 mode. The code is duplicated for the caseless and
# Line 2140  for (;;) Line 2959  for (;;)
2959      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
2960        max, eptr));        max, eptr));
2961    
2962      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_STARI)  /* Caseless */
2963        {        {
2964        fc = md->lcc[fc];        fc = md->lcc[fc];
2965        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
2966          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);          {
2967            if (eptr >= md->end_subject)
2968              {
2969              SCHECK_PARTIAL();
2970              MRRETURN(MATCH_NOMATCH);
2971              }
2972            if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2973            }
2974        if (min == max) continue;        if (min == max) continue;
2975        if (minimize)        if (minimize)
2976          {          {
2977          for (fi = min;; fi++)          for (fi = min;; fi++)
2978            {            {
2979            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM24);
2980            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2981            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max) MRRETURN(MATCH_NOMATCH);
2982                fc != md->lcc[*eptr++])            if (eptr >= md->end_subject)
2983              RRETURN(MATCH_NOMATCH);              {
2984                SCHECK_PARTIAL();
2985                MRRETURN(MATCH_NOMATCH);
2986                }
2987              if (fc != md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
2988            }            }
2989          /* Control never gets here */          /* Control never gets here */
2990          }          }
# Line 2163  for (;;) Line 2993  for (;;)
2993          pp = eptr;          pp = eptr;
2994          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2995            {            {
2996            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject)
2997                {
2998                SCHECK_PARTIAL();
2999                break;
3000                }
3001              if (fc != md->lcc[*eptr]) break;
3002            eptr++;            eptr++;
3003            }            }
3004    
3005          if (possessive) continue;          if (possessive) continue;
3006    
3007          while (eptr >= pp)          while (eptr >= pp)
3008            {            {
3009            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM25);
3010            eptr--;            eptr--;
3011            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3012            }            }
3013          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3014          }          }
3015        /* Control never gets here */        /* Control never gets here */
3016        }        }
# Line 2182  for (;;) Line 3019  for (;;)
3019    
3020      else      else
3021        {        {
3022        for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);        for (i = 1; i <= min; i++)
3023            {
3024            if (eptr >= md->end_subject)
3025              {
3026              SCHECK_PARTIAL();
3027              MRRETURN(MATCH_NOMATCH);
3028              }
3029            if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3030            }
3031    
3032        if (min == max) continue;        if (min == max) continue;
3033    
3034        if (minimize)        if (minimize)
3035          {          {
3036          for (fi = min;; fi++)          for (fi = min;; fi++)
3037            {            {
3038            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM26);
3039            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3040            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max) MRRETURN(MATCH_NOMATCH);
3041              RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3042                {
3043                SCHECK_PARTIAL();
3044                MRRETURN(MATCH_NOMATCH);
3045                }
3046              if (fc != *eptr++) MRRETURN(MATCH_NOMATCH);
3047            }            }
3048          /* Control never gets here */          /* Control never gets here */
3049          }          }
# Line 2200  for (;;) Line 3052  for (;;)
3052          pp = eptr;          pp = eptr;
3053          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3054            {            {
3055            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject)
3056                {
3057                SCHECK_PARTIAL();
3058                break;
3059                }
3060              if (fc != *eptr) break;
3061            eptr++;            eptr++;
3062            }            }
3063          if (possessive) continue;          if (possessive) continue;
3064    
3065          while (eptr >= pp)          while (eptr >= pp)
3066            {            {
3067            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM27);
3068            eptr--;            eptr--;
3069            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3070            }            }
3071          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3072          }          }
3073        }        }
3074      /* Control never gets here */      /* Control never gets here */
# Line 2219  for (;;) Line 3077  for (;;)
3077      checking can be multibyte. */      checking can be multibyte. */
3078    
3079      case OP_NOT:      case OP_NOT:
3080      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      case OP_NOTI:
3081        if (eptr >= md->end_subject)
3082          {
3083          SCHECK_PARTIAL();
3084          MRRETURN(MATCH_NOMATCH);
3085          }
3086      ecode++;      ecode++;
3087      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
3088      if ((ims & PCRE_CASELESS) != 0)      if (op == OP_NOTI)         /* The caseless case */
3089        {        {
3090  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3091        if (c < 256)        if (c < 256)
3092  #endif  #endif
3093        c = md->lcc[c];        c = md->lcc[c];
3094        if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);        if (md->lcc[*ecode++] == c) MRRETURN(MATCH_NOMATCH);
3095        }        }
3096      else      else    /* Caseful */
3097        {        {
3098        if (*ecode++ == c) RRETURN(MATCH_NOMATCH);        if (*ecode++ == c) MRRETURN(MATCH_NOMATCH);
3099        }        }
3100      break;      break;
3101    
# Line 2244  for (;;) Line 3107  for (;;)
3107      about... */      about... */
3108    
3109      case OP_NOTEXACT:      case OP_NOTEXACT:
3110        case OP_NOTEXACTI:
3111      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
3112      ecode += 3;      ecode += 3;
3113      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3114    
3115      case OP_NOTUPTO:      case OP_NOTUPTO:
3116        case OP_NOTUPTOI:
3117      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
3118        case OP_NOTMINUPTOI:
3119      min = 0;      min = 0;
3120      max = GET2(ecode, 1);      max = GET2(ecode, 1);
3121      minimize = *ecode == OP_NOTMINUPTO;      minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
3122      ecode += 3;      ecode += 3;
3123      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3124    
3125      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
3126        case OP_NOTPOSSTARI:
3127      possessive = TRUE;      possessive = TRUE;
3128      min = 0;      min = 0;
3129      max = INT_MAX;      max = INT_MAX;
# Line 2264  for (;;) Line 3131  for (;;)
3131      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3132    
3133      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
3134        case OP_NOTPOSPLUSI:
3135      possessive = TRUE;      possessive = TRUE;
3136      min = 1;      min = 1;
3137      max = INT_MAX;      max = INT_MAX;
# Line 2271  for (;;) Line 3139  for (;;)
3139      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3140    
3141      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
3142        case OP_NOTPOSQUERYI:
3143      possessive = TRUE;      possessive = TRUE;
3144      min = 0;      min = 0;
3145      max = 1;      max = 1;
# Line 2278  for (;;) Line 3147  for (;;)
3147      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3148    
3149      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
3150        case OP_NOTPOSUPTOI:
3151      possessive = TRUE;      possessive = TRUE;
3152      min = 0;      min = 0;
3153      max = GET2(ecode, 1);      max = GET2(ecode, 1);
# Line 2285  for (;;) Line 3155  for (;;)
3155      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
3156    
3157      case OP_NOTSTAR:      case OP_NOTSTAR:
3158        case OP_NOTSTARI:
3159      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
3160        case OP_NOTMINSTARI:
3161      case OP_NOTPLUS:      case OP_NOTPLUS:
3162        case OP_NOTPLUSI:
3163      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
3164        case OP_NOTMINPLUSI:
3165      case OP_NOTQUERY:      case OP_NOTQUERY:
3166        case OP_NOTQUERYI:
3167      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
3168      c = *ecode++ - OP_NOTSTAR;      case OP_NOTMINQUERYI:
3169        c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3170      minimize = (c & 1) != 0;      minimize = (c & 1) != 0;
3171      min = rep_min[c];                 /* Pick up values from tables; */      min = rep_min[c];                 /* Pick up values from tables; */
3172      max = rep_max[c];                 /* zero for max => infinity */      max = rep_max[c];                 /* zero for max => infinity */
3173      if (max == 0) max = INT_MAX;      if (max == 0) max = INT_MAX;
3174    
3175      /* Common code for all repeated single-byte matches. We can give up quickly      /* Common code for all repeated single-byte matches. */
     if there are fewer than the minimum number of bytes left in the  
     subject. */  
3176    
3177      REPEATNOTCHAR:      REPEATNOTCHAR:
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3178      fc = *ecode++;      fc = *ecode++;
3179    
3180      /* The code is duplicated for the caseless and caseful cases, for speed,      /* The code is duplicated for the caseless and caseful cases, for speed,
# Line 2315  for (;;) Line 3188  for (;;)
3188      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3189        max, eptr));        max, eptr));
3190    
3191      if ((ims & PCRE_CASELESS) != 0)      if (op >= OP_NOTSTARI)     /* Caseless */
3192        {        {
3193        fc = md->lcc[fc];        fc = md->lcc[fc];
3194    
# Line 2326  for (;;) Line 3199  for (;;)
3199          register unsigned int d;          register unsigned int d;
3200          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3201            {            {
3202              if (eptr >= md->end_subject)
3203                {
3204                SCHECK_PARTIAL();
3205                MRRETURN(MATCH_NOMATCH);
3206                }
3207            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3208            if (d < 256) d = md->lcc[d];            if (d < 256) d = md->lcc[d];
3209            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3210            }            }
3211          }          }
3212        else        else
# Line 2337  for (;;) Line 3215  for (;;)
3215        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3216          {          {
3217          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3218            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);            {
3219              if (eptr >= md->end_subject)
3220                {
3221                SCHECK_PARTIAL();
3222                MRRETURN(MATCH_NOMATCH);
3223                }
3224              if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3225              }
3226          }          }
3227    
3228        if (min == max) continue;        if (min == max) continue;
# Line 2351  for (;;) Line 3236  for (;;)
3236            register unsigned int d;            register unsigned int d;
3237            for (fi = min;; fi++)            for (fi = min;; fi++)
3238              {              {
3239              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM28);
3240              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3241              GETCHARINC(d, eptr);              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3242                if (eptr >= md->end_subject)
3243                  {
3244                  SCHECK_PARTIAL();
3245                  MRRETURN(MATCH_NOMATCH);
3246                  }
3247                GETCHARINC(d, eptr);
3248              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3249              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3250              }              }
3251            }            }
3252          else          else
# Line 2365  for (;;) Line 3255  for (;;)
3255            {            {
3256            for (fi = min;; fi++)            for (fi = min;; fi++)
3257              {              {
3258              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM29);
3259              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3260              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3261                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3262                  {
3263                  SCHECK_PARTIAL();
3264                  MRRETURN(MATCH_NOMATCH);
3265                  }
3266                if (fc == md->lcc[*eptr++]) MRRETURN(MATCH_NOMATCH);
3267              }              }
3268            }            }
3269          /* Control never gets here */          /* Control never gets here */
# Line 2388  for (;;) Line 3283  for (;;)
3283            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3284              {              {
3285              int len = 1;              int len = 1;
3286              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3287                  {
3288                  SCHECK_PARTIAL();
3289                  break;
3290                  }
3291              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3292              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
3293              if (fc == d) break;              if (fc == d) break;
# Line 2397  for (;;) Line 3296  for (;;)
3296          if (possessive) continue;          if (possessive) continue;
3297          for(;;)          for(;;)
3298              {              {
3299              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM30);
3300              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3301              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3302              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2409  for (;;) Line 3308  for (;;)
3308            {            {
3309            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3310              {              {
3311              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject)
3312                  {
3313                  SCHECK_PARTIAL();
3314                  break;
3315                  }
3316                if (fc == md->lcc[*eptr]) break;
3317              eptr++;              eptr++;
3318              }              }
3319            if (possessive) continue;            if (possessive) continue;
3320            while (eptr >= pp)            while (eptr >= pp)
3321              {              {
3322              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM31);
3323              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3324              eptr--;              eptr--;
3325              }              }
3326            }            }
3327    
3328          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3329          }          }
3330        /* Control never gets here */        /* Control never gets here */
3331        }        }
# Line 2437  for (;;) Line 3341  for (;;)
3341          register unsigned int d;          register unsigned int d;
3342          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3343            {            {
3344              if (eptr >= md->end_subject)
3345                {
3346                SCHECK_PARTIAL();
3347                MRRETURN(MATCH_NOMATCH);
3348                }
3349            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
3350            if (fc == d) RRETURN(MATCH_NOMATCH);            if (fc == d) MRRETURN(MATCH_NOMATCH);
3351            }            }
3352          }          }
3353        else        else
# Line 2446  for (;;) Line 3355  for (;;)
3355        /* Not UTF-8 mode */        /* Not UTF-8 mode */
3356          {          {
3357          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3358            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);            {
3359              if (eptr >= md->end_subject)
3360                {
3361                SCHECK_PARTIAL();
3362                MRRETURN(MATCH_NOMATCH);
3363                }
3364              if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3365              }
3366          }          }
3367    
3368        if (min == max) continue;        if (min == max) continue;
# Line 2460  for (;;) Line 3376  for (;;)
3376            register unsigned int d;            register unsigned int d;
3377            for (fi = min;; fi++)            for (fi = min;; fi++)
3378              {              {
3379              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM32);
3380              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3381                if (fi >= max) MRRETURN(MATCH_NOMATCH);
3382                if (eptr >= md->end_subject)
3383                  {
3384                  SCHECK_PARTIAL();
3385                  MRRETURN(MATCH_NOMATCH);
3386                  }
3387              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
3388              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) MRRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
3389              }              }
3390            }            }
3391          else          else
# Line 2473  for (;;) Line 3394  for (;;)
3394            {            {
3395            for (fi = min;; fi++)            for (fi = min;; fi++)
3396              {              {
3397              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM33);
3398              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3399              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max) MRRETURN(MATCH_NOMATCH);
3400                RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3401                  {
3402                  SCHECK_PARTIAL();
3403                  MRRETURN(MATCH_NOMATCH);
3404                  }
3405                if (fc == *eptr++) MRRETURN(MATCH_NOMATCH);
3406              }              }
3407            }            }
3408          /* Control never gets here */          /* Control never gets here */
# Line 2496  for (;;) Line 3422  for (;;)
3422            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3423              {              {
3424              int len = 1;              int len = 1;
3425              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject)
3426                  {
3427                  SCHECK_PARTIAL();
3428                  break;
3429                  }
3430              GETCHARLEN(d, eptr, len);              GETCHARLEN(d, eptr, len);
3431              if (fc == d) break;              if (fc == d) break;
3432              eptr += len;              eptr += len;
# Line 2504  for (;;) Line 3434  for (;;)
3434            if (possessive) continue;            if (possessive) continue;
3435            for(;;)            for(;;)
3436              {              {
3437              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM34);
3438              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3439              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
3440              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2516  for (;;) Line 3446  for (;;)
3446            {            {
3447            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3448              {              {
3449              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject)
3450                  {
3451                  SCHECK_PARTIAL();
3452                  break;
3453                  }
3454                if (fc == *eptr) break;
3455              eptr++;              eptr++;
3456              }              }
3457            if (possessive) continue;            if (possessive) continue;
3458            while (eptr >= pp)            while (eptr >= pp)
3459              {              {
3460              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, eptrb, 0, RM35);
3461              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3462              eptr--;              eptr--;
3463              }              }
3464            }            }
3465    
3466          RRETURN(MATCH_NOMATCH);          MRRETURN(MATCH_NOMATCH);
3467          }          }
3468        }        }
3469      /* Control never gets here */      /* Control never gets here */
# Line 2610  for (;;) Line 3545  for (;;)
3545    
3546      /* First, ensure the minimum number of matches are present. Use inline      /* First, ensure the minimum number of matches are present. Use inline
3547      code for maximizing the speed, and do the type test once at the start      code for maximizing the speed, and do the type test once at the start
3548      (i.e. keep it out of the loop). Also we can test that there are at least      (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     the minimum number of bytes before we start. This isn't as effective in  
     UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that  
3549      is tidier. Also separate the UCP code, which can be the same for both UTF-8      is tidier. Also separate the UCP code, which can be the same for both UTF-8
3550      and single-bytes. */      and single-bytes. */
3551    
     if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);  
3552      if (min > 0)      if (min > 0)
3553        {        {
3554  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2625  for (;;) Line 3557  for (;;)
3557          switch(prop_type)          switch(prop_type)
3558            {            {
3559            case PT_ANY:            case PT_ANY:
3560            if (prop_fail_result) RRETURN(MATCH_NOMATCH);            if (prop_fail_result) MRRETURN(MATCH_NOMATCH);
3561            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3562              {              {
3563              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3564              GETCHARINC(c, eptr);                {
3565                  SCHECK_PARTIAL();
3566                  MRRETURN(MATCH_NOMATCH);
3567                  }
3568                GETCHARINCTEST(c, eptr);
3569              }              }
3570            break;            break;
3571    
3572            case PT_LAMP:            case PT_LAMP:
3573            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3574              {              {
3575              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3576              GETCHARINC(c, eptr);                {
3577              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3578                  MRRETURN(MATCH_NOMATCH);
3579                  }
3580                GETCHARINCTEST(c, eptr);
3581                prop_chartype = UCD_CHARTYPE(c);
3582              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3583                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3584                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
3585                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3586              }              }
3587            break;            break;
3588    
3589            case PT_GC:            case PT_GC:
3590            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3591              {              {
3592              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3593              GETCHARINC(c, eptr);                {
3594              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3595                  MRRETURN(MATCH_NOMATCH);
3596                  }
3597                GETCHARINCTEST(c, eptr);
3598                prop_category = UCD_CATEGORY(c);
3599              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3600                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3601              }              }
3602            break;            break;
3603    
3604            case PT_PC:            case PT_PC:
3605            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3606              {              {
3607              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3608              GETCHARINC(c, eptr);                {
3609              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3610                  MRRETURN(MATCH_NOMATCH);
3611                  }
3612                GETCHARINCTEST(c, eptr);
3613                prop_chartype = UCD_CHARTYPE(c);
3614              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3615                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3616              }              }
3617            break;            break;
3618    
3619            case PT_SC:            case PT_SC:
3620            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
3621              {              {
3622              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject)
3623              GETCHARINC(c, eptr);                {
3624              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);                SCHECK_PARTIAL();
3625                  MRRETURN(MATCH_NOMATCH);
3626                  }
3627                GETCHARINCTEST(c, eptr);
3628                prop_script = UCD_SCRIPT(c);
3629              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3630                RRETURN(MATCH_NOMATCH);                MRRETURN(MATCH_NOMATCH);
3631                }
3632              break;
3633    
3634              case PT_ALNUM:
3635              for (i = 1; i <= min; i++)
3636                {
3637                if (eptr >= md->end_subject)
3638                  {
3639                  SCHECK_PARTIAL();
3640                  MRRETURN(MATCH_NOMATCH);
3641                  }
3642                GETCHARINCTEST(c, eptr);
3643                prop_category = UCD_CATEGORY(c);
3644                if ((prop_category == ucp_L || prop_category == ucp_N)
3645                       == prop_fail_result)
3646                  MRRETURN(MATCH_NOMATCH);
3647                }
3648              break;
3649    
3650              case PT_SPACE:    /* Perl space */
3651              for (i = 1; i <= min; i++)
3652                {
3653                if (eptr >= md->end_subject)
3654                  {
3655                  SCHECK_PARTIAL();
3656                  MRRETURN(MATCH_NOMATCH);
3657                  }
3658                GETCHARINCTEST(c, eptr);
3659                prop_category = UCD_CATEGORY(c);
3660                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3661                     c == CHAR_FF || c == CHAR_CR)
3662                       == prop_fail_result)
3663                  MRRETURN(MATCH_NOMATCH);
3664                }
3665              break;
3666    
3667              case PT_PXSPACE:  /* POSIX space */
3668              for (i = 1; i <= min; i++)
3669                {
3670                if (eptr >= md->end_subject)
3671                  {
3672                  SCHECK_PARTIAL();
3673                  MRRETURN(MATCH_NOMATCH);
3674                  }
3675                GETCHARINCTEST(c, eptr);
3676                prop_category = UCD_CATEGORY(c);
3677                if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL ||
3678                     c == CHAR_VT || c == CHAR_FF || c == CHAR_CR)
3679                       == prop_fail_result)
3680                  MRRETURN(MATCH_NOMATCH);
3681              }              }
3682            break;            break;
3683    
3684              case PT_WORD:
3685              for (i = 1; i <= min; i++)
3686                {
3687                if (eptr >= md->end_subject)
3688                  {
3689                  SCHECK_PARTIAL();
3690                  MRRETURN(MATCH_NOMATCH);
3691                  }
3692                GETCHARINCTEST(c, eptr);
3693                prop_category = UCD_CATEGORY(c);
3694                if ((prop_category == ucp_L || prop_category == ucp_N ||
3695                     c == CHAR_UNDERSCORE)
3696                       == prop_fail_result)
3697                  MRRETURN(MATCH_NOMATCH);
3698                }
3699              break;
3700    
3701              /* This should not occur */
3702    
3703            default:            default:
3704            RRETURN(PCRE_ERROR_INTERNAL);            RRETURN(PCRE_ERROR_INTERNAL);
3705            }            }
# Line 2691  for (;;) Line 3712  for (;;)
3712          {          {
3713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3714            {            {
3715              if (eptr >= md->end_subject)
3716                {
3717                SCHECK_PARTIAL();
3718                MRRETURN(MATCH_NOMATCH);
3719                }
3720            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3721            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3722            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH);
3723            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3724              {              {
3725              int len = 1;              int len = 1;
3726              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr;
3727                {                else { GETCHARLEN(c, eptr, len); }
3728                GETCHARLEN(c, eptr, len);              prop_category = UCD_CATEGORY(c);
               }  
             prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);  
3729              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3730              eptr += len;              eptr += len;
3731              }              }
# Line 2719  for (;;) Line 3743  for (;;)
3743          case OP_ANY:          case OP_ANY:
3744          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3745            {            {
3746            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject)
3747                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))              {
3748              RRETURN(MATCH_NOMATCH);              SCHECK_PARTIAL();
3749                MRRETURN(MATCH_NOMATCH);
3750                }
3751              if (IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH);
3752              eptr++;
3753              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3754              }
3755            break;
3756    
3757            case OP_ALLANY:
3758            for (i = 1; i <= min; i++)
3759              {
3760              if (eptr >= md->end_subject)
3761                {
3762                SCHECK_PARTIAL();
3763                MRRETURN(MATCH_NOMATCH);
3764                }
3765            eptr++;            eptr++;
3766            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3767            }            }
3768          break;          break;
3769    
3770          case OP_ANYBYTE:          case OP_ANYBYTE:
3771            if (eptr > md->end_subject - min) MRRETURN(MATCH_NOMATCH);
3772          eptr += min;          eptr += min;
3773          break;          break;
3774    
3775          case OP_ANYNL:          case OP_ANYNL:
3776          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3777            {            {
3778            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (eptr >= md->end_subject)
3779                {
3780                SCHECK_PARTIAL();
3781                MRRETURN(MATCH_NOMATCH);
3782                }
3783            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3784            switch(c)            switch(c)
3785              {              {
3786              default: RRETURN(MATCH_NOMATCH);              default: MRRETURN(MATCH_NOMATCH);
3787    
3788              case 0x000d:              case 0x000d:
3789              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3790              break;              break;
3791    
3792              case 0x000a:              case 0x000a:
3793                break;
3794    
3795              case 0x000b:              case 0x000b:
3796              case 0x000c:              case 0x000c:
3797              case 0x0085:              case 0x0085:
3798              case 0x2028:              case 0x2028:
3799              case 0x2029:              case 0x2029:
3800                if (md->bsr_anycrlf) MRRETURN(MATCH_NOMATCH);
3801                break;
3802                }
3803              }
3804            break;
3805    
3806            case OP_NOT_HSPACE:
3807            for (i = 1; i <= min; i++)
3808              {
3809              if (eptr >= md->end_subject)
3810                {
3811                SCHECK_PARTIAL();
3812                MRRETURN(MATCH_NOMATCH);
3813                }
3814              GETCHARINC(c, eptr);
3815              switch(c)
3816                {
3817                default: break;
3818                case 0x09:      /* HT */
3819                case 0x20:      /* SPACE */
3820                case 0xa0:      /* NBSP */
3821                case 0x1680:    /* OGHAM SPACE MARK */
3822                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3823                case 0x2000:    /* EN QUAD */
3824                case 0x2001:    /* EM QUAD */
3825                case 0x2002:    /* EN SPACE */
3826                case 0x2003:    /* EM SPACE */
3827                case 0x2004:    /* THREE-PER-EM SPACE */
3828                case 0x2005:    /* FOUR-PER-EM SPACE */
3829                case 0x2006:    /* SIX-PER-EM SPACE */
3830<