/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 183  calls by keeping local variables that ne Line 194  calls by keeping local variables that ne
194  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
195  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
196  always used to.  always used to.
197    
198    The original heap-recursive code used longjmp(). However, it seems that this
199    can be very slow on some operating systems. Following a suggestion from Stan
200    Switzer, the use of longjmp() has been abolished, at the cost of having to
201    provide a unique number for each call to RMATCH. There is no way of generating
202    a sequence of numbers at compile time in C. I have given them names, to make
203    them stand out more clearly.
204    
205    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
206    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
207    tests. Furthermore, not using longjmp() means that local dynamic variables
208    don't have indeterminate values; this has meant that the frame size can be
209    reduced because the result can be "passed back" by straight setting of the
210    variable instead of being passed in the frame.
211  ****************************************************************************  ****************************************************************************
212  ***************************************************************************/  ***************************************************************************/
213    
214    
215    /* Numbers for RMATCH calls */
216    
217    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
218           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53 };
223    
224    
225  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
226  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
227    actually used in this definition. */
228    
229  #ifndef NO_RECURSE  #ifndef NO_RECURSE
230  #define REGISTER register  #define REGISTER register
231    
232  #ifdef DEBUG  #ifdef DEBUG
233  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
234    { \    { \
235    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
236    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
237    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
238    }    }
239  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 242  versions and production versions. */
242    return ra; \    return ra; \
243    }    }
244  #else  #else
245  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
246    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
247  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
248  #endif  #endif
249    
250  #else  #else
251    
252    
253  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
254  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
255  match(), which never changes. */  argument of match(), which never changes. */
256    
257  #define REGISTER  #define REGISTER
258    
259  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
260    {\    {\
261    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
262    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
263      {\    newframe->Xeptr = ra;\
264      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
265      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
266      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
267      newframe->Xims = re;\    newframe->Xims = re;\
268      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
269      newframe->Xflags = rg;\    newframe->Xflags = rg;\
270      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
271      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
272      frame = newframe;\    frame = newframe;\
273      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
274      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
275      }\    L_##rw:\
276    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
277    }    }
278    
279  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 251  match(), which never changes. */ Line 283  match(), which never changes. */
283    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
284    if (frame != NULL)\    if (frame != NULL)\
285      {\      {\
286      frame->Xresult = ra;\      rrc = ra;\
287      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
288      }\      }\
289    return ra;\    return ra;\
290    }    }
# Line 268  typedef struct heapframe { Line 299  typedef struct heapframe {
299    
300    const uschar *Xeptr;    const uschar *Xeptr;
301    const uschar *Xecode;    const uschar *Xecode;
302      const uschar *Xmstart;
303    int Xoffset_top;    int Xoffset_top;
304    long int Xims;    long int Xims;
305    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 299  typedef struct heapframe { Line 331  typedef struct heapframe {
331    int Xprop_category;    int Xprop_category;
332    int Xprop_chartype;    int Xprop_chartype;
333    int Xprop_script;    int Xprop_script;
334      int Xoclength;
335      uschar Xocchars[8];
336  #endif  #endif
337    
338    int Xctype;    int Xctype;
# Line 316  typedef struct heapframe { Line 350  typedef struct heapframe {
350    
351    eptrblock Xnewptrb;    eptrblock Xnewptrb;
352    
353    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
354    
355    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
356    
357  } heapframe;  } heapframe;
358    
# Line 347  made performance worse. Line 380  made performance worse.
380  Arguments:  Arguments:
381     eptr        pointer to current character in subject     eptr        pointer to current character in subject
382     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
383       mstart      pointer to the current match start position (can be modified
384                     by encountering \K)
385     offset_top  current top pointer     offset_top  current top pointer
386     md          pointer to "static" info for the match     md          pointer to "static" info for the match
387     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 356  Arguments: Line 391  Arguments:
391                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
392                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
393                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
394     rdepth      the recursion depth     rdepth      the recursion depth
395    
396  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 366  Returns: MATCH_MATCH if matched Line 400  Returns: MATCH_MATCH if matched
400  */  */
401    
402  static int  static int
403  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
404    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
405    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
406  {  {
# Line 394  frame->Xprevframe = NULL; /* Line 428  frame->Xprevframe = NULL; /*
428    
429  frame->Xeptr = eptr;  frame->Xeptr = eptr;
430  frame->Xecode = ecode;  frame->Xecode = ecode;
431    frame->Xmstart = mstart;
432  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
433  frame->Xims = ims;  frame->Xims = ims;
434  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 408  HEAP_RECURSE: Line 443  HEAP_RECURSE:
443    
444  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
445  #define ecode              frame->Xecode  #define ecode              frame->Xecode
446    #define mstart             frame->Xmstart
447  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
448  #define ims                frame->Xims  #define ims                frame->Xims
449  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 441  HEAP_RECURSE: Line 477  HEAP_RECURSE:
477  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
478  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
479  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
480    #define oclength           frame->Xoclength
481    #define occhars            frame->Xocchars
482  #endif  #endif
483    
484  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 494  int prop_fail_result; Line 532  int prop_fail_result;
532  int prop_category;  int prop_category;
533  int prop_chartype;  int prop_chartype;
534  int prop_script;  int prop_script;
535    int oclength;
536    uschar occhars[8];
537  #endif  #endif
538    
539  int ctype;  int ctype;
# Line 534  defined). However, RMATCH isn't like a f Line 574  defined). However, RMATCH isn't like a f
574  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
575  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
576    
577    #ifdef SUPPORT_UTF8
578    utf8 = md->utf8;       /* Local copy of the flag */
579    #else
580    utf8 = FALSE;
581    #endif
582    
583  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
584  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
585    
# Line 542  if (rdepth >= md->match_limit_recursion) Line 588  if (rdepth >= md->match_limit_recursion)
588    
589  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
590    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
591  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
592  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
593  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
594  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
595  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
596  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
597  already used. */  block that is used is on the stack, so a new one may be required for each
598    match(). */
599    
600  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
601    {    {
602    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
603    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
604      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
605    }    }
606    
607  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 582  for (;;) Line 616  for (;;)
616    
617    if (md->partial &&    if (md->partial &&
618        eptr >= md->end_subject &&        eptr >= md->end_subject &&
619        eptr > md->start_match)        eptr > mstart)
620      md->hitend = TRUE;      md->hitend = TRUE;
621    
622    switch(op)    switch(op)
623      {      {
624        case OP_FAIL:
625        return MATCH_NOMATCH;
626    
627        case OP_PRUNE:
628        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
629          ims, eptrb, flags, RM51);
630        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
631        return MATCH_PRUNE;
632    
633        case OP_COMMIT:
634        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
635          ims, eptrb, flags, RM52);
636        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
637        return MATCH_COMMIT;
638    
639        case OP_SKIP:
640        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
641          ims, eptrb, flags, RM53);
642        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
643        md->start_match_ptr = eptr;   /* Pass back current position */
644        return MATCH_SKIP;
645    
646        case OP_THEN:
647        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
648          ims, eptrb, flags, RM53);
649        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
650        return MATCH_THEN;
651    
652      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
653      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
654      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 626  for (;;) Line 688  for (;;)
688        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
689        do        do
690          {          {
691          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
692            ims, eptrb, flags);            ims, eptrb, flags, RM1);
693          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
694          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
695          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
696          }          }
# Line 643  for (;;) Line 705  for (;;)
705        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
706        }        }
707    
708      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
709      bracket. */      as a non-capturing bracket. */
710    
711        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
713    
714      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
715    
716        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
718    
719      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
720      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
721      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
722      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
723        is set. */
724    
725      case OP_BRA:      case OP_BRA:
726      case OP_SBRA:      case OP_SBRA:
# Line 659  for (;;) Line 728  for (;;)
728      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
729      for (;;)      for (;;)
730        {        {
731        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
732          {          {
733          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
734          flags |= match_tail_recursed;            {
735          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
736          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
737              goto TAIL_RECURSE;
738              }
739    
740            /* Possibly empty group; can't use tail recursion. */
741    
742            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
743              eptrb, flags, RM48);
744            RRETURN(rrc);
745          }          }
746    
747        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
748        otherwise return. */        otherwise return. */
749    
750        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
751          eptrb, flags);          eptrb, flags, RM2);
752        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
753        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
754        }        }
755      /* Control never reaches here. */      /* Control never reaches here. */
# Line 712  for (;;) Line 789  for (;;)
789    
790      else      else
791        {        {
792        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
793            match_condassert);            match_condassert, RM3);
794        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
795          {          {
796          condition = TRUE;          condition = TRUE;
797          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
798          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
799          }          }
800        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
801          {          {
802          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
803          }          }
# Line 732  for (;;) Line 809  for (;;)
809        }        }
810    
811      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
812      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
813      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
814        group. If the second alternative doesn't exist, we can just plough on. */
815    
816      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
817        {        {
818        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
819        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
820        goto TAIL_RECURSE;          {
821            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
822            RRETURN(rrc);
823            }
824          else                       /* Group must match something */
825            {
826            flags = 0;
827            goto TAIL_RECURSE;
828            }
829        }        }
830      else      else                         /* Condition false & no 2nd alternative */
831        {        {
832        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
833        }        }
834      break;      break;
835    
836    
837      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
838      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
839        after the call. */
840    
841        case OP_ACCEPT:
842      case OP_END:      case OP_END:
843      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
844        {        {
# Line 759  for (;;) Line 847  for (;;)
847        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
848        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
849          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
850        md->start_match = rec->save_start;        mstart = rec->save_start;
851        ims = original_ims;        ims = original_ims;
852        ecode = rec->after_call;        ecode = rec->after_call;
853        break;        break;
# Line 768  for (;;) Line 856  for (;;)
856      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
857      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
858    
859      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
860      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
861      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
862        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
863      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
864    
865      /* Change option settings */      /* Change option settings */
# Line 791  for (;;) Line 880  for (;;)
880      case OP_ASSERTBACK:      case OP_ASSERTBACK:
881      do      do
882        {        {
883        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
884            RM4);
885        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
886        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
887        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
888        }        }
889      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 817  for (;;) Line 907  for (;;)
907      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
908      do      do
909        {        {
910        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
911            RM5);
912        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
913        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
914        ecode += GET(ecode,1);        ecode += GET(ecode,1);
915        }        }
916      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 843  for (;;) Line 934  for (;;)
934          {          {
935          eptr--;          eptr--;
936          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
937          BACKCHAR(eptr)          BACKCHAR(eptr);
938          }          }
939        }        }
940      else      else
# Line 874  for (;;) Line 965  for (;;)
965        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
966        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
967        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
968        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
969        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
970        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
971        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 936  for (;;) Line 1027  for (;;)
1027    
1028        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1029              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1030        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1031        md->start_match = eptr;        mstart = eptr;
1032    
1033        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1034        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 946  for (;;) Line 1037  for (;;)
1037        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1038        do        do
1039          {          {
1040          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1041            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1042          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1043            {            {
1044            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 956  for (;;) Line 1047  for (;;)
1047              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1048            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1049            }            }
1050          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1051            {            {
1052            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1053            RRETURN(rrc);            RRETURN(rrc);
# Line 990  for (;;) Line 1081  for (;;)
1081    
1082      do      do
1083        {        {
1084        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1085        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1086        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1087        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1088        }        }
1089      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1036  for (;;) Line 1126  for (;;)
1126    
1127      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1128        {        {
1129        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1130        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1131        ecode = prev;        ecode = prev;
1132        flags = match_tail_recursed;        flags = 0;
1133        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1134        }        }
1135      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1136        {        {
1137        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1138        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1139        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1140        flags = match_tail_recursed;        flags = 0;
1141        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1142        }        }
1143      /* Control never gets here */      /* Control never gets here */
# Line 1068  for (;;) Line 1158  for (;;)
1158      case OP_BRAZERO:      case OP_BRAZERO:
1159        {        {
1160        next = ecode+1;        next = ecode+1;
1161        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1162        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1163        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1164        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1079  for (;;) Line 1169  for (;;)
1169        {        {
1170        next = ecode+1;        next = ecode+1;
1171        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1172        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1173        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1174        ecode++;        ecode++;
1175        }        }
# Line 1149  for (;;) Line 1239  for (;;)
1239          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1240          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1241          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1242          md->start_match = rec->save_start;          mstart = rec->save_start;
1243          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1244            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1245          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1178  for (;;) Line 1268  for (;;)
1268    
1269      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1270      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1271      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1272        unlimited repeat of a group that can match an empty string. */
1273    
1274      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1275    
1276      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1277        {        {
1278        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1279        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1280          if (flags != 0)    /* Could match an empty string */
1281            {
1282            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1283            RRETURN(rrc);
1284            }
1285        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1286        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1287        }        }
1288      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1289        {        {
1290        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1291        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1292        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1293        flags = match_tail_recursed;        flags = 0;
1294        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1295        }        }
1296      /* Control never gets here */      /* Control never gets here */
# Line 1228  for (;;) Line 1323  for (;;)
1323      ecode++;      ecode++;
1324      break;      break;
1325    
1326        /* Reset the start of match point */
1327    
1328        case OP_SET_SOM:
1329        mstart = eptr;
1330        ecode++;
1331        break;
1332    
1333      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1334      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1335    
# Line 1436  for (;;) Line 1538  for (;;)
1538      ecode++;      ecode++;
1539      break;      break;
1540    
1541        case OP_NOT_HSPACE:
1542        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1543        GETCHARINCTEST(c, eptr);
1544        switch(c)
1545          {
1546          default: break;
1547          case 0x09:      /* HT */
1548          case 0x20:      /* SPACE */
1549          case 0xa0:      /* NBSP */
1550          case 0x1680:    /* OGHAM SPACE MARK */
1551          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1552          case 0x2000:    /* EN QUAD */
1553          case 0x2001:    /* EM QUAD */
1554          case 0x2002:    /* EN SPACE */
1555          case 0x2003:    /* EM SPACE */
1556          case 0x2004:    /* THREE-PER-EM SPACE */
1557          case 0x2005:    /* FOUR-PER-EM SPACE */
1558          case 0x2006:    /* SIX-PER-EM SPACE */
1559          case 0x2007:    /* FIGURE SPACE */
1560          case 0x2008:    /* PUNCTUATION SPACE */
1561          case 0x2009:    /* THIN SPACE */
1562          case 0x200A:    /* HAIR SPACE */
1563          case 0x202f:    /* NARROW NO-BREAK SPACE */
1564          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1565          case 0x3000:    /* IDEOGRAPHIC SPACE */
1566          RRETURN(MATCH_NOMATCH);
1567          }
1568        ecode++;
1569        break;
1570    
1571        case OP_HSPACE:
1572        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1573        GETCHARINCTEST(c, eptr);
1574        switch(c)
1575          {
1576          default: RRETURN(MATCH_NOMATCH);
1577          case 0x09:      /* HT */
1578          case 0x20:      /* SPACE */
1579          case 0xa0:      /* NBSP */
1580          case 0x1680:    /* OGHAM SPACE MARK */
1581          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1582          case 0x2000:    /* EN QUAD */
1583          case 0x2001:    /* EM QUAD */
1584          case 0x2002:    /* EN SPACE */
1585          case 0x2003:    /* EM SPACE */
1586          case 0x2004:    /* THREE-PER-EM SPACE */
1587          case 0x2005:    /* FOUR-PER-EM SPACE */
1588          case 0x2006:    /* SIX-PER-EM SPACE */
1589          case 0x2007:    /* FIGURE SPACE */
1590          case 0x2008:    /* PUNCTUATION SPACE */
1591          case 0x2009:    /* THIN SPACE */
1592          case 0x200A:    /* HAIR SPACE */
1593          case 0x202f:    /* NARROW NO-BREAK SPACE */
1594          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1595          case 0x3000:    /* IDEOGRAPHIC SPACE */
1596          break;
1597          }
1598        ecode++;
1599        break;
1600    
1601        case OP_NOT_VSPACE:
1602        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1603        GETCHARINCTEST(c, eptr);
1604        switch(c)
1605          {
1606          default: break;
1607          case 0x0a:      /* LF */
1608          case 0x0b:      /* VT */
1609          case 0x0c:      /* FF */
1610          case 0x0d:      /* CR */
1611          case 0x85:      /* NEL */
1612          case 0x2028:    /* LINE SEPARATOR */
1613          case 0x2029:    /* PARAGRAPH SEPARATOR */
1614          RRETURN(MATCH_NOMATCH);
1615          }
1616        ecode++;
1617        break;
1618    
1619        case OP_VSPACE:
1620        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1621        GETCHARINCTEST(c, eptr);
1622        switch(c)
1623          {
1624          default: RRETURN(MATCH_NOMATCH);
1625          case 0x0a:      /* LF */
1626          case 0x0b:      /* VT */
1627          case 0x0c:      /* FF */
1628          case 0x0d:      /* CR */
1629          case 0x85:      /* NEL */
1630          case 0x2028:    /* LINE SEPARATOR */
1631          case 0x2029:    /* PARAGRAPH SEPARATOR */
1632          break;
1633          }
1634        ecode++;
1635        break;
1636    
1637  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1638      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1639      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1591  for (;;) Line 1789  for (;;)
1789          {          {
1790          for (fi = min;; fi++)          for (fi = min;; fi++)
1791            {            {
1792            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1793            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1794            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1795              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1612  for (;;) Line 1810  for (;;)
1810            }            }
1811          while (eptr >= pp)          while (eptr >= pp)
1812            {            {
1813            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1814            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1815            eptr -= length;            eptr -= length;
1816            }            }
# Line 1717  for (;;) Line 1915  for (;;)
1915            {            {
1916            for (fi = min;; fi++)            for (fi = min;; fi++)
1917              {              {
1918              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1919              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1920              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1921              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1737  for (;;) Line 1935  for (;;)
1935            {            {
1936            for (fi = min;; fi++)            for (fi = min;; fi++)
1937              {              {
1938              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1939              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1940              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1941              c = *eptr++;              c = *eptr++;
# Line 1774  for (;;) Line 1972  for (;;)
1972              }              }
1973            for (;;)            for (;;)
1974              {              {
1975              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1976              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1977              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1978              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1793  for (;;) Line 1991  for (;;)
1991              }              }
1992            while (eptr >= pp)            while (eptr >= pp)
1993              {              {
1994              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1995              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1996              eptr--;              eptr--;
1997              }              }
# Line 1864  for (;;) Line 2062  for (;;)
2062          {          {
2063          for (fi = min;; fi++)          for (fi = min;; fi++)
2064            {            {
2065            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2066            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2067            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2068            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1888  for (;;) Line 2086  for (;;)
2086            }            }
2087          for(;;)          for(;;)
2088            {            {
2089            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2090            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2091            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2092            BACKCHAR(eptr)            BACKCHAR(eptr);
2093            }            }
2094          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2095          }          }
# Line 2045  for (;;) Line 2243  for (;;)
2243    
2244        if (length > 1)        if (length > 1)
2245          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2246  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2247          unsigned int othercase;          unsigned int othercase;
2248          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2249              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2250            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2251            else oclength = 0;
2252  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2253    
2254          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2255            {            {
2256            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2257    #ifdef SUPPORT_UCP
2258            /* Need braces because of following else */            /* Need braces because of following else */
2259            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2260            else            else
# Line 2065  for (;;) Line 2262  for (;;)
2262              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2263              eptr += oclength;              eptr += oclength;
2264              }              }
2265    #else   /* without SUPPORT_UCP */
2266              else { RRETURN(MATCH_NOMATCH); }
2267    #endif  /* SUPPORT_UCP */
2268            }            }
2269    
2270          if (min == max) continue;          if (min == max) continue;
# Line 2073  for (;;) Line 2273  for (;;)
2273            {            {
2274            for (fi = min;; fi++)            for (fi = min;; fi++)
2275              {              {
2276              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2277              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2278              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2279              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2280    #ifdef SUPPORT_UCP
2281              /* Need braces because of following else */              /* Need braces because of following else */
2282              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2283              else              else
# Line 2084  for (;;) Line 2285  for (;;)
2285                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2286                eptr += oclength;                eptr += oclength;
2287                }                }
2288    #else   /* without SUPPORT_UCP */
2289                else { RRETURN (MATCH_NOMATCH); }
2290    #endif  /* SUPPORT_UCP */
2291              }              }
2292            /* Control never gets here */            /* Control never gets here */
2293            }            }
# Line 2095  for (;;) Line 2299  for (;;)
2299              {              {
2300              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2301              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2302    #ifdef SUPPORT_UCP
2303              else if (oclength == 0) break;              else if (oclength == 0) break;
2304              else              else
2305                {                {
2306                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2307                eptr += oclength;                eptr += oclength;
2308                }                }
2309    #else   /* without SUPPORT_UCP */
2310                else break;
2311    #endif  /* SUPPORT_UCP */
2312              }              }
2313    
2314            if (possessive) continue;            if (possessive) continue;
2315            while (eptr >= pp)            for(;;)
2316             {             {
2317             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2318             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2319               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2320    #ifdef SUPPORT_UCP
2321               eptr--;
2322               BACKCHAR(eptr);
2323    #else   /* without SUPPORT_UCP */
2324             eptr -= length;             eptr -= length;
2325    #endif  /* SUPPORT_UCP */
2326             }             }
           RRETURN(MATCH_NOMATCH);  
2327            }            }
2328          /* Control never gets here */          /* Control never gets here */
2329          }          }
# Line 2150  for (;;) Line 2363  for (;;)
2363          {          {
2364          for (fi = min;; fi++)          for (fi = min;; fi++)
2365            {            {
2366            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2367            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2368            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2369                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2169  for (;;) Line 2382  for (;;)
2382          if (possessive) continue;          if (possessive) continue;
2383          while (eptr >= pp)          while (eptr >= pp)
2384            {            {
2385            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2386            eptr--;            eptr--;
2387            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2388            }            }
# Line 2188  for (;;) Line 2401  for (;;)
2401          {          {
2402          for (fi = min;; fi++)          for (fi = min;; fi++)
2403            {            {
2404            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2405            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2406            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2407              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2206  for (;;) Line 2419  for (;;)
2419          if (possessive) continue;          if (possessive) continue;
2420          while (eptr >= pp)          while (eptr >= pp)
2421            {            {
2422            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2423            eptr--;            eptr--;
2424            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2425            }            }
# Line 2351  for (;;) Line 2564  for (;;)
2564            register unsigned int d;            register unsigned int d;
2565            for (fi = min;; fi++)            for (fi = min;; fi++)
2566              {              {
2567              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2568              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2569              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2570              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2365  for (;;) Line 2578  for (;;)
2578            {            {
2579            for (fi = min;; fi++)            for (fi = min;; fi++)
2580              {              {
2581              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2582              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2583              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2584                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2397  for (;;) Line 2610  for (;;)
2610          if (possessive) continue;          if (possessive) continue;
2611          for(;;)          for(;;)
2612              {              {
2613              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2614              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2615              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2616              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2415  for (;;) Line 2628  for (;;)
2628            if (possessive) continue;            if (possessive) continue;
2629            while (eptr >= pp)            while (eptr >= pp)
2630              {              {
2631              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2632              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2633              eptr--;              eptr--;
2634              }              }
# Line 2460  for (;;) Line 2673  for (;;)
2673            register unsigned int d;            register unsigned int d;
2674            for (fi = min;; fi++)            for (fi = min;; fi++)
2675              {              {
2676              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2677              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2678              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2679              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2473  for (;;) Line 2686  for (;;)
2686            {            {
2687            for (fi = min;; fi++)            for (fi = min;; fi++)
2688              {              {
2689              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2690              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2691              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2692                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2504  for (;;) Line 2717  for (;;)
2717            if (possessive) continue;            if (possessive) continue;
2718            for(;;)            for(;;)
2719              {              {
2720              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2721              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2722              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2723              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2522  for (;;) Line 2735  for (;;)
2735            if (possessive) continue;            if (possessive) continue;
2736            while (eptr >= pp)            while (eptr >= pp)
2737              {              {
2738              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2739              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2740              eptr--;              eptr--;
2741              }              }
# Line 2629  for (;;) Line 2842  for (;;)
2842            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2843              {              {
2844              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2845              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2846              }              }
2847            break;            break;
2848    
# Line 2637  for (;;) Line 2850  for (;;)
2850            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2851              {              {
2852              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2853              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2854              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2855              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2856                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2650  for (;;) Line 2863  for (;;)
2863            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2864              {              {
2865              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2866              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2867              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2868              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2869                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2661  for (;;) Line 2874  for (;;)
2874            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2875              {              {
2876              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2877              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2878              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2879              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2880                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2672  for (;;) Line 2885  for (;;)
2885            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2886              {              {
2887              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2888              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2889              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2890              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2891                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2753  for (;;) Line 2966  for (;;)
2966            }            }
2967          break;          break;
2968    
2969            case OP_NOT_HSPACE:
2970            for (i = 1; i <= min; i++)
2971              {
2972              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2973              GETCHARINC(c, eptr);
2974              switch(c)
2975                {
2976                default: break;
2977                case 0x09:      /* HT */
2978                case 0x20:      /* SPACE */
2979                case 0xa0:      /* NBSP */
2980                case 0x1680:    /* OGHAM SPACE MARK */
2981                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2982                case 0x2000:    /* EN QUAD */
2983                case 0x2001:    /* EM QUAD */
2984                case 0x2002:    /* EN SPACE */
2985                case 0x2003:    /* EM SPACE */
2986                case 0x2004:    /* THREE-PER-EM SPACE */
2987                case 0x2005:    /* FOUR-PER-EM SPACE */
2988                case 0x2006:    /* SIX-PER-EM SPACE */
2989                case 0x2007:    /* FIGURE SPACE */
2990                case 0x2008:    /* PUNCTUATION SPACE */
2991                case 0x2009:    /* THIN SPACE */
2992                case 0x200A:    /* HAIR SPACE */
2993                case 0x202f:    /* NARROW NO-BREAK SPACE */
2994                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2995                case 0x3000:    /* IDEOGRAPHIC SPACE */
2996                RRETURN(MATCH_NOMATCH);
2997                }
2998              }
2999            break;
3000    
3001            case OP_HSPACE:
3002            for (i = 1; i <= min; i++)
3003              {
3004              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3005              GETCHARINC(c, eptr);
3006              switch(c)
3007                {
3008                default: RRETURN(MATCH_NOMATCH);
3009                case 0x09:      /* HT */
3010                case 0x20:      /* SPACE */
3011                case 0xa0:      /* NBSP */
3012                case 0x1680:    /* OGHAM SPACE MARK */
3013                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3014                case 0x2000:    /* EN QUAD */
3015                case 0x2001:    /* EM QUAD */
3016                case 0x2002:    /* EN SPACE */
3017                case 0x2003:    /* EM SPACE */
3018                case 0x2004:    /* THREE-PER-EM SPACE */
3019                case 0x2005:    /* FOUR-PER-EM SPACE */
3020                case 0x2006:    /* SIX-PER-EM SPACE */
3021                case 0x2007:    /* FIGURE SPACE */
3022                case 0x2008:    /* PUNCTUATION SPACE */
3023                case 0x2009:    /* THIN SPACE */
3024                case 0x200A:    /* HAIR SPACE */
3025                case 0x202f:    /* NARROW NO-BREAK SPACE */
3026                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3027                case 0x3000:    /* IDEOGRAPHIC SPACE */
3028                break;
3029                }
3030              }
3031            break;
3032    
3033            case OP_NOT_VSPACE:
3034            for (i = 1; i <= min; i++)
3035              {
3036              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3037              GETCHARINC(c, eptr);
3038              switch(c)
3039                {
3040                default: break;
3041                case 0x0a:      /* LF */
3042                case 0x0b:      /* VT */
3043                case 0x0c:      /* FF */
3044                case 0x0d:      /* CR */
3045                case 0x85:      /* NEL */
3046                case 0x2028:    /* LINE SEPARATOR */
3047                case 0x2029:    /* PARAGRAPH SEPARATOR */
3048                RRETURN(MATCH_NOMATCH);
3049                }
3050              }
3051            break;
3052    
3053            case OP_VSPACE:
3054            for (i = 1; i <= min; i++)
3055              {
3056              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3057              GETCHARINC(c, eptr);
3058              switch(c)
3059                {
3060                default: RRETURN(MATCH_NOMATCH);
3061                case 0x0a:      /* LF */
3062                case 0x0b:      /* VT */
3063                case 0x0c:      /* FF */
3064                case 0x0d:      /* CR */
3065                case 0x85:      /* NEL */
3066                case 0x2028:    /* LINE SEPARATOR */
3067                case 0x2029:    /* PARAGRAPH SEPARATOR */
3068                break;
3069                }
3070              }
3071            break;
3072    
3073          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3074          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3075            {            {
# Line 2864  for (;;) Line 3181  for (;;)
3181            }            }
3182          break;          break;
3183    
3184            case OP_NOT_HSPACE:
3185            for (i = 1; i <= min; i++)
3186              {
3187              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3188              switch(*eptr++)
3189                {
3190                default: break;
3191                case 0x09:      /* HT */
3192                case 0x20:      /* SPACE */
3193                case 0xa0:      /* NBSP */
3194                RRETURN(MATCH_NOMATCH);
3195                }
3196              }
3197            break;
3198    
3199            case OP_HSPACE:
3200            for (i = 1; i <= min; i++)
3201              {
3202              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3203              switch(*eptr++)
3204                {
3205                default: RRETURN(MATCH_NOMATCH);
3206                case 0x09:      /* HT */
3207                case 0x20:      /* SPACE */
3208                case 0xa0:      /* NBSP */
3209                break;
3210                }
3211              }
3212            break;
3213    
3214            case OP_NOT_VSPACE:
3215            for (i = 1; i <= min; i++)
3216              {
3217              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3218              switch(*eptr++)
3219                {
3220                default: break;
3221                case 0x0a:      /* LF */
3222                case 0x0b:      /* VT */
3223                case 0x0c:      /* FF */
3224                case 0x0d:      /* CR */
3225                case 0x85:      /* NEL */
3226                RRETURN(MATCH_NOMATCH);
3227                }
3228              }
3229            break;
3230    
3231            case OP_VSPACE:
3232            for (i = 1; i <= min; i++)
3233              {
3234              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3235              switch(*eptr++)
3236                {
3237                default: RRETURN(MATCH_NOMATCH);
3238                case 0x0a:      /* LF */
3239                case 0x0b:      /* VT */
3240                case 0x0c:      /* FF */
3241                case 0x0d:      /* CR */
3242                case 0x85:      /* NEL */
3243                break;
3244                }
3245              }
3246            break;
3247    
3248          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3249          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3250            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2919  for (;;) Line 3300  for (;;)
3300            case PT_ANY:            case PT_ANY:
3301            for (fi = min;; fi++)            for (fi = min;; fi++)
3302              {              {
3303              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3304              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3305              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2930  for (;;) Line 3311  for (;;)
3311            case PT_LAMP:            case PT_LAMP:
3312            for (fi = min;; fi++)            for (fi = min;; fi++)
3313              {              {
3314              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3315              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3316              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3317              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2945  for (;;) Line 3326  for (;;)
3326            case PT_GC:            case PT_GC:
3327            for (fi = min;; fi++)            for (fi = min;; fi++)
3328              {              {
3329              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3330              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3331              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3332              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2958  for (;;) Line 3339  for (;;)
3339            case PT_PC:            case PT_PC:
3340            for (fi = min;; fi++)            for (fi = min;; fi++)
3341              {              {
3342              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3343              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3344              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3345              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2971  for (;;) Line 3352  for (;;)
3352            case PT_SC:            case PT_SC:
3353            for (fi = min;; fi++)            for (fi = min;; fi++)
3354              {              {
3355              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3356              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3357              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3358              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2993  for (;;) Line 3374  for (;;)
3374          {          {
3375          for (fi = min;; fi++)          for (fi = min;; fi++)
3376            {            {
3377            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3378            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3379            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3380            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 3022  for (;;) Line 3403  for (;;)
3403          {          {
3404          for (fi = min;; fi++)          for (fi = min;; fi++)
3405            {            {
3406            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3407            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3408            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3409                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
# Line 3055  for (;;) Line 3436  for (;;)
3436                }                }
3437              break;              break;
3438    
3439                case OP_NOT_HSPACE:
3440                switch(c)
3441                  {
3442                  default: break;
3443                  case 0x09:      /* HT */
3444                  case 0x20:      /* SPACE */
3445                  case 0xa0:      /* NBSP */
3446                  case 0x1680:    /* OGHAM SPACE MARK */
3447                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3448                  case 0x2000:    /* EN QUAD */
3449                  case 0x2001:    /* EM QUAD */
3450                  case 0x2002:    /* EN SPACE */
3451                  case 0x2003:    /* EM SPACE */
3452                  case 0x2004:    /* THREE-PER-EM SPACE */
3453                  case 0x2005:    /* FOUR-PER-EM SPACE */
3454                  case 0x2006:    /* SIX-PER-EM SPACE */
3455                  case 0x2007:    /* FIGURE SPACE */
3456                  case 0x2008:    /* PUNCTUATION SPACE */
3457                  case 0x2009:    /* THIN SPACE */
3458                  case 0x200A:    /* HAIR SPACE */
3459                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3460                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3461                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3462                  RRETURN(MATCH_NOMATCH);
3463                  }
3464                break;
3465    
3466                case OP_HSPACE:
3467                switch(c)
3468                  {
3469                  default: RRETURN(MATCH_NOMATCH);
3470                  case 0x09:      /* HT */
3471                  case 0x20:      /* SPACE */
3472                  case 0xa0:      /* NBSP */
3473                  case 0x1680:    /* OGHAM SPACE MARK */
3474                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3475                  case 0x2000:    /* EN QUAD */
3476                  case 0x2001:    /* EM QUAD */
3477                  case 0x2002:    /* EN SPACE */
3478                  case 0x2003:    /* EM SPACE */
3479                  case 0x2004:    /* THREE-PER-EM SPACE */
3480                  case 0x2005:    /* FOUR-PER-EM SPACE */
3481                  case 0x2006:    /* SIX-PER-EM SPACE */
3482                  case 0x2007:    /* FIGURE SPACE */
3483                  case 0x2008:    /* PUNCTUATION SPACE */
3484                  case 0x2009:    /* THIN SPACE */
3485                  case 0x200A:    /* HAIR SPACE */
3486                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3487                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3488                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3489                  break;
3490                  }
3491                break;
3492    
3493                case OP_NOT_VSPACE:
3494                switch(c)
3495                  {
3496                  default: break;
3497                  case 0x0a:      /* LF */
3498                  case 0x0b:      /* VT */
3499                  case 0x0c:      /* FF */
3500                  case 0x0d:      /* CR */
3501                  case 0x85:      /* NEL */
3502                  case 0x2028:    /* LINE SEPARATOR */
3503                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3504                  RRETURN(MATCH_NOMATCH);
3505                  }
3506                break;
3507    
3508                case OP_VSPACE:
3509                switch(c)
3510                  {
3511                  default: RRETURN(MATCH_NOMATCH);
3512                  case 0x0a:      /* LF */
3513                  case 0x0b:      /* VT */
3514                  case 0x0c:      /* FF */
3515                  case 0x0d:      /* CR */
3516                  case 0x85:      /* NEL */
3517                  case 0x2028:    /* LINE SEPARATOR */
3518                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3519                  break;
3520                  }
3521                break;
3522    
3523              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3524              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3525                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 3096  for (;;) Line 3561  for (;;)
3561          {          {
3562          for (fi = min;; fi++)          for (fi = min;; fi++)
3563            {            {
3564            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3565            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3566            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3567                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
# Line 3126  for (;;) Line 3591  for (;;)
3591                }                }
3592              break;              break;
3593    
3594                case OP_NOT_HSPACE:
3595                switch(c)
3596                  {
3597                  default: break;
3598                  case 0x09:      /* HT */
3599                  case 0x20:      /* SPACE */
3600                  case 0xa0:      /* NBSP */
3601                  RRETURN(MATCH_NOMATCH);
3602                  }
3603                break;
3604    
3605                case OP_HSPACE:
3606                switch(c)
3607                  {
3608                  default: RRETURN(MATCH_NOMATCH);
3609                  case 0x09:      /* HT */
3610                  case 0x20:      /* SPACE */
3611                  case 0xa0:      /* NBSP */
3612                  break;
3613                  }
3614                break;
3615    
3616                case OP_NOT_VSPACE:
3617                switch(c)
3618                  {
3619                  default: break;
3620                  case 0x0a:      /* LF */
3621                  case 0x0b:      /* VT */
3622                  case 0x0c:      /* FF */
3623                  case 0x0d:      /* CR */
3624                  case 0x85:      /* NEL */
3625                  RRETURN(MATCH_NOMATCH);
3626                  }
3627                break;
3628    
3629                case OP_VSPACE:
3630                switch(c)
3631                  {
3632                  default: RRETURN(MATCH_NOMATCH);
3633                  case 0x0a:      /* LF */
3634                  case 0x0b:      /* VT */
3635                  case 0x0c:      /* FF */
3636                  case 0x0d:      /* CR */
3637                  case 0x85:      /* NEL */
3638                  break;
3639                  }
3640                break;
3641    
3642              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3643              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3644              break;              break;
# Line 3242  for (;;) Line 3755  for (;;)
3755          if (possessive) continue;          if (possessive) continue;
3756          for(;;)          for(;;)
3757            {            {
3758            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3759            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3760            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3761            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3762            }            }
3763          }          }
3764    
# Line 3278  for (;;) Line 3791  for (;;)
3791          if (possessive) continue;          if (possessive) continue;
3792          for(;;)          for(;;)
3793            {            {
3794            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3795            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3796            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3797            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3798              {              {
3799              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3800              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3801                {                {
3802                  BACKCHAR(eptr);
3803                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3804                }                }
3805              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
# Line 3307  for (;;) Line 3820  for (;;)
3820          switch(ctype)          switch(ctype)
3821            {            {
3822            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3823            if (max < INT_MAX)            if (max < INT_MAX)
3824              {              {
3825              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3344  for (;;) Line 3852  for (;;)
3852                  {                  {
3853                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3854                  eptr++;                  eptr++;
3855                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3856                  }                  }
               break;  
3857                }                }
3858              else              else
3859                {                {
3860                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3861                }                }
3862              }              }
3863            break;            break;
# Line 3387  for (;;) Line 3892  for (;;)
3892              }              }
3893            break;            break;
3894    
3895              case OP_NOT_HSPACE:
3896              case OP_HSPACE:
3897              for (i = min; i < max; i++)
3898                {
3899                BOOL gotspace;
3900                int len = 1;
3901                if (eptr >= md->end_subject) break;
3902                GETCHARLEN(c, eptr, len);
3903                switch(c)
3904                  {
3905                  default: gotspace = FALSE; break;
3906                  case 0x09:      /* HT */
3907                  case 0x20:      /* SPACE */
3908                  case 0xa0:      /* NBSP */
3909                  case 0x1680:    /* OGHAM SPACE MARK */
3910                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3911                  case 0x2000:    /* EN QUAD */
3912                  case 0x2001:    /* EM QUAD */
3913                  case 0x2002:    /* EN SPACE */
3914                  case 0x2003:    /* EM SPACE */
3915                  case 0x2004:    /* THREE-PER-EM SPACE */
3916                  case 0x2005:    /* FOUR-PER-EM SPACE */
3917                  case 0x2006:    /* SIX-PER-EM SPACE */
3918                  case 0x2007:    /* FIGURE SPACE */
3919                  case 0x2008:    /* PUNCTUATION SPACE */
3920                  case 0x2009:    /* THIN SPACE */
3921                  case 0x200A:    /* HAIR SPACE */
3922                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3923                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3924                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3925                  gotspace = TRUE;
3926                  break;
3927                  }
3928                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3929                eptr += len;
3930                }
3931              break;
3932    
3933              case OP_NOT_VSPACE:
3934              case OP_VSPACE:
3935              for (i = min; i < max; i++)
3936                {
3937                BOOL gotspace;
3938                int len = 1;
3939                if (eptr >= md->end_subject) break;
3940                GETCHARLEN(c, eptr, len);
3941                switch(c)
3942                  {
3943                  default: gotspace = FALSE; break;
3944                  case 0x0a:      /* LF */
3945                  case 0x0b:      /* VT */
3946                  case 0x0c:      /* FF */
3947                  case 0x0d:      /* CR */
3948                  case 0x85:      /* NEL */
3949                  case 0x2028:    /* LINE SEPARATOR */
3950                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3951                  gotspace = TRUE;
3952                  break;
3953                  }
3954                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3955                eptr += len;
3956                }
3957              break;
3958    
3959            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3960            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3961              {              {
# Line 3462  for (;;) Line 4031  for (;;)
4031          if (possessive) continue;          if (possessive) continue;
4032          for(;;)          for(;;)
4033            {            {
4034            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4035            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4036            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4037            BACKCHAR(eptr);            BACKCHAR(eptr);
4038            }            }
4039          }          }
4040        else        else
4041  #endif  #endif  /* SUPPORT_UTF8 */
4042    
4043        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4044          {          {
# Line 3513  for (;;) Line 4082  for (;;)
4082              }              }
4083            break;            break;
4084    
4085              case OP_NOT_HSPACE:
4086              for (i = min; i < max; i++)
4087                {
4088                if (eptr >= md->end_subject) break;
4089                c = *eptr;
4090                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4091                eptr++;
4092                }
4093              break;
4094    
4095              case OP_HSPACE:
4096              for (i = min; i < max; i++)
4097                {
4098                if (eptr >= md->end_subject) break;
4099                c = *eptr;
4100                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4101                eptr++;
4102                }
4103              break;
4104    
4105              case OP_NOT_VSPACE:
4106              for (i = min; i < max; i++)
4107                {
4108                if (eptr >= md->end_subject) break;
4109                c = *eptr;
4110                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4111                  break;
4112                eptr++;
4113                }
4114              break;
4115    
4116              case OP_VSPACE:
4117              for (i = min; i < max; i++)
4118                {
4119                if (eptr >= md->end_subject) break;
4120                c = *eptr;
4121                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4122                  break;
4123                eptr++;
4124                }
4125              break;
4126    
4127            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4128            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4129              {              {
# Line 3576  for (;;) Line 4187  for (;;)
4187          if (possessive) continue;          if (possessive) continue;
4188          while (eptr >= pp)          while (eptr >= pp)
4189            {            {
4190            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4191            eptr--;            eptr--;
4192            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4193            }            }
# Line 3602  for (;;) Line 4213  for (;;)
4213    
4214    }             /* End of main loop */    }             /* End of main loop */
4215  /* Control never reaches here */  /* Control never reaches here */
4216    
4217    
4218    /* When compiling to use the heap rather than the stack for recursive calls to
4219    match(), the RRETURN() macro jumps here. The number that is saved in
4220    frame->Xwhere indicates which label we actually want to return to. */
4221    
4222    #ifdef NO_RECURSE
4223    #define LBL(val) case val: goto L_RM##val;
4224    HEAP_RETURN:
4225    switch (frame->Xwhere)
4226      {
4227      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4228      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4229      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4230      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4231      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4232      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4233      default:
4234      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4235      return PCRE_ERROR_INTERNAL;
4236      }
4237    #undef LBL
4238    #endif  /* NO_RECURSE */
4239  }  }
4240    
4241    
# Line 3614  Undefine all the macros that were define Line 4248  Undefine all the macros that were define
4248  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4249  #undef eptr  #undef eptr
4250  #undef ecode  #undef ecode
4251    #undef mstart
4252  #undef offset_top  #undef offset_top
4253  #undef ims  #undef ims
4254  #undef eptrb  #undef eptrb
# Line 3686  Returns: > 0 => success; value Line 4321  Returns: > 0 => success; value
4321                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4322  */  */
4323    
4324  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4325  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4326    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4327    int offsetcount)    int offsetcount)
# Line 3711  const uschar *start_bits = NULL; Line 4346  const uschar *start_bits = NULL;
4346  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4347  USPTR end_subject;  USPTR end_subject;
4348  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4349    
4350  pcre_study_data internal_study;  pcre_study_data internal_study;
4351  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3797  md->partial = (options & PCRE_PARTIAL) ! Line 4431  md->partial = (options & PCRE_PARTIAL) !
4431  md->hitend = FALSE;  md->hitend = FALSE;
4432    
4433  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4434    
4435  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4436  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4437    
4438  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
4439  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4440    
4441  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4442         PCRE_NEWLINE_BITS)         PCRE_NEWLINE_BITS)
4443    {    {
4444    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 3814  switch ((((options & PCRE_NEWLINE_BITS) Line 4447  switch ((((options & PCRE_NEWLINE_BITS)
4447    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4448         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4449    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4450      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4451    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4452    }    }
4453    
4454  if (newline < 0)  if (newline == -2)
4455      {
4456      md->nltype = NLTYPE_ANYCRLF;
4457      }
4458    else if (newline < 0)
4459    {    {
4460    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4461    }    }
# Line 3948  the loop runs just once. */ Line 4586  the loop runs just once. */
4586  for(;;)  for(;;)
4587    {    {
4588    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
# Line 3992  for(;;) Line 4631  for(;;)
4631        {        {
4632        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4633          start_match++;          start_match++;
4634    
4635          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4636          and we are now at a LF, advance the match position by one more character.
4637          */
4638    
4639          if (start_match[-1] == '\r' &&
4640               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4641               start_match < end_subject &&
4642               *start_match == '\n')
4643            start_match++;
4644        }        }
4645      }      }
4646    
# Line 4078  for(;;) Line 4727  for(;;)
4727    
4728    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4729    
4730    md->start_match = start_match;    md->start_match_ptr = start_match;
4731    md->match_call_count = 0;    md->match_call_count = 0;
4732    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4733    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);  
4734      switch(rc)
4735        {
4736        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737        exactly like PRUNE. */
4738    
4739    /* Any return other than MATCH_NOMATCH breaks the loop. */      case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743    #ifdef SUPPORT_UTF8
4744        if (utf8)
4745          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746            new_start_match++;
4747    #endif
4748        break;
4749    
4750        /* SKIP passes back the next starting point explicitly. */
4751    
4752        case MATCH_SKIP:
4753        new_start_match = md->start_match_ptr;
4754        break;
4755    
4756    if (rc != MATCH_NOMATCH) break;      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758        case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766        }
4767    
4768      /* Control reaches here for the various types of "no match at this point"
4769      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4093  for(;;) Line 4776  for(;;)
4776    
4777    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779    /* Advance the match position by one character. */    /* Advance to new matching position */
4780    
4781    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4782    
4783    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4784    the subject. */    the subject. */
4785    
4786    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4787    
4788    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and the newline option is CRLF or ANY or
4789    are now at a LF, advance the match position by one more character. */    ANYCRLF, and we are now at a LF, advance the match position by one more
4790      character. */
4791    
4792    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4793         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&         (md->nltype == NLTYPE_ANY ||
4794            md->nltype == NLTYPE_ANYCRLF ||
4795            md->nllen == 2) &&
4796         start_match < end_subject &&         start_match < end_subject &&
4797         *start_match == '\n')         *start_match == '\n')
4798      start_match++;      start_match++;
# Line 4123  for(;;) Line 4804  for(;;)
4804  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4805  conditions is true:  conditions is true:
4806    
4807  (1) The pattern is anchored;  (1) The pattern is anchored or the match was forced to fail by (*COMMIT);
4808    
4809  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4810    
# Line 4138  processing, copy those that we can. In t Line 4819  processing, copy those that we can. In t
4819  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4820  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4821    
4822    ENDLOOP:
4823    
4824  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4825    {    {
4826    if (using_temporary_offsets)    if (using_temporary_offsets)
# Line 4158  if (rc == MATCH_MATCH) Line 4841  if (rc == MATCH_MATCH)
4841    
4842    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4843    
4844    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4845      md->start_match_ptr might be modified if \K was encountered on the successful
4846      matching path. */
4847    
4848    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4849      {      {
4850      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4851      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4852      }      }
4853    

Legend:
Removed from v.93  
changed lines
  Added in v.211

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12