/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 137 by ph10, Thu Mar 29 13:56:00 2007 UTC revision 207 by ph10, Mon Aug 6 09:32:14 2007 UTC
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 53  possible. There are also some static sup Line 57  possible. There are also some static sup
57  #undef min  #undef min
58  #undef max  #undef max
59    
 /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
   
 #define EPTR_WORK_SIZE (1000)  
   
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 188  calls by keeping local variables that ne Line 186  calls by keeping local variables that ne
186  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
187  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
188  always used to.  always used to.
189    
190    The original heap-recursive code used longjmp(). However, it seems that this
191    can be very slow on some operating systems. Following a suggestion from Stan
192    Switzer, the use of longjmp() has been abolished, at the cost of having to
193    provide a unique number for each call to RMATCH. There is no way of generating
194    a sequence of numbers at compile time in C. I have given them names, to make
195    them stand out more clearly.
196    
197    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
198    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
199    tests. Furthermore, not using longjmp() means that local dynamic variables
200    don't have indeterminate values; this has meant that the frame size can be
201    reduced because the result can be "passed back" by straight setting of the
202    variable instead of being passed in the frame.
203  ****************************************************************************  ****************************************************************************
204  ***************************************************************************/  ***************************************************************************/
205    
206    
207    /* Numbers for RMATCH calls */
208    
209    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
210           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
211           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
212           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
213           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50 };
214    
215    
216  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
217  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
218    actually used in this definition. */
219    
220  #ifndef NO_RECURSE  #ifndef NO_RECURSE
221  #define REGISTER register  #define REGISTER register
222    
223  #ifdef DEBUG  #ifdef DEBUG
224  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
225    { \    { \
226    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
227    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
228    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
229    }    }
230  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 210  versions and production versions. */ Line 233  versions and production versions. */
233    return ra; \    return ra; \
234    }    }
235  #else  #else
236  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
237    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
238  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
239  #endif  #endif
240    
241  #else  #else
242    
243    
244  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
245  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
246  match(), which never changes. */  argument of match(), which never changes. */
247    
248  #define REGISTER  #define REGISTER
249    
250  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
251    {\    {\
252    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
253    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
254      {\    newframe->Xeptr = ra;\
255      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
256      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
257      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
258      newframe->Xims = re;\    newframe->Xims = re;\
259      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
260      newframe->Xflags = rg;\    newframe->Xflags = rg;\
261      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
262      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
263      frame = newframe;\    frame = newframe;\
264      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
265      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
266      }\    L_##rw:\
267    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
268    }    }
269    
270  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 256  match(), which never changes. */ Line 274  match(), which never changes. */
274    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
275    if (frame != NULL)\    if (frame != NULL)\
276      {\      {\
277      frame->Xresult = ra;\      rrc = ra;\
278      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
279      }\      }\
280    return ra;\    return ra;\
281    }    }
# Line 273  typedef struct heapframe { Line 290  typedef struct heapframe {
290    
291    const uschar *Xeptr;    const uschar *Xeptr;
292    const uschar *Xecode;    const uschar *Xecode;
293      const uschar *Xmstart;
294    int Xoffset_top;    int Xoffset_top;
295    long int Xims;    long int Xims;
296    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 323  typedef struct heapframe { Line 341  typedef struct heapframe {
341    
342    eptrblock Xnewptrb;    eptrblock Xnewptrb;
343    
344    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
345    
346    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
347    
348  } heapframe;  } heapframe;
349    
# Line 354  made performance worse. Line 371  made performance worse.
371  Arguments:  Arguments:
372     eptr        pointer to current character in subject     eptr        pointer to current character in subject
373     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
374       mstart      pointer to the current match start position (can be modified
375                     by encountering \K)
376     offset_top  current top pointer     offset_top  current top pointer
377     md          pointer to "static" info for the match     md          pointer to "static" info for the match
378     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 363  Arguments: Line 382  Arguments:
382                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
383                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
384                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
385     rdepth      the recursion depth     rdepth      the recursion depth
386    
387  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 373  Returns: MATCH_MATCH if matched Line 391  Returns: MATCH_MATCH if matched
391  */  */
392    
393  static int  static int
394  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
395    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
396    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
397  {  {
# Line 401  frame->Xprevframe = NULL; /* Line 419  frame->Xprevframe = NULL; /*
419    
420  frame->Xeptr = eptr;  frame->Xeptr = eptr;
421  frame->Xecode = ecode;  frame->Xecode = ecode;
422    frame->Xmstart = mstart;
423  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
424  frame->Xims = ims;  frame->Xims = ims;
425  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 415  HEAP_RECURSE: Line 434  HEAP_RECURSE:
434    
435  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
436  #define ecode              frame->Xecode  #define ecode              frame->Xecode
437    #define mstart             frame->Xmstart
438  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
439  #define ims                frame->Xims  #define ims                frame->Xims
440  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 545  defined). However, RMATCH isn't like a f Line 565  defined). However, RMATCH isn't like a f
565  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
566  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
567    
568    #ifdef SUPPORT_UTF8
569    utf8 = md->utf8;       /* Local copy of the flag */
570    #else
571    utf8 = FALSE;
572    #endif
573    
574  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
575  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
576    
# Line 553  if (rdepth >= md->match_limit_recursion) Line 579  if (rdepth >= md->match_limit_recursion)
579    
580  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
581    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
582  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
583  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
584  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
585  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
586  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
587  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
588  already used. */  block that is used is on the stack, so a new one may be required for each
589    match(). */
590    
591  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
592    {    {
593    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
594    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
595      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
596    }    }
597    
598  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 593  for (;;) Line 607  for (;;)
607    
608    if (md->partial &&    if (md->partial &&
609        eptr >= md->end_subject &&        eptr >= md->end_subject &&
610        eptr > md->start_match)        eptr > mstart)
611      md->hitend = TRUE;      md->hitend = TRUE;
612    
613    switch(op)    switch(op)
614      {      {
615      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
# Line 637  for (;;) Line 651  for (;;)
651        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
652        do        do
653          {          {
654          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
655            ims, eptrb, flags);            ims, eptrb, flags, RM1);
656          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
657          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
658          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 654  for (;;) Line 668  for (;;)
668        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
669        }        }
670    
671      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
672      bracket. */      as a non-capturing bracket. */
673    
674        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
675        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
676    
677      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
678    
679        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
680        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
681    
682      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
683      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
684      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
685      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
686        is set. */
687    
688      case OP_BRA:      case OP_BRA:
689      case OP_SBRA:      case OP_SBRA:
# Line 670  for (;;) Line 691  for (;;)
691      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
692      for (;;)      for (;;)
693        {        {
694        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
695          {          {
696          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
697          flags |= match_tail_recursed;            {
698          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
699          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
700              goto TAIL_RECURSE;
701              }
702    
703            /* Possibly empty group; can't use tail recursion. */
704    
705            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
706              eptrb, flags, RM48);
707            RRETURN(rrc);
708          }          }
709    
710        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
711        otherwise return. */        otherwise return. */
712    
713        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
714          eptrb, flags);          eptrb, flags, RM2);
715        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
716        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
717        }        }
# Line 723  for (;;) Line 752  for (;;)
752    
753      else      else
754        {        {
755        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
756            match_condassert);            match_condassert, RM3);
757        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
758          {          {
759          condition = TRUE;          condition = TRUE;
# Line 743  for (;;) Line 772  for (;;)
772        }        }
773    
774      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
775      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
776      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
777        group. If the second alternative doesn't exist, we can just plough on. */
778    
779      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
780        {        {
781        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
782        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
783        goto TAIL_RECURSE;          {
784            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
785            RRETURN(rrc);
786            }
787          else                       /* Group must match something */
788            {
789            flags = 0;
790            goto TAIL_RECURSE;
791            }
792        }        }
793      else      else                         /* Condition false & no 2nd alternative */
794        {        {
795        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
796        }        }
# Line 770  for (;;) Line 808  for (;;)
808        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
809        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
810          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
811        md->start_match = rec->save_start;        mstart = rec->save_start;
812        ims = original_ims;        ims = original_ims;
813        ecode = rec->after_call;        ecode = rec->after_call;
814        break;        break;
# Line 779  for (;;) Line 817  for (;;)
817      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
818      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
819    
820      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
821      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
822      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
823        md->start_match_ptr = mstart;  /* and the start (\K can modify) */
824      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
825    
826      /* Change option settings */      /* Change option settings */
# Line 802  for (;;) Line 841  for (;;)
841      case OP_ASSERTBACK:      case OP_ASSERTBACK:
842      do      do
843        {        {
844        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
845            RM4);
846        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
847        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
848        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 828  for (;;) Line 868  for (;;)
868      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
869      do      do
870        {        {
871        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
872            RM5);
873        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
874        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
875        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 854  for (;;) Line 895  for (;;)
895          {          {
896          eptr--;          eptr--;
897          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
898          BACKCHAR(eptr)          BACKCHAR(eptr);
899          }          }
900        }        }
901      else      else
# Line 885  for (;;) Line 926  for (;;)
926        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
927        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
928        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
929        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
930        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
931        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
932        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 947  for (;;) Line 988  for (;;)
988    
989        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
990              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
991        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
992        md->start_match = eptr;        mstart = eptr;
993    
994        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
995        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 957  for (;;) Line 998  for (;;)
998        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
999        do        do
1000          {          {
1001          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1002            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1003          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1004            {            {
1005            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 1001  for (;;) Line 1042  for (;;)
1042    
1043      do      do
1044        {        {
1045        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1046        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1047        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1048        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 1047  for (;;) Line 1087  for (;;)
1087    
1088      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1089        {        {
1090        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1091        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1092        ecode = prev;        ecode = prev;
1093        flags = match_tail_recursed;        flags = 0;
1094        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1095        }        }
1096      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1097        {        {
1098        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1099        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1100        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1101        flags = match_tail_recursed;        flags = 0;
1102        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1103        }        }
1104      /* Control never gets here */      /* Control never gets here */
# Line 1079  for (;;) Line 1119  for (;;)
1119      case OP_BRAZERO:      case OP_BRAZERO:
1120        {        {
1121        next = ecode+1;        next = ecode+1;
1122        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1123        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1124        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1125        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1090  for (;;) Line 1130  for (;;)
1130        {        {
1131        next = ecode+1;        next = ecode+1;
1132        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1133        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1134        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1135        ecode++;        ecode++;
1136        }        }
# Line 1160  for (;;) Line 1200  for (;;)
1200          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1201          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1202          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1203          md->start_match = rec->save_start;          mstart = rec->save_start;
1204          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1205            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1206          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1189  for (;;) Line 1229  for (;;)
1229    
1230      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1231      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1232      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1233        unlimited repeat of a group that can match an empty string. */
1234    
1235      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1236    
1237      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1238        {        {
1239        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1240        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1241          if (flags != 0)    /* Could match an empty string */
1242            {
1243            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1244            RRETURN(rrc);
1245            }
1246        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1247        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1248        }        }
1249      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1250        {        {
1251        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1252        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1253        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1254        flags = match_tail_recursed;        flags = 0;
1255        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1256        }        }
1257      /* Control never gets here */      /* Control never gets here */
# Line 1239  for (;;) Line 1284  for (;;)
1284      ecode++;      ecode++;
1285      break;      break;
1286    
1287        /* Reset the start of match point */
1288    
1289        case OP_SET_SOM:
1290        mstart = eptr;
1291        ecode++;
1292        break;
1293    
1294      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1295      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1296    
# Line 1447  for (;;) Line 1499  for (;;)
1499      ecode++;      ecode++;
1500      break;      break;
1501    
1502        case OP_NOT_HSPACE:
1503        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1504        GETCHARINCTEST(c, eptr);
1505        switch(c)
1506          {
1507          default: break;
1508          case 0x09:      /* HT */
1509          case 0x20:      /* SPACE */
1510          case 0xa0:      /* NBSP */
1511          case 0x1680:    /* OGHAM SPACE MARK */
1512          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1513          case 0x2000:    /* EN QUAD */
1514          case 0x2001:    /* EM QUAD */
1515          case 0x2002:    /* EN SPACE */
1516          case 0x2003:    /* EM SPACE */
1517          case 0x2004:    /* THREE-PER-EM SPACE */
1518          case 0x2005:    /* FOUR-PER-EM SPACE */
1519          case 0x2006:    /* SIX-PER-EM SPACE */
1520          case 0x2007:    /* FIGURE SPACE */
1521          case 0x2008:    /* PUNCTUATION SPACE */
1522          case 0x2009:    /* THIN SPACE */
1523          case 0x200A:    /* HAIR SPACE */
1524          case 0x202f:    /* NARROW NO-BREAK SPACE */
1525          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1526          case 0x3000:    /* IDEOGRAPHIC SPACE */
1527          RRETURN(MATCH_NOMATCH);
1528          }
1529        ecode++;
1530        break;
1531    
1532        case OP_HSPACE:
1533        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1534        GETCHARINCTEST(c, eptr);
1535        switch(c)
1536          {
1537          default: RRETURN(MATCH_NOMATCH);
1538          case 0x09:      /* HT */
1539          case 0x20:      /* SPACE */
1540          case 0xa0:      /* NBSP */
1541          case 0x1680:    /* OGHAM SPACE MARK */
1542          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1543          case 0x2000:    /* EN QUAD */
1544          case 0x2001:    /* EM QUAD */
1545          case 0x2002:    /* EN SPACE */
1546          case 0x2003:    /* EM SPACE */
1547          case 0x2004:    /* THREE-PER-EM SPACE */
1548          case 0x2005:    /* FOUR-PER-EM SPACE */
1549          case 0x2006:    /* SIX-PER-EM SPACE */
1550          case 0x2007:    /* FIGURE SPACE */
1551          case 0x2008:    /* PUNCTUATION SPACE */
1552          case 0x2009:    /* THIN SPACE */
1553          case 0x200A:    /* HAIR SPACE */
1554          case 0x202f:    /* NARROW NO-BREAK SPACE */
1555          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1556          case 0x3000:    /* IDEOGRAPHIC SPACE */
1557          break;
1558          }
1559        ecode++;
1560        break;
1561    
1562        case OP_NOT_VSPACE:
1563        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1564        GETCHARINCTEST(c, eptr);
1565        switch(c)
1566          {
1567          default: break;
1568          case 0x0a:      /* LF */
1569          case 0x0b:      /* VT */
1570          case 0x0c:      /* FF */
1571          case 0x0d:      /* CR */
1572          case 0x85:      /* NEL */
1573          case 0x2028:    /* LINE SEPARATOR */
1574          case 0x2029:    /* PARAGRAPH SEPARATOR */
1575          RRETURN(MATCH_NOMATCH);
1576          }
1577        ecode++;
1578        break;
1579    
1580        case OP_VSPACE:
1581        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1582        GETCHARINCTEST(c, eptr);
1583        switch(c)
1584          {
1585          default: RRETURN(MATCH_NOMATCH);
1586          case 0x0a:      /* LF */
1587          case 0x0b:      /* VT */
1588          case 0x0c:      /* FF */
1589          case 0x0d:      /* CR */
1590          case 0x85:      /* NEL */
1591          case 0x2028:    /* LINE SEPARATOR */
1592          case 0x2029:    /* PARAGRAPH SEPARATOR */
1593          break;
1594          }
1595        ecode++;
1596        break;
1597    
1598  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1599      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1600      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1602  for (;;) Line 1750  for (;;)
1750          {          {
1751          for (fi = min;; fi++)          for (fi = min;; fi++)
1752            {            {
1753            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1754            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1755            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1756              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1623  for (;;) Line 1771  for (;;)
1771            }            }
1772          while (eptr >= pp)          while (eptr >= pp)
1773            {            {
1774            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1775            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1776            eptr -= length;            eptr -= length;
1777            }            }
# Line 1728  for (;;) Line 1876  for (;;)
1876            {            {
1877            for (fi = min;; fi++)            for (fi = min;; fi++)
1878              {              {
1879              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1880              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1881              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1882              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1748  for (;;) Line 1896  for (;;)
1896            {            {
1897            for (fi = min;; fi++)            for (fi = min;; fi++)
1898              {              {
1899              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1900              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1901              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1902              c = *eptr++;              c = *eptr++;
# Line 1785  for (;;) Line 1933  for (;;)
1933              }              }
1934            for (;;)            for (;;)
1935              {              {
1936              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1937              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1938              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1939              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1804  for (;;) Line 1952  for (;;)
1952              }              }
1953            while (eptr >= pp)            while (eptr >= pp)
1954              {              {
1955              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1956              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1957              eptr--;              eptr--;
1958              }              }
# Line 1875  for (;;) Line 2023  for (;;)
2023          {          {
2024          for (fi = min;; fi++)          for (fi = min;; fi++)
2025            {            {
2026            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2027            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2028            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2029            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1899  for (;;) Line 2047  for (;;)
2047            }            }
2048          for(;;)          for(;;)
2049            {            {
2050            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2051            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2052            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2053            BACKCHAR(eptr)            BACKCHAR(eptr);
2054            }            }
2055          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2056          }          }
# Line 2086  for (;;) Line 2234  for (;;)
2234            {            {
2235            for (fi = min;; fi++)            for (fi = min;; fi++)
2236              {              {
2237              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2238              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2239              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2240              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
# Line 2127  for (;;) Line 2275  for (;;)
2275            if (possessive) continue;            if (possessive) continue;
2276            for(;;)            for(;;)
2277             {             {
2278             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2279             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2280             if (eptr == pp) RRETURN(MATCH_NOMATCH);             if (eptr == pp) RRETURN(MATCH_NOMATCH);
2281  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 2176  for (;;) Line 2324  for (;;)
2324          {          {
2325          for (fi = min;; fi++)          for (fi = min;; fi++)
2326            {            {
2327            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2328            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2329            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2330                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2195  for (;;) Line 2343  for (;;)
2343          if (possessive) continue;          if (possessive) continue;
2344          while (eptr >= pp)          while (eptr >= pp)
2345            {            {
2346            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2347            eptr--;            eptr--;
2348            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2349            }            }
# Line 2214  for (;;) Line 2362  for (;;)
2362          {          {
2363          for (fi = min;; fi++)          for (fi = min;; fi++)
2364            {            {
2365            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2366            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2367            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2368              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2232  for (;;) Line 2380  for (;;)
2380          if (possessive) continue;          if (possessive) continue;
2381          while (eptr >= pp)          while (eptr >= pp)
2382            {            {
2383            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2384            eptr--;            eptr--;
2385            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2386            }            }
# Line 2377  for (;;) Line 2525  for (;;)
2525            register unsigned int d;            register unsigned int d;
2526            for (fi = min;; fi++)            for (fi = min;; fi++)
2527              {              {
2528              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2529              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2530              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2531              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2391  for (;;) Line 2539  for (;;)
2539            {            {
2540            for (fi = min;; fi++)            for (fi = min;; fi++)
2541              {              {
2542              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2543              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2544              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2545                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2423  for (;;) Line 2571  for (;;)
2571          if (possessive) continue;          if (possessive) continue;
2572          for(;;)          for(;;)
2573              {              {
2574              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2575              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2576              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2577              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2441  for (;;) Line 2589  for (;;)
2589            if (possessive) continue;            if (possessive) continue;
2590            while (eptr >= pp)            while (eptr >= pp)
2591              {              {
2592              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2593              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2594              eptr--;              eptr--;
2595              }              }
# Line 2486  for (;;) Line 2634  for (;;)
2634            register unsigned int d;            register unsigned int d;
2635            for (fi = min;; fi++)            for (fi = min;; fi++)
2636              {              {
2637              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2638              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2639              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2640              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2499  for (;;) Line 2647  for (;;)
2647            {            {
2648            for (fi = min;; fi++)            for (fi = min;; fi++)
2649              {              {
2650              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2651              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2652              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2653                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2530  for (;;) Line 2678  for (;;)
2678            if (possessive) continue;            if (possessive) continue;
2679            for(;;)            for(;;)
2680              {              {
2681              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2682              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2683              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2684              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2548  for (;;) Line 2696  for (;;)
2696            if (possessive) continue;            if (possessive) continue;
2697            while (eptr >= pp)            while (eptr >= pp)
2698              {              {
2699              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2700              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2701              eptr--;              eptr--;
2702              }              }
# Line 2655  for (;;) Line 2803  for (;;)
2803            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2804              {              {
2805              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2806              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2807              }              }
2808            break;            break;
2809    
# Line 2663  for (;;) Line 2811  for (;;)
2811            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2812              {              {
2813              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2814              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2815              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2816              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2817                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2676  for (;;) Line 2824  for (;;)
2824            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2825              {              {
2826              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2827              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2828              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2829              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2830                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2687  for (;;) Line 2835  for (;;)
2835            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2836              {              {
2837              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2838              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2839              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2840              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2841                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2698  for (;;) Line 2846  for (;;)
2846            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2847              {              {
2848              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2849              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2850              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2851              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2852                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2779  for (;;) Line 2927  for (;;)
2927            }            }
2928          break;          break;
2929    
2930            case OP_NOT_HSPACE:
2931            for (i = 1; i <= min; i++)
2932              {
2933              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2934              GETCHARINC(c, eptr);
2935              switch(c)
2936                {
2937                default: break;
2938                case 0x09:      /* HT */
2939                case 0x20:      /* SPACE */
2940                case 0xa0:      /* NBSP */
2941                case 0x1680:    /* OGHAM SPACE MARK */
2942                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2943                case 0x2000:    /* EN QUAD */
2944                case 0x2001:    /* EM QUAD */
2945                case 0x2002:    /* EN SPACE */
2946                case 0x2003:    /* EM SPACE */
2947                case 0x2004:    /* THREE-PER-EM SPACE */
2948                case 0x2005:    /* FOUR-PER-EM SPACE */
2949                case 0x2006:    /* SIX-PER-EM SPACE */
2950                case 0x2007:    /* FIGURE SPACE */
2951                case 0x2008:    /* PUNCTUATION SPACE */
2952                case 0x2009:    /* THIN SPACE */
2953                case 0x200A:    /* HAIR SPACE */
2954                case 0x202f:    /* NARROW NO-BREAK SPACE */
2955                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2956                case 0x3000:    /* IDEOGRAPHIC SPACE */
2957                RRETURN(MATCH_NOMATCH);
2958                }
2959              }
2960            break;
2961    
2962            case OP_HSPACE:
2963            for (i = 1; i <= min; i++)
2964              {
2965              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2966              GETCHARINC(c, eptr);
2967              switch(c)
2968                {
2969                default: RRETURN(MATCH_NOMATCH);
2970                case 0x09:      /* HT */
2971                case 0x20:      /* SPACE */
2972                case 0xa0:      /* NBSP */
2973                case 0x1680:    /* OGHAM SPACE MARK */
2974                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2975                case 0x2000:    /* EN QUAD */
2976                case 0x2001:    /* EM QUAD */
2977                case 0x2002:    /* EN SPACE */
2978                case 0x2003:    /* EM SPACE */
2979                case 0x2004:    /* THREE-PER-EM SPACE */
2980                case 0x2005:    /* FOUR-PER-EM SPACE */
2981                case 0x2006:    /* SIX-PER-EM SPACE */
2982                case 0x2007:    /* FIGURE SPACE */
2983                case 0x2008:    /* PUNCTUATION SPACE */
2984                case 0x2009:    /* THIN SPACE */
2985                case 0x200A:    /* HAIR SPACE */
2986                case 0x202f:    /* NARROW NO-BREAK SPACE */
2987                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2988                case 0x3000:    /* IDEOGRAPHIC SPACE */
2989                break;
2990                }
2991              }
2992            break;
2993    
2994            case OP_NOT_VSPACE:
2995            for (i = 1; i <= min; i++)
2996              {
2997              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998              GETCHARINC(c, eptr);
2999              switch(c)
3000                {
3001                default: break;
3002                case 0x0a:      /* LF */
3003                case 0x0b:      /* VT */
3004                case 0x0c:      /* FF */
3005                case 0x0d:      /* CR */
3006                case 0x85:      /* NEL */
3007                case 0x2028:    /* LINE SEPARATOR */
3008                case 0x2029:    /* PARAGRAPH SEPARATOR */
3009                RRETURN(MATCH_NOMATCH);
3010                }
3011              }
3012            break;
3013    
3014            case OP_VSPACE:
3015            for (i = 1; i <= min; i++)
3016              {
3017              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018              GETCHARINC(c, eptr);
3019              switch(c)
3020                {
3021                default: RRETURN(MATCH_NOMATCH);
3022                case 0x0a:      /* LF */
3023                case 0x0b:      /* VT */
3024                case 0x0c:      /* FF */
3025                case 0x0d:      /* CR */
3026                case 0x85:      /* NEL */
3027                case 0x2028:    /* LINE SEPARATOR */
3028                case 0x2029:    /* PARAGRAPH SEPARATOR */
3029                break;
3030                }
3031              }
3032            break;
3033    
3034          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3035          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3036            {            {
# Line 2890  for (;;) Line 3142  for (;;)
3142            }            }
3143          break;          break;
3144    
3145            case OP_NOT_HSPACE:
3146            for (i = 1; i <= min; i++)
3147              {
3148              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3149              switch(*eptr++)
3150                {
3151                default: break;
3152                case 0x09:      /* HT */
3153                case 0x20:      /* SPACE */
3154                case 0xa0:      /* NBSP */
3155                RRETURN(MATCH_NOMATCH);
3156                }
3157              }
3158            break;
3159    
3160            case OP_HSPACE:
3161            for (i = 1; i <= min; i++)
3162              {
3163              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3164              switch(*eptr++)
3165                {
3166                default: RRETURN(MATCH_NOMATCH);
3167                case 0x09:      /* HT */
3168                case 0x20:      /* SPACE */
3169                case 0xa0:      /* NBSP */
3170                break;
3171                }
3172              }
3173            break;
3174    
3175            case OP_NOT_VSPACE:
3176            for (i = 1; i <= min; i++)
3177              {
3178              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3179              switch(*eptr++)
3180                {
3181                default: break;
3182                case 0x0a:      /* LF */
3183                case 0x0b:      /* VT */
3184                case 0x0c:      /* FF */
3185                case 0x0d:      /* CR */
3186                case 0x85:      /* NEL */
3187                RRETURN(MATCH_NOMATCH);
3188                }
3189              }
3190            break;
3191    
3192            case OP_VSPACE:
3193            for (i = 1; i <= min; i++)
3194              {
3195              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3196              switch(*eptr++)
3197                {
3198                default: RRETURN(MATCH_NOMATCH);
3199                case 0x0a:      /* LF */
3200                case 0x0b:      /* VT */
3201                case 0x0c:      /* FF */
3202                case 0x0d:      /* CR */
3203                case 0x85:      /* NEL */
3204                break;
3205                }
3206              }
3207            break;
3208    
3209          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3210          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3211            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2945  for (;;) Line 3261  for (;;)
3261            case PT_ANY:            case PT_ANY:
3262            for (fi = min;; fi++)            for (fi = min;; fi++)
3263              {              {
3264              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3265              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3266              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3267              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2956  for (;;) Line 3272  for (;;)
3272            case PT_LAMP:            case PT_LAMP:
3273            for (fi = min;; fi++)            for (fi = min;; fi++)
3274              {              {
3275              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3276              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3277              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3278              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2971  for (;;) Line 3287  for (;;)
3287            case PT_GC:            case PT_GC:
3288            for (fi = min;; fi++)            for (fi = min;; fi++)
3289              {              {
3290              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3291              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3292              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3293              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2984  for (;;) Line 3300  for (;;)
3300            case PT_PC:            case PT_PC:
3301            for (fi = min;; fi++)            for (fi = min;; fi++)
3302              {              {
3303              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3304              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3305              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3306              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2997  for (;;) Line 3313  for (;;)
3313            case PT_SC:            case PT_SC:
3314            for (fi = min;; fi++)            for (fi = min;; fi++)
3315              {              {
3316              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3317              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3318              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3319              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 3019  for (;;) Line 3335  for (;;)
3335          {          {
3336          for (fi = min;; fi++)          for (fi = min;; fi++)
3337            {            {
3338            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3339            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3340            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3341            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 3048  for (;;) Line 3364  for (;;)
3364          {          {
3365          for (fi = min;; fi++)          for (fi = min;; fi++)
3366            {            {
3367            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3368            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3369            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3370                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
# Line 3081  for (;;) Line 3397  for (;;)
3397                }                }
3398              break;              break;
3399    
3400                case OP_NOT_HSPACE:
3401                switch(c)
3402                  {
3403                  default: break;
3404                  case 0x09:      /* HT */
3405                  case 0x20:      /* SPACE */
3406                  case 0xa0:      /* NBSP */
3407                  case 0x1680:    /* OGHAM SPACE MARK */
3408                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3409                  case 0x2000:    /* EN QUAD */
3410                  case 0x2001:    /* EM QUAD */
3411                  case 0x2002:    /* EN SPACE */
3412                  case 0x2003:    /* EM SPACE */
3413                  case 0x2004:    /* THREE-PER-EM SPACE */
3414                  case 0x2005:    /* FOUR-PER-EM SPACE */
3415                  case 0x2006:    /* SIX-PER-EM SPACE */
3416                  case 0x2007:    /* FIGURE SPACE */
3417                  case 0x2008:    /* PUNCTUATION SPACE */
3418                  case 0x2009:    /* THIN SPACE */
3419                  case 0x200A:    /* HAIR SPACE */
3420                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3421                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3422                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3423                  RRETURN(MATCH_NOMATCH);
3424                  }
3425                break;
3426    
3427                case OP_HSPACE:
3428                switch(c)
3429                  {
3430                  default: RRETURN(MATCH_NOMATCH);
3431                  case 0x09:      /* HT */
3432                  case 0x20:      /* SPACE */
3433                  case 0xa0:      /* NBSP */
3434                  case 0x1680:    /* OGHAM SPACE MARK */
3435                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3436                  case 0x2000:    /* EN QUAD */
3437                  case 0x2001:    /* EM QUAD */
3438                  case 0x2002:    /* EN SPACE */
3439                  case 0x2003:    /* EM SPACE */
3440                  case 0x2004:    /* THREE-PER-EM SPACE */
3441                  case 0x2005:    /* FOUR-PER-EM SPACE */
3442                  case 0x2006:    /* SIX-PER-EM SPACE */
3443                  case 0x2007:    /* FIGURE SPACE */
3444                  case 0x2008:    /* PUNCTUATION SPACE */
3445                  case 0x2009:    /* THIN SPACE */
3446                  case 0x200A:    /* HAIR SPACE */
3447                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3448                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3449                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3450                  break;
3451                  }
3452                break;
3453    
3454                case OP_NOT_VSPACE:
3455                switch(c)
3456                  {
3457                  default: break;
3458                  case 0x0a:      /* LF */
3459                  case 0x0b:      /* VT */
3460                  case 0x0c:      /* FF */
3461                  case 0x0d:      /* CR */
3462                  case 0x85:      /* NEL */
3463                  case 0x2028:    /* LINE SEPARATOR */
3464                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3465                  RRETURN(MATCH_NOMATCH);
3466                  }
3467                break;
3468    
3469                case OP_VSPACE:
3470                switch(c)
3471                  {
3472                  default: RRETURN(MATCH_NOMATCH);
3473                  case 0x0a:      /* LF */
3474                  case 0x0b:      /* VT */
3475                  case 0x0c:      /* FF */
3476                  case 0x0d:      /* CR */
3477                  case 0x85:      /* NEL */
3478                  case 0x2028:    /* LINE SEPARATOR */
3479                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3480                  break;
3481                  }
3482                break;
3483    
3484              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3485              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3486                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 3122  for (;;) Line 3522  for (;;)
3522          {          {
3523          for (fi = min;; fi++)          for (fi = min;; fi++)
3524            {            {
3525            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3526            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3527            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3528                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
# Line 3152  for (;;) Line 3552  for (;;)
3552                }                }
3553              break;              break;
3554    
3555                case OP_NOT_HSPACE:
3556                switch(c)
3557                  {
3558                  default: break;
3559                  case 0x09:      /* HT */
3560                  case 0x20:      /* SPACE */
3561                  case 0xa0:      /* NBSP */
3562                  RRETURN(MATCH_NOMATCH);
3563                  }
3564                break;
3565    
3566                case OP_HSPACE:
3567                switch(c)
3568                  {
3569                  default: RRETURN(MATCH_NOMATCH);
3570                  case 0x09:      /* HT */
3571                  case 0x20:      /* SPACE */
3572                  case 0xa0:      /* NBSP */
3573                  break;
3574                  }
3575                break;
3576    
3577                case OP_NOT_VSPACE:
3578                switch(c)
3579                  {
3580                  default: break;
3581                  case 0x0a:      /* LF */
3582                  case 0x0b:      /* VT */
3583                  case 0x0c:      /* FF */
3584                  case 0x0d:      /* CR */
3585                  case 0x85:      /* NEL */
3586                  RRETURN(MATCH_NOMATCH);
3587                  }
3588                break;
3589    
3590                case OP_VSPACE:
3591                switch(c)
3592                  {
3593                  default: RRETURN(MATCH_NOMATCH);
3594                  case 0x0a:      /* LF */
3595                  case 0x0b:      /* VT */
3596                  case 0x0c:      /* FF */
3597                  case 0x0d:      /* CR */
3598                  case 0x85:      /* NEL */
3599                  break;
3600                  }
3601                break;
3602    
3603              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3604              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3605              break;              break;
# Line 3268  for (;;) Line 3716  for (;;)
3716          if (possessive) continue;          if (possessive) continue;
3717          for(;;)          for(;;)
3718            {            {
3719            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3720            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3721            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3722            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3723            }            }
3724          }          }
3725    
# Line 3304  for (;;) Line 3752  for (;;)
3752          if (possessive) continue;          if (possessive) continue;
3753          for(;;)          for(;;)
3754            {            {
3755            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3756            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3757            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3758            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3759              {              {
3760              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3761              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3762                {                {
3763                  BACKCHAR(eptr);
3764                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3765                }                }
3766              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
# Line 3333  for (;;) Line 3781  for (;;)
3781          switch(ctype)          switch(ctype)
3782            {            {
3783            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3784            if (max < INT_MAX)            if (max < INT_MAX)
3785              {              {
3786              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3370  for (;;) Line 3813  for (;;)
3813                  {                  {
3814                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3815                  eptr++;                  eptr++;
3816                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3817                  }                  }
               break;  
3818                }                }
3819              else              else
3820                {                {
3821                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3822                }                }
3823              }              }
3824            break;            break;
# Line 3413  for (;;) Line 3853  for (;;)
3853              }              }
3854            break;            break;
3855    
3856              case OP_NOT_HSPACE:
3857              case OP_HSPACE:
3858              for (i = min; i < max; i++)
3859                {
3860                BOOL gotspace;
3861                int len = 1;
3862                if (eptr >= md->end_subject) break;
3863                GETCHARLEN(c, eptr, len);
3864                switch(c)
3865                  {
3866                  default: gotspace = FALSE; break;
3867                  case 0x09:      /* HT */
3868                  case 0x20:      /* SPACE */
3869                  case 0xa0:      /* NBSP */
3870                  case 0x1680:    /* OGHAM SPACE MARK */
3871                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3872                  case 0x2000:    /* EN QUAD */
3873                  case 0x2001:    /* EM QUAD */
3874                  case 0x2002:    /* EN SPACE */
3875                  case 0x2003:    /* EM SPACE */
3876                  case 0x2004:    /* THREE-PER-EM SPACE */
3877                  case 0x2005:    /* FOUR-PER-EM SPACE */
3878                  case 0x2006:    /* SIX-PER-EM SPACE */
3879                  case 0x2007:    /* FIGURE SPACE */
3880                  case 0x2008:    /* PUNCTUATION SPACE */
3881                  case 0x2009:    /* THIN SPACE */
3882                  case 0x200A:    /* HAIR SPACE */
3883                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3884                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3885                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3886                  gotspace = TRUE;
3887                  break;
3888                  }
3889                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3890                eptr += len;
3891                }
3892              break;
3893    
3894              case OP_NOT_VSPACE:
3895              case OP_VSPACE:
3896              for (i = min; i < max; i++)
3897                {
3898                BOOL gotspace;
3899                int len = 1;
3900                if (eptr >= md->end_subject) break;
3901                GETCHARLEN(c, eptr, len);
3902                switch(c)
3903                  {
3904                  default: gotspace = FALSE; break;
3905                  case 0x0a:      /* LF */
3906                  case 0x0b:      /* VT */
3907                  case 0x0c:      /* FF */
3908                  case 0x0d:      /* CR */
3909                  case 0x85:      /* NEL */
3910                  case 0x2028:    /* LINE SEPARATOR */
3911                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3912                  gotspace = TRUE;
3913                  break;
3914                  }
3915                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3916                eptr += len;
3917                }
3918              break;
3919    
3920            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3921            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3922              {              {
# Line 3488  for (;;) Line 3992  for (;;)
3992          if (possessive) continue;          if (possessive) continue;
3993          for(;;)          for(;;)
3994            {            {
3995            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
3996            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3997            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3998            BACKCHAR(eptr);            BACKCHAR(eptr);
3999            }            }
4000          }          }
4001        else        else
4002  #endif  #endif  /* SUPPORT_UTF8 */
4003    
4004        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4005          {          {
# Line 3539  for (;;) Line 4043  for (;;)
4043              }              }
4044            break;            break;
4045    
4046              case OP_NOT_HSPACE:
4047              for (i = min; i < max; i++)
4048                {
4049                if (eptr >= md->end_subject) break;
4050                c = *eptr;
4051                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4052                eptr++;
4053                }
4054              break;
4055    
4056              case OP_HSPACE:
4057              for (i = min; i < max; i++)
4058                {
4059                if (eptr >= md->end_subject) break;
4060                c = *eptr;
4061                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4062                eptr++;
4063                }
4064              break;
4065    
4066              case OP_NOT_VSPACE:
4067              for (i = min; i < max; i++)
4068                {
4069                if (eptr >= md->end_subject) break;
4070                c = *eptr;
4071                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4072                  break;
4073                eptr++;
4074                }
4075              break;
4076    
4077              case OP_VSPACE:
4078              for (i = min; i < max; i++)
4079                {
4080                if (eptr >= md->end_subject) break;
4081                c = *eptr;
4082                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4083                  break;
4084                eptr++;
4085                }
4086              break;
4087    
4088            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4089            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4090              {              {
# Line 3602  for (;;) Line 4148  for (;;)
4148          if (possessive) continue;          if (possessive) continue;
4149          while (eptr >= pp)          while (eptr >= pp)
4150            {            {
4151            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4152            eptr--;            eptr--;
4153            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4154            }            }
# Line 3628  for (;;) Line 4174  for (;;)
4174    
4175    }             /* End of main loop */    }             /* End of main loop */
4176  /* Control never reaches here */  /* Control never reaches here */
4177    
4178    
4179    /* When compiling to use the heap rather than the stack for recursive calls to
4180    match(), the RRETURN() macro jumps here. The number that is saved in
4181    frame->Xwhere indicates which label we actually want to return to. */
4182    
4183    #ifdef NO_RECURSE
4184    #define LBL(val) case val: goto L_RM##val;
4185    HEAP_RETURN:
4186    switch (frame->Xwhere)
4187      {
4188      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4189      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4190      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4191      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4192      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4193      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47)
4194      default:
4195      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4196      return PCRE_ERROR_INTERNAL;
4197      }
4198    #undef LBL
4199    #endif  /* NO_RECURSE */
4200  }  }
4201    
4202    
# Line 3640  Undefine all the macros that were define Line 4209  Undefine all the macros that were define
4209  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4210  #undef eptr  #undef eptr
4211  #undef ecode  #undef ecode
4212    #undef mstart
4213  #undef offset_top  #undef offset_top
4214  #undef ims  #undef ims
4215  #undef eptrb  #undef eptrb
# Line 3712  Returns: > 0 => success; value Line 4282  Returns: > 0 => success; value
4282                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4283  */  */
4284    
4285  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4286  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4287    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4288    int offsetcount)    int offsetcount)
# Line 3737  const uschar *start_bits = NULL; Line 4307  const uschar *start_bits = NULL;
4307  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4308  USPTR end_subject;  USPTR end_subject;
4309  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4310    
4311  pcre_study_data internal_study;  pcre_study_data internal_study;
4312  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3823  md->partial = (options & PCRE_PARTIAL) ! Line 4392  md->partial = (options & PCRE_PARTIAL) !
4392  md->hitend = FALSE;  md->hitend = FALSE;
4393    
4394  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4395    
4396  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4397  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
# Line 3831  md->ctypes = tables + ctypes_offset; Line 4399  md->ctypes = tables + ctypes_offset;
4399  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4400  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4401    
4402  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4403         PCRE_NEWLINE_BITS)         PCRE_NEWLINE_BITS)
4404    {    {
4405    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 3840  switch ((((options & PCRE_NEWLINE_BITS) Line 4408  switch ((((options & PCRE_NEWLINE_BITS)
4408    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4409         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4410    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4411      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4412    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4413    }    }
4414    
4415  if (newline < 0)  if (newline == -2)
4416      {
4417      md->nltype = NLTYPE_ANYCRLF;
4418      }
4419    else if (newline < 0)
4420    {    {
4421    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4422    }    }
# Line 4019  for(;;) Line 4592  for(;;)
4592        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4593          start_match++;          start_match++;
4594    
4595        /* If we have just passed a CR and the newline option is ANY, and we are        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4596        now at a LF, advance the match position by one more character. */        and we are now at a LF, advance the match position by one more character.
4597          */
4598    
4599        if (start_match[-1] == '\r' &&        if (start_match[-1] == '\r' &&
4600             md->nltype == NLTYPE_ANY &&             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4601             start_match < end_subject &&             start_match < end_subject &&
4602             *start_match == '\n')             *start_match == '\n')
4603          start_match++;          start_match++;
# Line 4113  for(;;) Line 4687  for(;;)
4687    
4688    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4689    
4690    md->start_match = start_match;    md->start_match_ptr = start_match;      /* Insurance */
4691    md->match_call_count = 0;    md->match_call_count = 0;
4692    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
   rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);  
4693    
4694    /* Any return other than MATCH_NOMATCH breaks the loop. */    /* Any return other than MATCH_NOMATCH breaks the loop. */
4695    
# Line 4142  for(;;) Line 4715  for(;;)
4715    
4716    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4717    
4718    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and the newline option is CRLF or ANY or
4719    are now at a LF, advance the match position by one more character. */    ANYCRLF, and we are now at a LF, advance the match position by one more
4720      character. */
4721    
4722    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4723         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&         (md->nltype == NLTYPE_ANY ||
4724            md->nltype == NLTYPE_ANYCRLF ||
4725            md->nllen == 2) &&
4726         start_match < end_subject &&         start_match < end_subject &&
4727         *start_match == '\n')         *start_match == '\n')
4728      start_match++;      start_match++;
# Line 4193  if (rc == MATCH_MATCH) Line 4769  if (rc == MATCH_MATCH)
4769    
4770    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4771    
4772    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4773      md->start_match_ptr might be modified if \K was encountered on the
4774      successful matching path. */
4775    
4776    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4777      {      {
4778      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4779      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4780      }      }
4781    

Legend:
Removed from v.137  
changed lines
  Added in v.207

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12