/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #ifdef HAVE_CONFIG_H
46    #include <config.h>
47    #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
50  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
51  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
52    
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55  /* The chain of eptrblocks for tail recursions uses memory in stack workspace,  /* Undefine some potentially clashing cpp symbols */
 obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */  
56    
57  #define EPTR_WORK_SIZE (1000)  #undef min
58    #undef max
59    
60  /* Flag bits for the match() function */  /* Flag bits for the match() function */
61    
62  #define match_condassert     0x01  /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
63  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
 #define match_tail_recursed  0x04  /* Tail recursive call */  
64    
65  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
66  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 65  defined PCRE_ERROR_xxx codes, which are Line 68  defined PCRE_ERROR_xxx codes, which are
68  #define MATCH_MATCH        1  #define MATCH_MATCH        1
69  #define MATCH_NOMATCH      0  #define MATCH_NOMATCH      0
70    
71    /* Special internal returns from the match() function. Make them sufficiently
72    negative to avoid the external error codes. */
73    
74    #define MATCH_COMMIT       (-999)
75    #define MATCH_PRUNE        (-998)
76    #define MATCH_SKIP         (-997)
77    #define MATCH_THEN         (-996)
78    
79  /* Maximum number of ints of offset to save on the stack for recursive calls.  /* Maximum number of ints of offset to save on the stack for recursive calls.
80  If the offset vector is bigger, malloc is used. This should be a multiple of 3,  If the offset vector is bigger, malloc is used. This should be a multiple of 3,
81  because the offset vector is always a multiple of 3 long. */  because the offset vector is always a multiple of 3 long. */
# Line 183  calls by keeping local variables that ne Line 194  calls by keeping local variables that ne
194  obtained from malloc() instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
195  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
196  always used to.  always used to.
197    
198    The original heap-recursive code used longjmp(). However, it seems that this
199    can be very slow on some operating systems. Following a suggestion from Stan
200    Switzer, the use of longjmp() has been abolished, at the cost of having to
201    provide a unique number for each call to RMATCH. There is no way of generating
202    a sequence of numbers at compile time in C. I have given them names, to make
203    them stand out more clearly.
204    
205    Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
206    FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
207    tests. Furthermore, not using longjmp() means that local dynamic variables
208    don't have indeterminate values; this has meant that the frame size can be
209    reduced because the result can be "passed back" by straight setting of the
210    variable instead of being passed in the frame.
211  ****************************************************************************  ****************************************************************************
212  ***************************************************************************/  ***************************************************************************/
213    
214    /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
215    below must be updated in sync.  */
216    
217    enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
218           RM11,  RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
219           RM21,  RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
220           RM31,  RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40,
221           RM41,  RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50,
222           RM51,  RM52, RM53, RM54 };
223    
224  /* These versions of the macros use the stack, as normal. There are debugging  /* These versions of the macros use the stack, as normal. There are debugging
225  versions and production versions. */  versions and production versions. Note that the "rw" argument of RMATCH isn't
226    actually used in this definition. */
227    
228  #ifndef NO_RECURSE  #ifndef NO_RECURSE
229  #define REGISTER register  #define REGISTER register
230    
231  #ifdef DEBUG  #ifdef DEBUG
232  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
233    { \    { \
234    printf("match() called in line %d\n", __LINE__); \    printf("match() called in line %d\n", __LINE__); \
235    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
236    printf("to line %d\n", __LINE__); \    printf("to line %d\n", __LINE__); \
237    }    }
238  #define RRETURN(ra) \  #define RRETURN(ra) \
# Line 205  versions and production versions. */ Line 241  versions and production versions. */
241    return ra; \    return ra; \
242    }    }
243  #else  #else
244  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
245    rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)    rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
246  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
247  #endif  #endif
248    
249  #else  #else
250    
251    
252  /* These versions of the macros manage a private stack on the heap. Note  /* These versions of the macros manage a private stack on the heap. Note that
253  that the rd argument of RMATCH isn't actually used. It's the md argument of  the "rd" argument of RMATCH isn't actually used in this definition. It's the md
254  match(), which never changes. */  argument of match(), which never changes. */
255    
256  #define REGISTER  #define REGISTER
257    
258  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\  #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\
259    {\    {\
260    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\    heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
261    if (setjmp(frame->Xwhere) == 0)\    frame->Xwhere = rw; \
262      {\    newframe->Xeptr = ra;\
263      newframe->Xeptr = ra;\    newframe->Xecode = rb;\
264      newframe->Xecode = rb;\    newframe->Xmstart = mstart;\
265      newframe->Xoffset_top = rc;\    newframe->Xoffset_top = rc;\
266      newframe->Xims = re;\    newframe->Xims = re;\
267      newframe->Xeptrb = rf;\    newframe->Xeptrb = rf;\
268      newframe->Xflags = rg;\    newframe->Xflags = rg;\
269      newframe->Xrdepth = frame->Xrdepth + 1;\    newframe->Xrdepth = frame->Xrdepth + 1;\
270      newframe->Xprevframe = frame;\    newframe->Xprevframe = frame;\
271      frame = newframe;\    frame = newframe;\
272      DPRINTF(("restarting from line %d\n", __LINE__));\    DPRINTF(("restarting from line %d\n", __LINE__));\
273      goto HEAP_RECURSE;\    goto HEAP_RECURSE;\
274      }\    L_##rw:\
275    else\    DPRINTF(("jumped back to line %d\n", __LINE__));\
     {\  
     DPRINTF(("longjumped back to line %d\n", __LINE__));\  
     frame = md->thisframe;\  
     rx = frame->Xresult;\  
     }\  
276    }    }
277    
278  #define RRETURN(ra)\  #define RRETURN(ra)\
# Line 251  match(), which never changes. */ Line 282  match(), which never changes. */
282    (pcre_stack_free)(newframe);\    (pcre_stack_free)(newframe);\
283    if (frame != NULL)\    if (frame != NULL)\
284      {\      {\
285      frame->Xresult = ra;\      rrc = ra;\
286      md->thisframe = frame;\      goto HEAP_RETURN;\
     longjmp(frame->Xwhere, 1);\  
287      }\      }\
288    return ra;\    return ra;\
289    }    }
# Line 268  typedef struct heapframe { Line 298  typedef struct heapframe {
298    
299    const uschar *Xeptr;    const uschar *Xeptr;
300    const uschar *Xecode;    const uschar *Xecode;
301      const uschar *Xmstart;
302    int Xoffset_top;    int Xoffset_top;
303    long int Xims;    long int Xims;
304    eptrblock *Xeptrb;    eptrblock *Xeptrb;
# Line 299  typedef struct heapframe { Line 330  typedef struct heapframe {
330    int Xprop_category;    int Xprop_category;
331    int Xprop_chartype;    int Xprop_chartype;
332    int Xprop_script;    int Xprop_script;
333      int Xoclength;
334      uschar Xocchars[8];
335  #endif  #endif
336    
337    int Xctype;    int Xctype;
# Line 316  typedef struct heapframe { Line 349  typedef struct heapframe {
349    
350    eptrblock Xnewptrb;    eptrblock Xnewptrb;
351    
352    /* Place to pass back result, and where to jump back to */    /* Where to jump back to */
353    
354    int  Xresult;    int Xwhere;
   jmp_buf Xwhere;  
355    
356  } heapframe;  } heapframe;
357    
# Line 347  made performance worse. Line 379  made performance worse.
379  Arguments:  Arguments:
380     eptr        pointer to current character in subject     eptr        pointer to current character in subject
381     ecode       pointer to current position in compiled code     ecode       pointer to current position in compiled code
382       mstart      pointer to the current match start position (can be modified
383                     by encountering \K)
384     offset_top  current top pointer     offset_top  current top pointer
385     md          pointer to "static" info for the match     md          pointer to "static" info for the match
386     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 356  Arguments: Line 390  Arguments:
390                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
391                   match_cbegroup - this is the start of an unlimited repeat                   match_cbegroup - this is the start of an unlimited repeat
392                     group that can match an empty string                     group that can match an empty string
                  match_tail_recursed - this is a tail_recursed group  
393     rdepth      the recursion depth     rdepth      the recursion depth
394    
395  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
# Line 366  Returns: MATCH_MATCH if matched Line 399  Returns: MATCH_MATCH if matched
399  */  */
400    
401  static int  static int
402  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
403    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
404    int flags, unsigned int rdepth)    int flags, unsigned int rdepth)
405  {  {
# Line 394  frame->Xprevframe = NULL; /* Line 427  frame->Xprevframe = NULL; /*
427    
428  frame->Xeptr = eptr;  frame->Xeptr = eptr;
429  frame->Xecode = ecode;  frame->Xecode = ecode;
430    frame->Xmstart = mstart;
431  frame->Xoffset_top = offset_top;  frame->Xoffset_top = offset_top;
432  frame->Xims = ims;  frame->Xims = ims;
433  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
# Line 408  HEAP_RECURSE: Line 442  HEAP_RECURSE:
442    
443  #define eptr               frame->Xeptr  #define eptr               frame->Xeptr
444  #define ecode              frame->Xecode  #define ecode              frame->Xecode
445    #define mstart             frame->Xmstart
446  #define offset_top         frame->Xoffset_top  #define offset_top         frame->Xoffset_top
447  #define ims                frame->Xims  #define ims                frame->Xims
448  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
# Line 441  HEAP_RECURSE: Line 476  HEAP_RECURSE:
476  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
477  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
478  #define prop_script        frame->Xprop_script  #define prop_script        frame->Xprop_script
479    #define oclength           frame->Xoclength
480    #define occhars            frame->Xocchars
481  #endif  #endif
482    
483  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 494  int prop_fail_result; Line 531  int prop_fail_result;
531  int prop_category;  int prop_category;
532  int prop_chartype;  int prop_chartype;
533  int prop_script;  int prop_script;
534    int oclength;
535    uschar occhars[8];
536  #endif  #endif
537    
538  int ctype;  int ctype;
# Line 534  defined). However, RMATCH isn't like a f Line 573  defined). However, RMATCH isn't like a f
573  complicated macro. It has to be used in one particular way. This shouldn't,  complicated macro. It has to be used in one particular way. This shouldn't,
574  however, impact performance when true recursion is being used. */  however, impact performance when true recursion is being used. */
575    
576    #ifdef SUPPORT_UTF8
577    utf8 = md->utf8;       /* Local copy of the flag */
578    #else
579    utf8 = FALSE;
580    #endif
581    
582  /* First check that we haven't called match() too many times, or that we  /* First check that we haven't called match() too many times, or that we
583  haven't exceeded the recursive call limit. */  haven't exceeded the recursive call limit. */
584    
# Line 542  if (rdepth >= md->match_limit_recursion) Line 587  if (rdepth >= md->match_limit_recursion)
587    
588  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
589    
 #ifdef SUPPORT_UTF8  
 utf8 = md->utf8;       /* Local copy of the flag */  
 #else  
 utf8 = FALSE;  
 #endif  
   
590  /* At the start of a group with an unlimited repeat that may match an empty  /* At the start of a group with an unlimited repeat that may match an empty
591  string, the match_cbegroup flag is set. When this is the case, add the current  string, the match_cbegroup flag is set. When this is the case, add the current
592  subject pointer to the chain of such remembered pointers, to be checked when we  subject pointer to the chain of such remembered pointers, to be checked when we
593  hit the closing ket, in order to break infinite loops that match no characters.  hit the closing ket, in order to break infinite loops that match no characters.
594  When match() is called in other circumstances, don't add to the chain. If this  When match() is called in other circumstances, don't add to the chain. The
595  is a tail recursion, use a block from the workspace, as the one on the stack is  match_cbegroup flag must NOT be used with tail recursion, because the memory
596  already used. */  block that is used is on the stack, so a new one may be required for each
597    match(). */
598    
599  if ((flags & match_cbegroup) != 0)  if ((flags & match_cbegroup) != 0)
600    {    {
601    eptrblock *p;    newptrb.epb_saved_eptr = eptr;
602    if ((flags & match_tail_recursed) != 0)    newptrb.epb_prev = eptrb;
603      {    eptrb = &newptrb;
     if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);  
     p = md->eptrchain + md->eptrn++;  
     }  
   else p = &newptrb;  
   p->epb_saved_eptr = eptr;  
   p->epb_prev = eptrb;  
   eptrb = p;  
604    }    }
605    
606  /* Now start processing the opcodes. */  /* Now start processing the opcodes. */
# Line 582  for (;;) Line 615  for (;;)
615    
616    if (md->partial &&    if (md->partial &&
617        eptr >= md->end_subject &&        eptr >= md->end_subject &&
618        eptr > md->start_match)        eptr > mstart)
619      md->hitend = TRUE;      md->hitend = TRUE;
620    
621    switch(op)    switch(op)
622      {      {
623        case OP_FAIL:
624        RRETURN(MATCH_NOMATCH);
625    
626        case OP_PRUNE:
627        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
628          ims, eptrb, flags, RM51);
629        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
630        RRETURN(MATCH_PRUNE);
631    
632        case OP_COMMIT:
633        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
634          ims, eptrb, flags, RM52);
635        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
636        RRETURN(MATCH_COMMIT);
637    
638        case OP_SKIP:
639        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
640          ims, eptrb, flags, RM53);
641        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
642        md->start_match_ptr = eptr;   /* Pass back current position */
643        RRETURN(MATCH_SKIP);
644    
645        case OP_THEN:
646        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
647          ims, eptrb, flags, RM54);
648        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
649        RRETURN(MATCH_THEN);
650    
651      /* Handle a capturing bracket. If there is space in the offset vector, save      /* Handle a capturing bracket. If there is space in the offset vector, save
652      the current subject position in the working slot at the top of the vector.      the current subject position in the working slot at the top of the vector.
653      We mustn't change the current values of the data slot, because they may be      We mustn't change the current values of the data slot, because they may be
# Line 626  for (;;) Line 687  for (;;)
687        flags = (op == OP_SCBRA)? match_cbegroup : 0;        flags = (op == OP_SCBRA)? match_cbegroup : 0;
688        do        do
689          {          {
690          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,          RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
691            ims, eptrb, flags);            ims, eptrb, flags, RM1);
692          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
693          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
694          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
695          }          }
# Line 643  for (;;) Line 704  for (;;)
704        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
705        }        }
706    
707      /* Insufficient room for saving captured contents. Treat as a non-capturing      /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
708      bracket. */      as a non-capturing bracket. */
709    
710        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
711        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
712    
713      DPRINTF(("insufficient capture room: treat as non-capturing\n"));      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
714    
715        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
716        /* VVVVVVVVVVVVVVVVVVVVVVVVV */
717    
718      /* Non-capturing bracket. Loop for all the alternatives. When we get to the      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
719      final alternative within the brackets, we would return the result of a      final alternative within the brackets, we would return the result of a
720      recursive call to match() whatever happened. We can reduce stack usage by      recursive call to match() whatever happened. We can reduce stack usage by
721      turning this into a tail recursion. */      turning this into a tail recursion, except in the case when match_cbegroup
722        is set. */
723    
724      case OP_BRA:      case OP_BRA:
725      case OP_SBRA:      case OP_SBRA:
# Line 659  for (;;) Line 727  for (;;)
727      flags = (op >= OP_SBRA)? match_cbegroup : 0;      flags = (op >= OP_SBRA)? match_cbegroup : 0;
728      for (;;)      for (;;)
729        {        {
730        if (ecode[GET(ecode, 1)] != OP_ALT)        if (ecode[GET(ecode, 1)] != OP_ALT)   /* Final alternative */
731          {          {
732          ecode += _pcre_OP_lengths[*ecode];          if (flags == 0)    /* Not a possibly empty group */
733          flags |= match_tail_recursed;            {
734          DPRINTF(("bracket 0 tail recursion\n"));            ecode += _pcre_OP_lengths[*ecode];
735          goto TAIL_RECURSE;            DPRINTF(("bracket 0 tail recursion\n"));
736              goto TAIL_RECURSE;
737              }
738    
739            /* Possibly empty group; can't use tail recursion. */
740    
741            RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
742              eptrb, flags, RM48);
743            RRETURN(rrc);
744          }          }
745    
746        /* For non-final alternatives, continue the loop for a NOMATCH result;        /* For non-final alternatives, continue the loop for a NOMATCH result;
747        otherwise return. */        otherwise return. */
748    
749        RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,        RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
750          eptrb, flags);          eptrb, flags, RM2);
751        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
752        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
753        }        }
754      /* Control never reaches here. */      /* Control never reaches here. */
# Line 712  for (;;) Line 788  for (;;)
788    
789      else      else
790        {        {
791        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
792            match_condassert);            match_condassert, RM3);
793        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
794          {          {
795          condition = TRUE;          condition = TRUE;
796          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
797          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
798          }          }
799        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
800          {          {
801          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
802          }          }
# Line 732  for (;;) Line 808  for (;;)
808        }        }
809    
810      /* We are now at the branch that is to be obeyed. As there is only one,      /* We are now at the branch that is to be obeyed. As there is only one,
811      we can use tail recursion to avoid using another stack frame. If the second      we can use tail recursion to avoid using another stack frame, except when
812      alternative doesn't exist, we can just plough on. */      match_cbegroup is required for an unlimited repeat of a possibly empty
813        group. If the second alternative doesn't exist, we can just plough on. */
814    
815      if (condition || *ecode == OP_ALT)      if (condition || *ecode == OP_ALT)
816        {        {
817        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
818        flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);        if (op == OP_SCOND)        /* Possibly empty group */
819        goto TAIL_RECURSE;          {
820            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, match_cbegroup, RM49);
821            RRETURN(rrc);
822            }
823          else                       /* Group must match something */
824            {
825            flags = 0;
826            goto TAIL_RECURSE;
827            }
828        }        }
829      else      else                         /* Condition false & no 2nd alternative */
830        {        {
831        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
832        }        }
833      break;      break;
834    
835    
836      /* End of the pattern. If we are in a top-level recursion, we should      /* End of the pattern, either real or forced. If we are in a top-level
837      restore the offsets appropriately and continue from after the call. */      recursion, we should restore the offsets appropriately and continue from
838        after the call. */
839    
840        case OP_ACCEPT:
841      case OP_END:      case OP_END:
842      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
843        {        {
# Line 759  for (;;) Line 846  for (;;)
846        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
847        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
848          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
849        md->start_match = rec->save_start;        mstart = rec->save_start;
850        ims = original_ims;        ims = original_ims;
851        ecode = rec->after_call;        ecode = rec->after_call;
852        break;        break;
# Line 768  for (;;) Line 855  for (;;)
855      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty      /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
856      string - backtracking will then try other alternatives, if any. */      string - backtracking will then try other alternatives, if any. */
857    
858      if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);      if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
859      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;           /* Record where we ended */
860      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;    /* and how many extracts were taken */
861        md->start_match_ptr = mstart;       /* and the start (\K can modify) */
862      RRETURN(MATCH_MATCH);      RRETURN(MATCH_MATCH);
863    
864      /* Change option settings */      /* Change option settings */
# Line 791  for (;;) Line 879  for (;;)
879      case OP_ASSERTBACK:      case OP_ASSERTBACK:
880      do      do
881        {        {
882        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
883            RM4);
884        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
885        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
886        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
887        }        }
888      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 817  for (;;) Line 906  for (;;)
906      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
907      do      do
908        {        {
909        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
910            RM5);
911        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
912        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
913        ecode += GET(ecode,1);        ecode += GET(ecode,1);
914        }        }
915      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 843  for (;;) Line 933  for (;;)
933          {          {
934          eptr--;          eptr--;
935          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
936          BACKCHAR(eptr)          BACKCHAR(eptr);
937          }          }
938        }        }
939      else      else
# Line 874  for (;;) Line 964  for (;;)
964        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
965        cb.subject          = (PCRE_SPTR)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
966        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
967        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = mstart - md->start_subject;
968        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
969        cb.pattern_position = GET(ecode, 2);        cb.pattern_position = GET(ecode, 2);
970        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);        cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
# Line 936  for (;;) Line 1026  for (;;)
1026    
1027        memcpy(new_recursive.offset_save, md->offset_vector,        memcpy(new_recursive.offset_save, md->offset_vector,
1028              new_recursive.saved_max * sizeof(int));              new_recursive.saved_max * sizeof(int));
1029        new_recursive.save_start = md->start_match;        new_recursive.save_start = mstart;
1030        md->start_match = eptr;        mstart = eptr;
1031    
1032        /* OK, now we can do the recursion. For each top-level alternative we        /* OK, now we can do the recursion. For each top-level alternative we
1033        restore the offset and recursion data. */        restore the offset and recursion data. */
# Line 946  for (;;) Line 1036  for (;;)
1036        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;        flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
1037        do        do
1038          {          {
1039          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,          RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
1040            md, ims, eptrb, flags);            md, ims, eptrb, flags, RM6);
1041          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
1042            {            {
1043            DPRINTF(("Recursion matched\n"));            DPRINTF(("Recursion matched\n"));
# Line 956  for (;;) Line 1046  for (;;)
1046              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
1047            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
1048            }            }
1049          else if (rrc != MATCH_NOMATCH)          else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
1050            {            {
1051            DPRINTF(("Recursion gave error %d\n", rrc));            DPRINTF(("Recursion gave error %d\n", rrc));
1052            RRETURN(rrc);            RRETURN(rrc);
# Line 990  for (;;) Line 1080  for (;;)
1080    
1081      do      do
1082        {        {
1083        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
         eptrb, 0);  
1084        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1085        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
1086        ecode += GET(ecode,1);        ecode += GET(ecode,1);
1087        }        }
1088      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
# Line 1036  for (;;) Line 1125  for (;;)
1125    
1126      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1127        {        {
1128        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM8);
1129        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1130        ecode = prev;        ecode = prev;
1131        flags = match_tail_recursed;        flags = 0;
1132        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1133        }        }
1134      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1135        {        {
1136        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, match_cbegroup, RM9);
1137        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1138        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1139        flags = match_tail_recursed;        flags = 0;
1140        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1141        }        }
1142      /* Control never gets here */      /* Control never gets here */
# Line 1068  for (;;) Line 1157  for (;;)
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
1159        next = ecode+1;        next = ecode+1;
1160        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next, offset_top, md, ims, eptrb, 0, RM10);
1161        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1162        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1163        ecode = next + 1 + LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
# Line 1079  for (;;) Line 1168  for (;;)
1168        {        {
1169        next = ecode+1;        next = ecode+1;
1170        do next += GET(next, 1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1171        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0, RM11);
1172        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1173        ecode++;        ecode++;
1174        }        }
# Line 1149  for (;;) Line 1238  for (;;)
1238          recursion_info *rec = md->recursive;          recursion_info *rec = md->recursive;
1239          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1240          md->recursive = rec->prevrec;          md->recursive = rec->prevrec;
1241          md->start_match = rec->save_start;          mstart = rec->save_start;
1242          memcpy(md->offset_vector, rec->offset_save,          memcpy(md->offset_vector, rec->offset_save,
1243            rec->saved_max * sizeof(int));            rec->saved_max * sizeof(int));
1244          ecode = rec->after_call;          ecode = rec->after_call;
# Line 1178  for (;;) Line 1267  for (;;)
1267    
1268      /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1269      preceding bracket, in the appropriate order. In the second case, we can use      preceding bracket, in the appropriate order. In the second case, we can use
1270      tail recursion to avoid using another stack frame. */      tail recursion to avoid using another stack frame, unless we have an
1271        unlimited repeat of a group that can match an empty string. */
1272    
1273      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
1274    
1275      if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1276        {        {
1277        RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM12);
1278        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1279          if (flags != 0)    /* Could match an empty string */
1280            {
1281            RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM50);
1282            RRETURN(rrc);
1283            }
1284        ecode = prev;        ecode = prev;
       flags |= match_tail_recursed;  
1285        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1286        }        }
1287      else  /* OP_KETRMAX */      else  /* OP_KETRMAX */
1288        {        {
1289        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);        RMATCH(eptr, prev, offset_top, md, ims, eptrb, flags, RM13);
1290        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1291        ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1292        flags = match_tail_recursed;        flags = 0;
1293        goto TAIL_RECURSE;        goto TAIL_RECURSE;
1294        }        }
1295      /* Control never gets here */      /* Control never gets here */
# Line 1228  for (;;) Line 1322  for (;;)
1322      ecode++;      ecode++;
1323      break;      break;
1324    
1325        /* Reset the start of match point */
1326    
1327        case OP_SET_SOM:
1328        mstart = eptr;
1329        ecode++;
1330        break;
1331    
1332      /* Assert before internal newline if multiline, or before a terminating      /* Assert before internal newline if multiline, or before a terminating
1333      newline unless endonly is set, else end of subject unless noteol is set. */      newline unless endonly is set, else end of subject unless noteol is set. */
1334    
# Line 1436  for (;;) Line 1537  for (;;)
1537      ecode++;      ecode++;
1538      break;      break;
1539    
1540        case OP_NOT_HSPACE:
1541        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1542        GETCHARINCTEST(c, eptr);
1543        switch(c)
1544          {
1545          default: break;
1546          case 0x09:      /* HT */
1547          case 0x20:      /* SPACE */
1548          case 0xa0:      /* NBSP */
1549          case 0x1680:    /* OGHAM SPACE MARK */
1550          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1551          case 0x2000:    /* EN QUAD */
1552          case 0x2001:    /* EM QUAD */
1553          case 0x2002:    /* EN SPACE */
1554          case 0x2003:    /* EM SPACE */
1555          case 0x2004:    /* THREE-PER-EM SPACE */
1556          case 0x2005:    /* FOUR-PER-EM SPACE */
1557          case 0x2006:    /* SIX-PER-EM SPACE */
1558          case 0x2007:    /* FIGURE SPACE */
1559          case 0x2008:    /* PUNCTUATION SPACE */
1560          case 0x2009:    /* THIN SPACE */
1561          case 0x200A:    /* HAIR SPACE */
1562          case 0x202f:    /* NARROW NO-BREAK SPACE */
1563          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1564          case 0x3000:    /* IDEOGRAPHIC SPACE */
1565          RRETURN(MATCH_NOMATCH);
1566          }
1567        ecode++;
1568        break;
1569    
1570        case OP_HSPACE:
1571        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1572        GETCHARINCTEST(c, eptr);
1573        switch(c)
1574          {
1575          default: RRETURN(MATCH_NOMATCH);
1576          case 0x09:      /* HT */
1577          case 0x20:      /* SPACE */
1578          case 0xa0:      /* NBSP */
1579          case 0x1680:    /* OGHAM SPACE MARK */
1580          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1581          case 0x2000:    /* EN QUAD */
1582          case 0x2001:    /* EM QUAD */
1583          case 0x2002:    /* EN SPACE */
1584          case 0x2003:    /* EM SPACE */
1585          case 0x2004:    /* THREE-PER-EM SPACE */
1586          case 0x2005:    /* FOUR-PER-EM SPACE */
1587          case 0x2006:    /* SIX-PER-EM SPACE */
1588          case 0x2007:    /* FIGURE SPACE */
1589          case 0x2008:    /* PUNCTUATION SPACE */
1590          case 0x2009:    /* THIN SPACE */
1591          case 0x200A:    /* HAIR SPACE */
1592          case 0x202f:    /* NARROW NO-BREAK SPACE */
1593          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1594          case 0x3000:    /* IDEOGRAPHIC SPACE */
1595          break;
1596          }
1597        ecode++;
1598        break;
1599    
1600        case OP_NOT_VSPACE:
1601        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1602        GETCHARINCTEST(c, eptr);
1603        switch(c)
1604          {
1605          default: break;
1606          case 0x0a:      /* LF */
1607          case 0x0b:      /* VT */
1608          case 0x0c:      /* FF */
1609          case 0x0d:      /* CR */
1610          case 0x85:      /* NEL */
1611          case 0x2028:    /* LINE SEPARATOR */
1612          case 0x2029:    /* PARAGRAPH SEPARATOR */
1613          RRETURN(MATCH_NOMATCH);
1614          }
1615        ecode++;
1616        break;
1617    
1618        case OP_VSPACE:
1619        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1620        GETCHARINCTEST(c, eptr);
1621        switch(c)
1622          {
1623          default: RRETURN(MATCH_NOMATCH);
1624          case 0x0a:      /* LF */
1625          case 0x0b:      /* VT */
1626          case 0x0c:      /* FF */
1627          case 0x0d:      /* CR */
1628          case 0x85:      /* NEL */
1629          case 0x2028:    /* LINE SEPARATOR */
1630          case 0x2029:    /* PARAGRAPH SEPARATOR */
1631          break;
1632          }
1633        ecode++;
1634        break;
1635    
1636  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1637      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1638      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1591  for (;;) Line 1788  for (;;)
1788          {          {
1789          for (fi = min;; fi++)          for (fi = min;; fi++)
1790            {            {
1791            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
1792            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1793            if (fi >= max || !match_ref(offset, eptr, length, md, ims))            if (fi >= max || !match_ref(offset, eptr, length, md, ims))
1794              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 1612  for (;;) Line 1809  for (;;)
1809            }            }
1810          while (eptr >= pp)          while (eptr >= pp)
1811            {            {
1812            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15);
1813            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1814            eptr -= length;            eptr -= length;
1815            }            }
# Line 1717  for (;;) Line 1914  for (;;)
1914            {            {
1915            for (fi = min;; fi++)            for (fi = min;; fi++)
1916              {              {
1917              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
1918              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1919              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1920              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 1737  for (;;) Line 1934  for (;;)
1934            {            {
1935            for (fi = min;; fi++)            for (fi = min;; fi++)
1936              {              {
1937              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
1938              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1939              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1940              c = *eptr++;              c = *eptr++;
# Line 1774  for (;;) Line 1971  for (;;)
1971              }              }
1972            for (;;)            for (;;)
1973              {              {
1974              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM18);
1975              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1976              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
1977              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 1793  for (;;) Line 1990  for (;;)
1990              }              }
1991            while (eptr >= pp)            while (eptr >= pp)
1992              {              {
1993              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM19);
1994              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1995              eptr--;              eptr--;
1996              }              }
# Line 1864  for (;;) Line 2061  for (;;)
2061          {          {
2062          for (fi = min;; fi++)          for (fi = min;; fi++)
2063            {            {
2064            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
2065            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2066            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2067            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
# Line 1888  for (;;) Line 2085  for (;;)
2085            }            }
2086          for(;;)          for(;;)
2087            {            {
2088            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM21);
2089            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2090            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
2091            BACKCHAR(eptr)            if (utf8) BACKCHAR(eptr);
2092            }            }
2093          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
2094          }          }
# Line 2045  for (;;) Line 2242  for (;;)
2242    
2243        if (length > 1)        if (length > 1)
2244          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2245  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2246          unsigned int othercase;          unsigned int othercase;
2247          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2248              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
2249            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2250            else oclength = 0;
2251  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2252    
2253          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2254            {            {
2255            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2256    #ifdef SUPPORT_UCP
2257            /* Need braces because of following else */            /* Need braces because of following else */
2258            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2259            else            else
# Line 2065  for (;;) Line 2261  for (;;)
2261              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2262              eptr += oclength;              eptr += oclength;
2263              }              }
2264    #else   /* without SUPPORT_UCP */
2265              else { RRETURN(MATCH_NOMATCH); }
2266    #endif  /* SUPPORT_UCP */
2267            }            }
2268    
2269          if (min == max) continue;          if (min == max) continue;
# Line 2073  for (;;) Line 2272  for (;;)
2272            {            {
2273            for (fi = min;; fi++)            for (fi = min;; fi++)
2274              {              {
2275              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
2276              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2277              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2278              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2279    #ifdef SUPPORT_UCP
2280              /* Need braces because of following else */              /* Need braces because of following else */
2281              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2282              else              else
# Line 2084  for (;;) Line 2284  for (;;)
2284                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2285                eptr += oclength;                eptr += oclength;
2286                }                }
2287    #else   /* without SUPPORT_UCP */
2288                else { RRETURN (MATCH_NOMATCH); }
2289    #endif  /* SUPPORT_UCP */
2290              }              }
2291            /* Control never gets here */            /* Control never gets here */
2292            }            }
# Line 2095  for (;;) Line 2298  for (;;)
2298              {              {
2299              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2300              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2301    #ifdef SUPPORT_UCP
2302              else if (oclength == 0) break;              else if (oclength == 0) break;
2303              else              else
2304                {                {
2305                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2306                eptr += oclength;                eptr += oclength;
2307                }                }
2308    #else   /* without SUPPORT_UCP */
2309                else break;
2310    #endif  /* SUPPORT_UCP */
2311              }              }
2312    
2313            if (possessive) continue;            if (possessive) continue;
2314            while (eptr >= pp)            for(;;)
2315             {             {
2316             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
2317             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2318               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2319    #ifdef SUPPORT_UCP
2320               eptr--;
2321               BACKCHAR(eptr);
2322    #else   /* without SUPPORT_UCP */
2323             eptr -= length;             eptr -= length;
2324    #endif  /* SUPPORT_UCP */
2325             }             }
           RRETURN(MATCH_NOMATCH);  
2326            }            }
2327          /* Control never gets here */          /* Control never gets here */
2328          }          }
# Line 2150  for (;;) Line 2362  for (;;)
2362          {          {
2363          for (fi = min;; fi++)          for (fi = min;; fi++)
2364            {            {
2365            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
2366            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2367            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
2368                fc != md->lcc[*eptr++])                fc != md->lcc[*eptr++])
# Line 2169  for (;;) Line 2381  for (;;)
2381          if (possessive) continue;          if (possessive) continue;
2382          while (eptr >= pp)          while (eptr >= pp)
2383            {            {
2384            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
2385            eptr--;            eptr--;
2386            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2387            }            }
# Line 2188  for (;;) Line 2400  for (;;)
2400          {          {
2401          for (fi = min;; fi++)          for (fi = min;; fi++)
2402            {            {
2403            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
2404            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2405            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)            if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
2406              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
# Line 2206  for (;;) Line 2418  for (;;)
2418          if (possessive) continue;          if (possessive) continue;
2419          while (eptr >= pp)          while (eptr >= pp)
2420            {            {
2421            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
2422            eptr--;            eptr--;
2423            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2424            }            }
# Line 2351  for (;;) Line 2563  for (;;)
2563            register unsigned int d;            register unsigned int d;
2564            for (fi = min;; fi++)            for (fi = min;; fi++)
2565              {              {
2566              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2567              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2568              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2569              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
# Line 2365  for (;;) Line 2577  for (;;)
2577            {            {
2578            for (fi = min;; fi++)            for (fi = min;; fi++)
2579              {              {
2580              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
2581              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2582              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])              if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
2583                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2397  for (;;) Line 2609  for (;;)
2609          if (possessive) continue;          if (possessive) continue;
2610          for(;;)          for(;;)
2611              {              {
2612              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM30);
2613              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2614              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2615              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2415  for (;;) Line 2627  for (;;)
2627            if (possessive) continue;            if (possessive) continue;
2628            while (eptr >= pp)            while (eptr >= pp)
2629              {              {
2630              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM31);
2631              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2632              eptr--;              eptr--;
2633              }              }
# Line 2460  for (;;) Line 2672  for (;;)
2672            register unsigned int d;            register unsigned int d;
2673            for (fi = min;; fi++)            for (fi = min;; fi++)
2674              {              {
2675              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2676              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2677              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2678              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fi >= max || eptr >= md->end_subject || fc == d)
# Line 2473  for (;;) Line 2685  for (;;)
2685            {            {
2686            for (fi = min;; fi++)            for (fi = min;; fi++)
2687              {              {
2688              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
2689              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2690              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)              if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
2691                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2504  for (;;) Line 2716  for (;;)
2716            if (possessive) continue;            if (possessive) continue;
2717            for(;;)            for(;;)
2718              {              {
2719              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM34);
2720              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2721              if (eptr-- == pp) break;        /* Stop if tried at original pos */              if (eptr-- == pp) break;        /* Stop if tried at original pos */
2722              BACKCHAR(eptr);              BACKCHAR(eptr);
# Line 2522  for (;;) Line 2734  for (;;)
2734            if (possessive) continue;            if (possessive) continue;
2735            while (eptr >= pp)            while (eptr >= pp)
2736              {              {
2737              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM35);
2738              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2739              eptr--;              eptr--;
2740              }              }
# Line 2629  for (;;) Line 2841  for (;;)
2841            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2842              {              {
2843              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2844              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2845              }              }
2846            break;            break;
2847    
# Line 2637  for (;;) Line 2849  for (;;)
2849            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2850              {              {
2851              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2852              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2853              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2854              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2855                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
# Line 2650  for (;;) Line 2862  for (;;)
2862            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2863              {              {
2864              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2865              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2866              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2867              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2868                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2661  for (;;) Line 2873  for (;;)
2873            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2874              {              {
2875              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2876              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2877              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2878              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2879                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2672  for (;;) Line 2884  for (;;)
2884            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2885              {              {
2886              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2887              GETCHARINC(c, eptr);              GETCHARINCTEST(c, eptr);
2888              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2889              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2890                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2753  for (;;) Line 2965  for (;;)
2965            }            }
2966          break;          break;
2967    
2968            case OP_NOT_HSPACE:
2969            for (i = 1; i <= min; i++)
2970              {
2971              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972              GETCHARINC(c, eptr);
2973              switch(c)
2974                {
2975                default: break;
2976                case 0x09:      /* HT */
2977                case 0x20:      /* SPACE */
2978                case 0xa0:      /* NBSP */
2979                case 0x1680:    /* OGHAM SPACE MARK */
2980                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
2981                case 0x2000:    /* EN QUAD */
2982                case 0x2001:    /* EM QUAD */
2983                case 0x2002:    /* EN SPACE */
2984                case 0x2003:    /* EM SPACE */
2985                case 0x2004:    /* THREE-PER-EM SPACE */
2986                case 0x2005:    /* FOUR-PER-EM SPACE */
2987                case 0x2006:    /* SIX-PER-EM SPACE */
2988                case 0x2007:    /* FIGURE SPACE */
2989                case 0x2008:    /* PUNCTUATION SPACE */
2990                case 0x2009:    /* THIN SPACE */
2991                case 0x200A:    /* HAIR SPACE */
2992                case 0x202f:    /* NARROW NO-BREAK SPACE */
2993                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
2994                case 0x3000:    /* IDEOGRAPHIC SPACE */
2995                RRETURN(MATCH_NOMATCH);
2996                }
2997              }
2998            break;
2999    
3000            case OP_HSPACE:
3001            for (i = 1; i <= min; i++)
3002              {
3003              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3004              GETCHARINC(c, eptr);
3005              switch(c)
3006                {
3007                default: RRETURN(MATCH_NOMATCH);
3008                case 0x09:      /* HT */
3009                case 0x20:      /* SPACE */
3010                case 0xa0:      /* NBSP */
3011                case 0x1680:    /* OGHAM SPACE MARK */
3012                case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3013                case 0x2000:    /* EN QUAD */
3014                case 0x2001:    /* EM QUAD */
3015                case 0x2002:    /* EN SPACE */
3016                case 0x2003:    /* EM SPACE */
3017                case 0x2004:    /* THREE-PER-EM SPACE */
3018                case 0x2005:    /* FOUR-PER-EM SPACE */
3019                case 0x2006:    /* SIX-PER-EM SPACE */
3020                case 0x2007:    /* FIGURE SPACE */
3021                case 0x2008:    /* PUNCTUATION SPACE */
3022                case 0x2009:    /* THIN SPACE */
3023                case 0x200A:    /* HAIR SPACE */
3024                case 0x202f:    /* NARROW NO-BREAK SPACE */
3025                case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3026                case 0x3000:    /* IDEOGRAPHIC SPACE */
3027                break;
3028                }
3029              }
3030            break;
3031    
3032            case OP_NOT_VSPACE:
3033            for (i = 1; i <= min; i++)
3034              {
3035              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3036              GETCHARINC(c, eptr);
3037              switch(c)
3038                {
3039                default: break;
3040                case 0x0a:      /* LF */
3041                case 0x0b:      /* VT */
3042                case 0x0c:      /* FF */
3043                case 0x0d:      /* CR */
3044                case 0x85:      /* NEL */
3045                case 0x2028:    /* LINE SEPARATOR */
3046                case 0x2029:    /* PARAGRAPH SEPARATOR */
3047                RRETURN(MATCH_NOMATCH);
3048                }
3049              }
3050            break;
3051    
3052            case OP_VSPACE:
3053            for (i = 1; i <= min; i++)
3054              {
3055              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3056              GETCHARINC(c, eptr);
3057              switch(c)
3058                {
3059                default: RRETURN(MATCH_NOMATCH);
3060                case 0x0a:      /* LF */
3061                case 0x0b:      /* VT */
3062                case 0x0c:      /* FF */
3063                case 0x0d:      /* CR */
3064                case 0x85:      /* NEL */
3065                case 0x2028:    /* LINE SEPARATOR */
3066                case 0x2029:    /* PARAGRAPH SEPARATOR */
3067                break;
3068                }
3069              }
3070            break;
3071    
3072          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3073          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3074            {            {
# Line 2777  for (;;) Line 3093  for (;;)
3093          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3094            {            {
3095            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3096               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
3097              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3098            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3099            }            }
3100          break;          break;
3101    
# Line 2797  for (;;) Line 3113  for (;;)
3113          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3114            {            {
3115            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
3116               (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))               (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
3117              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3118            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
3119            }            }
3120          break;          break;
3121    
# Line 2864  for (;;) Line 3180  for (;;)
3180            }            }
3181          break;          break;
3182    
3183            case OP_NOT_HSPACE:
3184            for (i = 1; i <= min; i++)
3185              {
3186              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3187              switch(*eptr++)
3188                {
3189                default: break;
3190                case 0x09:      /* HT */
3191                case 0x20:      /* SPACE */
3192                case 0xa0:      /* NBSP */
3193                RRETURN(MATCH_NOMATCH);
3194                }
3195              }
3196            break;
3197    
3198            case OP_HSPACE:
3199            for (i = 1; i <= min; i++)
3200              {
3201              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3202              switch(*eptr++)
3203                {
3204                default: RRETURN(MATCH_NOMATCH);
3205                case 0x09:      /* HT */
3206                case 0x20:      /* SPACE */
3207                case 0xa0:      /* NBSP */
3208                break;
3209                }
3210              }
3211            break;
3212    
3213            case OP_NOT_VSPACE:
3214            for (i = 1; i <= min; i++)
3215              {
3216              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3217              switch(*eptr++)
3218                {
3219                default: break;
3220                case 0x0a:      /* LF */
3221                case 0x0b:      /* VT */
3222                case 0x0c:      /* FF */
3223                case 0x0d:      /* CR */
3224                case 0x85:      /* NEL */
3225                RRETURN(MATCH_NOMATCH);
3226                }
3227              }
3228            break;
3229    
3230            case OP_VSPACE:
3231            for (i = 1; i <= min; i++)
3232              {
3233              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3234              switch(*eptr++)
3235                {
3236                default: RRETURN(MATCH_NOMATCH);
3237                case 0x0a:      /* LF */
3238                case 0x0b:      /* VT */
3239                case 0x0c:      /* FF */
3240                case 0x0d:      /* CR */
3241                case 0x85:      /* NEL */
3242                break;
3243                }
3244              }
3245            break;
3246    
3247          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
3248          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3249            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2919  for (;;) Line 3299  for (;;)
3299            case PT_ANY:            case PT_ANY:
3300            for (fi = min;; fi++)            for (fi = min;; fi++)
3301              {              {
3302              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
3303              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3304              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3305              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2930  for (;;) Line 3310  for (;;)
3310            case PT_LAMP:            case PT_LAMP:
3311            for (fi = min;; fi++)            for (fi = min;; fi++)
3312              {              {
3313              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
3314              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3315              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3316              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2945  for (;;) Line 3325  for (;;)
3325            case PT_GC:            case PT_GC:
3326            for (fi = min;; fi++)            for (fi = min;; fi++)
3327              {              {
3328              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
3329              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3330              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3331              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2958  for (;;) Line 3338  for (;;)
3338            case PT_PC:            case PT_PC:
3339            for (fi = min;; fi++)            for (fi = min;; fi++)
3340              {              {
3341              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
3342              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3343              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3344              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2971  for (;;) Line 3351  for (;;)
3351            case PT_SC:            case PT_SC:
3352            for (fi = min;; fi++)            for (fi = min;; fi++)
3353              {              {
3354              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
3355              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3356              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3357              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
# Line 2993  for (;;) Line 3373  for (;;)
3373          {          {
3374          for (fi = min;; fi++)          for (fi = min;; fi++)
3375            {            {
3376            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
3377            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3378            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3379            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
# Line 3022  for (;;) Line 3402  for (;;)
3402          {          {
3403          for (fi = min;; fi++)          for (fi = min;; fi++)
3404            {            {
3405            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3406            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3407            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3408                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
# Line 3055  for (;;) Line 3435  for (;;)
3435                }                }
3436              break;              break;
3437    
3438                case OP_NOT_HSPACE:
3439                switch(c)
3440                  {
3441                  default: break;
3442                  case 0x09:      /* HT */
3443                  case 0x20:      /* SPACE */
3444                  case 0xa0:      /* NBSP */
3445                  case 0x1680:    /* OGHAM SPACE MARK */
3446                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3447                  case 0x2000:    /* EN QUAD */
3448                  case 0x2001:    /* EM QUAD */
3449                  case 0x2002:    /* EN SPACE */
3450                  case 0x2003:    /* EM SPACE */
3451                  case 0x2004:    /* THREE-PER-EM SPACE */
3452                  case 0x2005:    /* FOUR-PER-EM SPACE */
3453                  case 0x2006:    /* SIX-PER-EM SPACE */
3454                  case 0x2007:    /* FIGURE SPACE */
3455                  case 0x2008:    /* PUNCTUATION SPACE */
3456                  case 0x2009:    /* THIN SPACE */
3457                  case 0x200A:    /* HAIR SPACE */
3458                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3459                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3460                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3461                  RRETURN(MATCH_NOMATCH);
3462                  }
3463                break;
3464    
3465                case OP_HSPACE:
3466                switch(c)
3467                  {
3468                  default: RRETURN(MATCH_NOMATCH);
3469                  case 0x09:      /* HT */
3470                  case 0x20:      /* SPACE */
3471                  case 0xa0:      /* NBSP */
3472                  case 0x1680:    /* OGHAM SPACE MARK */
3473                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3474                  case 0x2000:    /* EN QUAD */
3475                  case 0x2001:    /* EM QUAD */
3476                  case 0x2002:    /* EN SPACE */
3477                  case 0x2003:    /* EM SPACE */
3478                  case 0x2004:    /* THREE-PER-EM SPACE */
3479                  case 0x2005:    /* FOUR-PER-EM SPACE */
3480                  case 0x2006:    /* SIX-PER-EM SPACE */
3481                  case 0x2007:    /* FIGURE SPACE */
3482                  case 0x2008:    /* PUNCTUATION SPACE */
3483                  case 0x2009:    /* THIN SPACE */
3484                  case 0x200A:    /* HAIR SPACE */
3485                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3486                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3487                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3488                  break;
3489                  }
3490                break;
3491    
3492                case OP_NOT_VSPACE:
3493                switch(c)
3494                  {
3495                  default: break;
3496                  case 0x0a:      /* LF */
3497                  case 0x0b:      /* VT */
3498                  case 0x0c:      /* FF */
3499                  case 0x0d:      /* CR */
3500                  case 0x85:      /* NEL */
3501                  case 0x2028:    /* LINE SEPARATOR */
3502                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3503                  RRETURN(MATCH_NOMATCH);
3504                  }
3505                break;
3506    
3507                case OP_VSPACE:
3508                switch(c)
3509                  {
3510                  default: RRETURN(MATCH_NOMATCH);
3511                  case 0x0a:      /* LF */
3512                  case 0x0b:      /* VT */
3513                  case 0x0c:      /* FF */
3514                  case 0x0d:      /* CR */
3515                  case 0x85:      /* NEL */
3516                  case 0x2028:    /* LINE SEPARATOR */
3517                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3518                  break;
3519                  }
3520                break;
3521    
3522              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3523              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3524                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 3096  for (;;) Line 3560  for (;;)
3560          {          {
3561          for (fi = min;; fi++)          for (fi = min;; fi++)
3562            {            {
3563            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3564            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3565            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3566                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
# Line 3126  for (;;) Line 3590  for (;;)
3590                }                }
3591              break;              break;
3592    
3593                case OP_NOT_HSPACE:
3594                switch(c)
3595                  {
3596                  default: break;
3597                  case 0x09:      /* HT */
3598                  case 0x20:      /* SPACE */
3599                  case 0xa0:      /* NBSP */
3600                  RRETURN(MATCH_NOMATCH);
3601                  }
3602                break;
3603    
3604                case OP_HSPACE:
3605                switch(c)
3606                  {
3607                  default: RRETURN(MATCH_NOMATCH);
3608                  case 0x09:      /* HT */
3609                  case 0x20:      /* SPACE */
3610                  case 0xa0:      /* NBSP */
3611                  break;
3612                  }
3613                break;
3614    
3615                case OP_NOT_VSPACE:
3616                switch(c)
3617                  {
3618                  default: break;
3619                  case 0x0a:      /* LF */
3620                  case 0x0b:      /* VT */
3621                  case 0x0c:      /* FF */
3622                  case 0x0d:      /* CR */
3623                  case 0x85:      /* NEL */
3624                  RRETURN(MATCH_NOMATCH);
3625                  }
3626                break;
3627    
3628                case OP_VSPACE:
3629                switch(c)
3630                  {
3631                  default: RRETURN(MATCH_NOMATCH);
3632                  case 0x0a:      /* LF */
3633                  case 0x0b:      /* VT */
3634                  case 0x0c:      /* FF */
3635                  case 0x0d:      /* CR */
3636                  case 0x85:      /* NEL */
3637                  break;
3638                  }
3639                break;
3640    
3641              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3642              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3643              break;              break;
# Line 3242  for (;;) Line 3754  for (;;)
3754          if (possessive) continue;          if (possessive) continue;
3755          for(;;)          for(;;)
3756            {            {
3757            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM44);
3758            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3759            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3760            BACKCHAR(eptr);            if (utf8) BACKCHAR(eptr);
3761            }            }
3762          }          }
3763    
# Line 3278  for (;;) Line 3790  for (;;)
3790          if (possessive) continue;          if (possessive) continue;
3791          for(;;)          for(;;)
3792            {            {
3793            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
3794            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3795            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
3796            for (;;)                        /* Move back over one extended */            for (;;)                        /* Move back over one extended */
3797              {              {
3798              int len = 1;              int len = 1;
             BACKCHAR(eptr);  
3799              if (!utf8) c = *eptr; else              if (!utf8) c = *eptr; else
3800                {                {
3801                  BACKCHAR(eptr);
3802                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3803                }                }
3804              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
# Line 3307  for (;;) Line 3819  for (;;)
3819          switch(ctype)          switch(ctype)
3820            {            {
3821            case OP_ANY:            case OP_ANY:
   
           /* Special code is required for UTF8, but when the maximum is  
           unlimited we don't need it, so we repeat the non-UTF8 code. This is  
           probably worth it, because .* is quite a common idiom. */  
   
3822            if (max < INT_MAX)            if (max < INT_MAX)
3823              {              {
3824              if ((ims & PCRE_DOTALL) == 0)              if ((ims & PCRE_DOTALL) == 0)
# Line 3344  for (;;) Line 3851  for (;;)
3851                  {                  {
3852                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3853                  eptr++;                  eptr++;
3854                    while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3855                  }                  }
               break;  
3856                }                }
3857              else              else
3858                {                {
3859                c = max - min;                eptr = md->end_subject;
               if (c > (unsigned int)(md->end_subject - eptr))  
                 c = md->end_subject - eptr;  
               eptr += c;  
3860                }                }
3861              }              }
3862            break;            break;
# Line 3387  for (;;) Line 3891  for (;;)
3891              }              }
3892            break;            break;
3893    
3894              case OP_NOT_HSPACE:
3895              case OP_HSPACE:
3896              for (i = min; i < max; i++)
3897                {
3898                BOOL gotspace;
3899                int len = 1;
3900                if (eptr >= md->end_subject) break;
3901                GETCHARLEN(c, eptr, len);
3902                switch(c)
3903                  {
3904                  default: gotspace = FALSE; break;
3905                  case 0x09:      /* HT */
3906                  case 0x20:      /* SPACE */
3907                  case 0xa0:      /* NBSP */
3908                  case 0x1680:    /* OGHAM SPACE MARK */
3909                  case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
3910                  case 0x2000:    /* EN QUAD */
3911                  case 0x2001:    /* EM QUAD */
3912                  case 0x2002:    /* EN SPACE */
3913                  case 0x2003:    /* EM SPACE */
3914                  case 0x2004:    /* THREE-PER-EM SPACE */
3915                  case 0x2005:    /* FOUR-PER-EM SPACE */
3916                  case 0x2006:    /* SIX-PER-EM SPACE */
3917                  case 0x2007:    /* FIGURE SPACE */
3918                  case 0x2008:    /* PUNCTUATION SPACE */
3919                  case 0x2009:    /* THIN SPACE */
3920                  case 0x200A:    /* HAIR SPACE */
3921                  case 0x202f:    /* NARROW NO-BREAK SPACE */
3922                  case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
3923                  case 0x3000:    /* IDEOGRAPHIC SPACE */
3924                  gotspace = TRUE;
3925                  break;
3926                  }
3927                if (gotspace == (ctype == OP_NOT_HSPACE)) break;
3928                eptr += len;
3929                }
3930              break;
3931    
3932              case OP_NOT_VSPACE:
3933              case OP_VSPACE:
3934              for (i = min; i < max; i++)
3935                {
3936                BOOL gotspace;
3937                int len = 1;
3938                if (eptr >= md->end_subject) break;
3939                GETCHARLEN(c, eptr, len);
3940                switch(c)
3941                  {
3942                  default: gotspace = FALSE; break;
3943                  case 0x0a:      /* LF */
3944                  case 0x0b:      /* VT */
3945                  case 0x0c:      /* FF */
3946                  case 0x0d:      /* CR */
3947                  case 0x85:      /* NEL */
3948                  case 0x2028:    /* LINE SEPARATOR */
3949                  case 0x2029:    /* PARAGRAPH SEPARATOR */
3950                  gotspace = TRUE;
3951                  break;
3952                  }
3953                if (gotspace == (ctype == OP_NOT_VSPACE)) break;
3954                eptr += len;
3955                }
3956              break;
3957    
3958            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3959            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3960              {              {
# Line 3462  for (;;) Line 4030  for (;;)
4030          if (possessive) continue;          if (possessive) continue;
4031          for(;;)          for(;;)
4032            {            {
4033            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM46);
4034            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4035            if (eptr-- == pp) break;        /* Stop if tried at original pos */            if (eptr-- == pp) break;        /* Stop if tried at original pos */
4036            BACKCHAR(eptr);            BACKCHAR(eptr);
4037            }            }
4038          }          }
4039        else        else
4040  #endif  #endif  /* SUPPORT_UTF8 */
4041    
4042        /* Not UTF-8 mode */        /* Not UTF-8 mode */
4043          {          {
# Line 3513  for (;;) Line 4081  for (;;)
4081              }              }
4082            break;            break;
4083    
4084              case OP_NOT_HSPACE:
4085              for (i = min; i < max; i++)
4086                {
4087                if (eptr >= md->end_subject) break;
4088                c = *eptr;
4089                if (c == 0x09 || c == 0x20 || c == 0xa0) break;
4090                eptr++;
4091                }
4092              break;
4093    
4094              case OP_HSPACE:
4095              for (i = min; i < max; i++)
4096                {
4097                if (eptr >= md->end_subject) break;
4098                c = *eptr;
4099                if (c != 0x09 && c != 0x20 && c != 0xa0) break;
4100                eptr++;
4101                }
4102              break;
4103    
4104              case OP_NOT_VSPACE:
4105              for (i = min; i < max; i++)
4106                {
4107                if (eptr >= md->end_subject) break;
4108                c = *eptr;
4109                if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
4110                  break;
4111                eptr++;
4112                }
4113              break;
4114    
4115              case OP_VSPACE:
4116              for (i = min; i < max; i++)
4117                {
4118                if (eptr >= md->end_subject) break;
4119                c = *eptr;
4120                if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
4121                  break;
4122                eptr++;
4123                }
4124              break;
4125    
4126            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
4127            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4128              {              {
# Line 3576  for (;;) Line 4186  for (;;)
4186          if (possessive) continue;          if (possessive) continue;
4187          while (eptr >= pp)          while (eptr >= pp)
4188            {            {
4189            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM47);
4190            eptr--;            eptr--;
4191            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4192            }            }
# Line 3602  for (;;) Line 4212  for (;;)
4212    
4213    }             /* End of main loop */    }             /* End of main loop */
4214  /* Control never reaches here */  /* Control never reaches here */
4215    
4216    
4217    /* When compiling to use the heap rather than the stack for recursive calls to
4218    match(), the RRETURN() macro jumps here. The number that is saved in
4219    frame->Xwhere indicates which label we actually want to return to. */
4220    
4221    #ifdef NO_RECURSE
4222    #define LBL(val) case val: goto L_RM##val;
4223    HEAP_RETURN:
4224    switch (frame->Xwhere)
4225      {
4226      LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4227      LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
4228      LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
4229      LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
4230      LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)
4231      LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)
4232      LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)
4233      default:
4234      DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4235      return PCRE_ERROR_INTERNAL;
4236      }
4237    #undef LBL
4238    #endif  /* NO_RECURSE */
4239  }  }
4240    
4241    
# Line 3614  Undefine all the macros that were define Line 4248  Undefine all the macros that were define
4248  #ifdef NO_RECURSE  #ifdef NO_RECURSE
4249  #undef eptr  #undef eptr
4250  #undef ecode  #undef ecode
4251    #undef mstart
4252  #undef offset_top  #undef offset_top
4253  #undef ims  #undef ims
4254  #undef eptrb  #undef eptrb
# Line 3686  Returns: > 0 => success; value Line 4321  Returns: > 0 => success; value
4321                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4322  */  */
4323    
4324  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
4325  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4326    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4327    int offsetcount)    int offsetcount)
# Line 3711  const uschar *start_bits = NULL; Line 4346  const uschar *start_bits = NULL;
4346  USPTR start_match = (USPTR)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
4347  USPTR end_subject;  USPTR end_subject;
4348  USPTR req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
 eptrblock eptrchain[EPTR_WORK_SIZE];  
4349    
4350  pcre_study_data internal_study;  pcre_study_data internal_study;
4351  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3797  md->partial = (options & PCRE_PARTIAL) ! Line 4431  md->partial = (options & PCRE_PARTIAL) !
4431  md->hitend = FALSE;  md->hitend = FALSE;
4432    
4433  md->recursive = NULL;                   /* No recursion at top level */  md->recursive = NULL;                   /* No recursion at top level */
 md->eptrchain = eptrchain;              /* Make workspace generally available */  
4434    
4435  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4436  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4437    
4438  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
4439  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4440    
4441  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
4442         PCRE_NEWLINE_BITS)         PCRE_NEWLINE_BITS)
4443    {    {
4444    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
# Line 3814  switch ((((options & PCRE_NEWLINE_BITS) Line 4447  switch ((((options & PCRE_NEWLINE_BITS)
4447    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
4448         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
4449    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
4450      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
4451    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
4452    }    }
4453    
4454  if (newline < 0)  if (newline == -2)
4455      {
4456      md->nltype = NLTYPE_ANYCRLF;
4457      }
4458    else if (newline < 0)
4459    {    {
4460    md->nltype = NLTYPE_ANY;    md->nltype = NLTYPE_ANY;
4461    }    }
# Line 3948  the loop runs just once. */ Line 4586  the loop runs just once. */
4586  for(;;)  for(;;)
4587    {    {
4588    USPTR save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
4589      USPTR new_start_match;
4590    
4591    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
4592    
# Line 3992  for(;;) Line 4631  for(;;)
4631        {        {
4632        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4633          start_match++;          start_match++;
4634    
4635          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4636          and we are now at a LF, advance the match position by one more character.
4637          */
4638    
4639          if (start_match[-1] == '\r' &&
4640               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4641               start_match < end_subject &&
4642               *start_match == '\n')
4643            start_match++;
4644        }        }
4645      }      }
4646    
# Line 4078  for(;;) Line 4727  for(;;)
4727    
4728    /* OK, we can now run the match. */    /* OK, we can now run the match. */
4729    
4730    md->start_match = start_match;    md->start_match_ptr = start_match;
4731    md->match_call_count = 0;    md->match_call_count = 0;
4732    md->eptrn = 0;                          /* Next free eptrchain slot */    rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
4733    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);  
4734      switch(rc)
4735        {
4736        /* NOMATCH and PRUNE advance by one character. THEN at this level acts
4737        exactly like PRUNE. */
4738    
4739    /* Any return other than MATCH_NOMATCH breaks the loop. */      case MATCH_NOMATCH:
4740        case MATCH_PRUNE:
4741        case MATCH_THEN:
4742        new_start_match = start_match + 1;
4743    #ifdef SUPPORT_UTF8
4744        if (utf8)
4745          while(new_start_match < end_subject && (*new_start_match & 0xc0) == 0x80)
4746            new_start_match++;
4747    #endif
4748        break;
4749    
4750        /* SKIP passes back the next starting point explicitly. */
4751    
4752        case MATCH_SKIP:
4753        new_start_match = md->start_match_ptr;
4754        break;
4755    
4756    if (rc != MATCH_NOMATCH) break;      /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
4757    
4758        case MATCH_COMMIT:
4759        rc = MATCH_NOMATCH;
4760        goto ENDLOOP;
4761    
4762        /* Any other return is some kind of error. */
4763    
4764        default:
4765        goto ENDLOOP;
4766        }
4767    
4768      /* Control reaches here for the various types of "no match at this point"
4769      result. Reset the code to MATCH_NOMATCH for subsequent checking. */
4770    
4771      rc = MATCH_NOMATCH;
4772    
4773    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first    /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4774    newline in the subject (though it may continue over the newline). Therefore,    newline in the subject (though it may continue over the newline). Therefore,
# Line 4093  for(;;) Line 4776  for(;;)
4776    
4777    if (firstline && IS_NEWLINE(start_match)) break;    if (firstline && IS_NEWLINE(start_match)) break;
4778    
4779    /* Advance the match position by one character. */    /* Advance to new matching position */
4780    
4781    start_match++;    start_match = new_start_match;
 #ifdef SUPPORT_UTF8  
   if (utf8)  
     while(start_match < end_subject && (*start_match & 0xc0) == 0x80)  
       start_match++;  
 #endif  
4782    
4783    /* Break the loop if the pattern is anchored or if we have passed the end of    /* Break the loop if the pattern is anchored or if we have passed the end of
4784    the subject. */    the subject. */
4785    
4786    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4787    
4788    /* If we have just passed a CR and the newline option is CRLF or ANY, and we    /* If we have just passed a CR and we are now at a LF, and the pattern does
4789    are now at a LF, advance the match position by one more character. */    not contain any explicit matches for \r or \n, and the newline option is CRLF
4790      or ANY or ANYCRLF, advance the match position by one more character. */
4791    
4792    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4793         (md->nltype == NLTYPE_ANY || md->nllen == 2) &&        start_match < end_subject &&
4794         start_match < end_subject &&        *start_match == '\n' &&
4795         *start_match == '\n')        (re->options & PCRE_HASCRORLF) == 0 &&
4796            (md->nltype == NLTYPE_ANY ||
4797             md->nltype == NLTYPE_ANYCRLF ||
4798             md->nllen == 2))
4799      start_match++;      start_match++;
4800    
4801    }   /* End of for(;;) "bumpalong" loop */    }   /* End of for(;;) "bumpalong" loop */
# Line 4123  for(;;) Line 4805  for(;;)
4805  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping  /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4806  conditions is true:  conditions is true:
4807    
4808  (1) The pattern is anchored;  (1) The pattern is anchored or the match was failed by (*COMMIT);
4809    
4810  (2) We are past the end of the subject;  (2) We are past the end of the subject;
4811    
# Line 4138  processing, copy those that we can. In t Line 4820  processing, copy those that we can. In t
4820  certain parts of the pattern were not used, even though there are more  certain parts of the pattern were not used, even though there are more
4821  capturing parentheses than vector slots. */  capturing parentheses than vector slots. */
4822    
4823    ENDLOOP:
4824    
4825  if (rc == MATCH_MATCH)  if (rc == MATCH_MATCH)
4826    {    {
4827    if (using_temporary_offsets)    if (using_temporary_offsets)
# Line 4158  if (rc == MATCH_MATCH) Line 4842  if (rc == MATCH_MATCH)
4842    
4843    rc = md->offset_overflow? 0 : md->end_offset_top/2;    rc = md->offset_overflow? 0 : md->end_offset_top/2;
4844    
4845    /* If there is space, set up the whole thing as substring 0. */    /* If there is space, set up the whole thing as substring 0. The value of
4846      md->start_match_ptr might be modified if \K was encountered on the success
4847      matching path. */
4848    
4849    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4850      {      {
4851      offsets[0] = start_match - md->start_subject;      offsets[0] = md->start_match_ptr - md->start_subject;
4852      offsets[1] = md->end_match_ptr - md->start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4853      }      }
4854    

Legend:
Removed from v.93  
changed lines
  Added in v.227

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12