/[pcre]/code/tags/pcre-7.6/pcre_exec.c
ViewVC logotype

Diff of /code/tags/pcre-7.6/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC | revision 120 by ph10, Mon Mar 12 11:36:14 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  pattern matching using an NFA algorithm, trying to mimic Perl as closely as  pattern matching using an NFA algorithm, trying to mimic Perl as closely as
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45    #define NLBLOCK md             /* Block containing newline information */
46    #define PSSTART start_subject  /* Field containing processed string start */
47    #define PSEND   end_subject    /* Field containing processed string end */
48    
49  #include "pcre_internal.h"  #include "pcre_internal.h"
50    
51    /* The chain of eptrblocks for tail recursions uses memory in stack workspace,
52    obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */
53    
54  /* Structure for building a chain of data that actually lives on the  #define EPTR_WORK_SIZE (1000)
 stack, for holding the values of the subject pointer at the start of each  
 subpattern, so as to detect when an empty string has been matched by a  
 subpattern - to break infinite loops. When NO_RECURSE is set, these blocks  
 are on the heap, not on the stack. */  
   
 typedef struct eptrblock {  
   struct eptrblock *epb_prev;  
   const uschar *epb_saved_eptr;  
 } eptrblock;  
55    
56  /* Flag bits for the match() function */  /* Flag bits for the match() function */
57    
58  #define match_condassert   0x01    /* Called to check a condition assertion */  #define match_condassert     0x01  /* Called to check a condition assertion */
59  #define match_isgroup      0x02    /* Set if start of bracketed group */  #define match_cbegroup       0x02  /* Could-be-empty unlimited repeat group */
60    #define match_tail_recursed  0x04  /* Tail recursive call */
61    
62  /* Non-error returns from the match() function. Error returns are externally  /* Non-error returns from the match() function. Error returns are externally
63  defined PCRE_ERROR_xxx codes, which are all negative. */  defined PCRE_ERROR_xxx codes, which are all negative. */
# Line 101  Returns: nothing Line 98  Returns: nothing
98  static void  static void
99  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)  pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
100  {  {
101  int c;  unsigned int c;
102  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
103  while (length-- > 0)  while (length-- > 0)
104    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);    if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
# Line 128  Returns: TRUE if matched Line 125  Returns: TRUE if matched
125  */  */
126    
127  static BOOL  static BOOL
128  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register USPTR eptr, int length, match_data *md,
129    unsigned long int ims)    unsigned long int ims)
130  {  {
131  const uschar *p = md->start_subject + md->offset_vector[offset];  USPTR p = md->start_subject + md->offset_vector[offset];
132    
133  #ifdef DEBUG  #ifdef DEBUG
134  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 169  return TRUE; Line 166  return TRUE;
166  ****************************************************************************  ****************************************************************************
167                     RECURSION IN THE match() FUNCTION                     RECURSION IN THE match() FUNCTION
168    
169  The match() function is highly recursive. Some regular expressions can cause  The match() function is highly recursive, though not every recursive call
170  it to recurse thousands of times. I was writing for Unix, so I just let it  increases the recursive depth. Nevertheless, some regular expressions can cause
171  call itself recursively. This uses the stack for saving everything that has  it to recurse to a great depth. I was writing for Unix, so I just let it call
172  to be saved for a recursive call. On Unix, the stack can be large, and this  itself recursively. This uses the stack for saving everything that has to be
173  works fine.  saved for a recursive call. On Unix, the stack can be large, and this works
174    fine.
175  It turns out that on non-Unix systems there are problems with programs that  
176  use a lot of stack. (This despite the fact that every last chip has oodles  It turns out that on some non-Unix-like systems there are problems with
177  of memory these days, and techniques for extending the stack have been known  programs that use a lot of stack. (This despite the fact that every last chip
178  for decades.) So....  has oodles of memory these days, and techniques for extending the stack have
179    been known for decades.) So....
180    
181  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive  There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
182  calls by keeping local variables that need to be preserved in blocks of memory  calls by keeping local variables that need to be preserved in blocks of memory
183  obtained from malloc instead of on the stack. Macros are used to  obtained from malloc() instead of on the stack. Macros are used to
184  achieve this so that the actual code doesn't look very different to what it  achieve this so that the actual code doesn't look very different to what it
185  always used to.  always used to.
186  ****************************************************************************  ****************************************************************************
187  ***************************************************************************/  ***************************************************************************/
188    
189    
190  /* These versions of the macros use the stack, as normal */  /* These versions of the macros use the stack, as normal. There are debugging
191    versions and production versions. */
192    
193  #ifndef NO_RECURSE  #ifndef NO_RECURSE
194  #define REGISTER register  #define REGISTER register
195  #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)  #ifdef DEBUG
196    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
197      { \
198      printf("match() called in line %d\n", __LINE__); \
199      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1); \
200      printf("to line %d\n", __LINE__); \
201      }
202    #define RRETURN(ra) \
203      { \
204      printf("match() returned %d from line %d ", ra, __LINE__); \
205      return ra; \
206      }
207    #else
208    #define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
209      rx = match(ra,rb,rc,rd,re,rf,rg,rdepth+1)
210  #define RRETURN(ra) return ra  #define RRETURN(ra) return ra
211    #endif
212    
213  #else  #else
214    
215    
# Line 215  match(), which never changes. */ Line 230  match(), which never changes. */
230      newframe->Xims = re;\      newframe->Xims = re;\
231      newframe->Xeptrb = rf;\      newframe->Xeptrb = rf;\
232      newframe->Xflags = rg;\      newframe->Xflags = rg;\
233        newframe->Xrdepth = frame->Xrdepth + 1;\
234      newframe->Xprevframe = frame;\      newframe->Xprevframe = frame;\
235      frame = newframe;\      frame = newframe;\
236      DPRINTF(("restarting from line %d\n", __LINE__));\      DPRINTF(("restarting from line %d\n", __LINE__));\
# Line 256  typedef struct heapframe { Line 272  typedef struct heapframe {
272    long int Xims;    long int Xims;
273    eptrblock *Xeptrb;    eptrblock *Xeptrb;
274    int Xflags;    int Xflags;
275      unsigned int Xrdepth;
276    
277    /* Function local variables */    /* Function local variables */
278    
# Line 271  typedef struct heapframe { Line 288  typedef struct heapframe {
288    
289    BOOL Xcur_is_word;    BOOL Xcur_is_word;
290    BOOL Xcondition;    BOOL Xcondition;
   BOOL Xminimize;  
291    BOOL Xprev_is_word;    BOOL Xprev_is_word;
292    
293    unsigned long int Xoriginal_ims;    unsigned long int Xoriginal_ims;
294    
295  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
296    int Xprop_type;    int Xprop_type;
297      int Xprop_value;
298    int Xprop_fail_result;    int Xprop_fail_result;
299    int Xprop_category;    int Xprop_category;
300    int Xprop_chartype;    int Xprop_chartype;
301    int Xprop_othercase;    int Xprop_script;
302    int Xprop_test_against;    int Xoclength;
303    int *Xprop_test_variable;    uschar Xocchars[8];
304  #endif  #endif
305    
306    int Xctype;    int Xctype;
307    int Xfc;    unsigned int Xfc;
308    int Xfi;    int Xfi;
309    int Xlength;    int Xlength;
310    int Xmax;    int Xmax;
# Line 320  typedef struct heapframe { Line 337  typedef struct heapframe {
337  *         Match from current position            *  *         Match from current position            *
338  *************************************************/  *************************************************/
339    
340  /* On entry ecode points to the first opcode, and eptr to the first character  /* This function is called recursively in many circumstances. Whenever it
 in the subject string, while eptrb holds the value of eptr at the start of the  
 last bracketed group - used for breaking infinite loops matching zero-length  
 strings. This function is called recursively in many circumstances. Whenever it  
341  returns a negative (error) response, the outer incarnation must also return the  returns a negative (error) response, the outer incarnation must also return the
342  same response.  same response.
343    
# Line 333  performance. Tests using gcc on a SPARC Line 347  performance. Tests using gcc on a SPARC
347  made performance worse.  made performance worse.
348    
349  Arguments:  Arguments:
350     eptr        pointer in subject     eptr        pointer to current character in subject
351     ecode       position in code     ecode       pointer to current position in compiled code
352     offset_top  current top pointer     offset_top  current top pointer
353     md          pointer to "static" info for the match     md          pointer to "static" info for the match
354     ims         current /i, /m, and /s options     ims         current /i, /m, and /s options
# Line 342  Arguments: Line 356  Arguments:
356                   brackets - for testing for empty matches                   brackets - for testing for empty matches
357     flags       can contain     flags       can contain
358                   match_condassert - this is an assertion condition                   match_condassert - this is an assertion condition
359                   match_isgroup - this is the start of a bracketed group                   match_cbegroup - this is the start of an unlimited repeat
360                       group that can match an empty string
361                     match_tail_recursed - this is a tail_recursed group
362       rdepth      the recursion depth
363    
364  Returns:       MATCH_MATCH if matched            )  these values are >= 0  Returns:       MATCH_MATCH if matched            )  these values are >= 0
365                 MATCH_NOMATCH if failed to match  )                 MATCH_NOMATCH if failed to match  )
366                 a negative PCRE_ERROR_xxx value if aborted by an error condition                 a negative PCRE_ERROR_xxx value if aborted by an error condition
367                   (e.g. stopped by recursion limit)                   (e.g. stopped by repeated call or recursion limit)
368  */  */
369    
370  static int  static int
371  match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,  match(REGISTER USPTR eptr, REGISTER const uschar *ecode,
372    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,    int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
373    int flags)    int flags, unsigned int rdepth)
374  {  {
375  /* These variables do not need to be preserved over recursion in this function,  /* These variables do not need to be preserved over recursion in this function,
376  so they can be ordinary variables in all cases. Mark them with "register"  so they can be ordinary variables in all cases. Mark some of them with
377  because they are used a lot in loops. */  "register" because they are used a lot in loops. */
378    
379  register int  rrc;    /* Returns from recursive calls */  register int  rrc;         /* Returns from recursive calls */
380  register int  i;      /* Used for loops not involving calls to RMATCH() */  register int  i;           /* Used for loops not involving calls to RMATCH() */
381  register int  c;      /* Character values not kept over RMATCH() calls */  register unsigned int c;   /* Character values not kept over RMATCH() calls */
382  register BOOL utf8;   /* Local copy of UTF-8 flag for speed */  register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
383    
384    BOOL minimize, possessive; /* Quantifier options */
385    
386  /* When recursion is not being used, all "local" variables that have to be  /* When recursion is not being used, all "local" variables that have to be
387  preserved over calls to RMATCH() are part of a "frame" which is obtained from  preserved over calls to RMATCH() are part of a "frame" which is obtained from
# Line 381  frame->Xoffset_top = offset_top; Line 400  frame->Xoffset_top = offset_top;
400  frame->Xims = ims;  frame->Xims = ims;
401  frame->Xeptrb = eptrb;  frame->Xeptrb = eptrb;
402  frame->Xflags = flags;  frame->Xflags = flags;
403    frame->Xrdepth = rdepth;
404    
405  /* This is where control jumps back to to effect "recursion" */  /* This is where control jumps back to to effect "recursion" */
406    
# Line 394  HEAP_RECURSE: Line 414  HEAP_RECURSE:
414  #define ims                frame->Xims  #define ims                frame->Xims
415  #define eptrb              frame->Xeptrb  #define eptrb              frame->Xeptrb
416  #define flags              frame->Xflags  #define flags              frame->Xflags
417    #define rdepth             frame->Xrdepth
418    
419  /* Ditto for the local variables */  /* Ditto for the local variables */
420    
# Line 411  HEAP_RECURSE: Line 432  HEAP_RECURSE:
432    
433  #define cur_is_word        frame->Xcur_is_word  #define cur_is_word        frame->Xcur_is_word
434  #define condition          frame->Xcondition  #define condition          frame->Xcondition
 #define minimize           frame->Xminimize  
435  #define prev_is_word       frame->Xprev_is_word  #define prev_is_word       frame->Xprev_is_word
436    
437  #define original_ims       frame->Xoriginal_ims  #define original_ims       frame->Xoriginal_ims
438    
439  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
440  #define prop_type          frame->Xprop_type  #define prop_type          frame->Xprop_type
441    #define prop_value         frame->Xprop_value
442  #define prop_fail_result   frame->Xprop_fail_result  #define prop_fail_result   frame->Xprop_fail_result
443  #define prop_category      frame->Xprop_category  #define prop_category      frame->Xprop_category
444  #define prop_chartype      frame->Xprop_chartype  #define prop_chartype      frame->Xprop_chartype
445  #define prop_othercase     frame->Xprop_othercase  #define prop_script        frame->Xprop_script
446  #define prop_test_against  frame->Xprop_test_against  #define oclength           frame->Xoclength
447  #define prop_test_variable frame->Xprop_test_variable  #define occhars            frame->Xocchars
448  #endif  #endif
449    
450  #define ctype              frame->Xctype  #define ctype              frame->Xctype
# Line 447  HEAP_RECURSE: Line 468  HEAP_RECURSE:
468  get preserved during recursion in the normal way. In this environment, fi and  get preserved during recursion in the normal way. In this environment, fi and
469  i, and fc and c, can be the same variables. */  i, and fc and c, can be the same variables. */
470    
471  #else  #else         /* NO_RECURSE not defined */
472  #define fi i  #define fi i
473  #define fc c  #define fc c
474    
475    
476  #ifdef SUPPORT_UTF8                /* Many of these variables are used ony */  #ifdef SUPPORT_UTF8                /* Many of these variables are used only  */
477  const uschar *charptr;             /* small blocks of the code. My normal  */  const uschar *charptr;             /* in small blocks of the code. My normal */
478  #endif                             /* style of coding would have declared  */  #endif                             /* style of coding would have declared    */
479  const uschar *callpat;             /* them within each of those blocks.    */  const uschar *callpat;             /* them within each of those blocks.      */
480  const uschar *data;                /* However, in order to accommodate the */  const uschar *data;                /* However, in order to accommodate the   */
481  const uschar *next;                /* version of this code that uses an    */  const uschar *next;                /* version of this code that uses an      */
482  const uschar *pp;                  /* external "stack" implemented on the  */  USPTR         pp;                  /* external "stack" implemented on the    */
483  const uschar *prev;                /* heap, it is easier to declare them   */  const uschar *prev;                /* heap, it is easier to declare them all */
484  const uschar *saved_eptr;          /* all here, so the declarations can    */  USPTR         saved_eptr;          /* here, so the declarations can be cut   */
485                                     /* be cut out in a block. The only      */                                     /* out in a block. The only declarations  */
486  recursion_info new_recursive;      /* declarations within blocks below are */  recursion_info new_recursive;      /* within blocks below are for variables  */
487                                     /* for variables that do not have to    */                                     /* that do not have to be preserved over  */
488  BOOL cur_is_word;                  /* be preserved over a recursive call   */  BOOL cur_is_word;                  /* a recursive call to RMATCH().          */
489  BOOL condition;                    /* to RMATCH().                         */  BOOL condition;
 BOOL minimize;  
490  BOOL prev_is_word;  BOOL prev_is_word;
491    
492  unsigned long int original_ims;  unsigned long int original_ims;
493    
494  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
495  int prop_type;  int prop_type;
496    int prop_value;
497  int prop_fail_result;  int prop_fail_result;
498  int prop_category;  int prop_category;
499  int prop_chartype;  int prop_chartype;
500  int prop_othercase;  int prop_script;
501  int prop_test_against;  int oclength;
502  int *prop_test_variable;  uschar occhars[8];
503  #endif  #endif
504    
505  int ctype;  int ctype;
# Line 493  int save_offset1, save_offset2, save_off Line 514  int save_offset1, save_offset2, save_off
514  int stacksave[REC_STACK_SAVE_MAX];  int stacksave[REC_STACK_SAVE_MAX];
515    
516  eptrblock newptrb;  eptrblock newptrb;
517  #endif  #endif     /* NO_RECURSE */
518    
519  /* These statements are here to stop the compiler complaining about uninitialized  /* These statements are here to stop the compiler complaining about uninitialized
520  variables. */  variables. */
521    
522  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
523    prop_value = 0;
524  prop_fail_result = 0;  prop_fail_result = 0;
 prop_test_against = 0;  
 prop_test_variable = NULL;  
525  #endif  #endif
526    
527  /* OK, now we can get on with the real code of the function. Recursion is  
528  specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,  /* This label is used for tail recursion, which is used in a few cases even
529  these just turn into a recursive call to match() and a "return", respectively.  when NO_RECURSE is not defined, in order to reduce the amount of stack that is
530  However, RMATCH isn't like a function call because it's quite a complicated  used. Thanks to Ian Taylor for noticing this possibility and sending the
531  macro. It has to be used in one particular way. This shouldn't, however, impact  original patch. */
532  performance when true recursion is being used. */  
533    TAIL_RECURSE:
534    
535    /* OK, now we can get on with the real code of the function. Recursive calls
536    are specified by the macro RMATCH and RRETURN is used to return. When
537    NO_RECURSE is *not* defined, these just turn into a recursive call to match()
538    and a "return", respectively (possibly with some debugging if DEBUG is
539    defined). However, RMATCH isn't like a function call because it's quite a
540    complicated macro. It has to be used in one particular way. This shouldn't,
541    however, impact performance when true recursion is being used. */
542    
543    /* First check that we haven't called match() too many times, or that we
544    haven't exceeded the recursive call limit. */
545    
546  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);  if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
547    if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
548    
549  original_ims = ims;    /* Save for resetting on ')' */  original_ims = ims;    /* Save for resetting on ')' */
550    
551    #ifdef SUPPORT_UTF8
552  utf8 = md->utf8;       /* Local copy of the flag */  utf8 = md->utf8;       /* Local copy of the flag */
553    #else
554    utf8 = FALSE;
555    #endif
556    
557  /* At the start of a bracketed group, add the current subject pointer to the  /* At the start of a group with an unlimited repeat that may match an empty
558  stack of such pointers, to be re-instated at the end of the group when we hit  string, the match_cbegroup flag is set. When this is the case, add the current
559  the closing ket. When match() is called in other circumstances, we don't add to  subject pointer to the chain of such remembered pointers, to be checked when we
560  this stack. */  hit the closing ket, in order to break infinite loops that match no characters.
561    When match() is called in other circumstances, don't add to the chain. If this
562    is a tail recursion, use a block from the workspace, as the one on the stack is
563    already used. */
564    
565  if ((flags & match_isgroup) != 0)  if ((flags & match_cbegroup) != 0)
566    {    {
567    newptrb.epb_prev = eptrb;    eptrblock *p;
568    newptrb.epb_saved_eptr = eptr;    if ((flags & match_tail_recursed) != 0)
569    eptrb = &newptrb;      {
570        if (md->eptrn >= EPTR_WORK_SIZE) RRETURN(PCRE_ERROR_NULLWSLIMIT);
571        p = md->eptrchain + md->eptrn++;
572        }
573      else p = &newptrb;
574      p->epb_saved_eptr = eptr;
575      p->epb_prev = eptrb;
576      eptrb = p;
577    }    }
578    
579  /* Now start processing the operations. */  /* Now start processing the opcodes. */
580    
581  for (;;)  for (;;)
582    {    {
583      minimize = possessive = FALSE;
584    op = *ecode;    op = *ecode;
   minimize = FALSE;  
585    
586    /* For partial matching, remember if we ever hit the end of the subject after    /* For partial matching, remember if we ever hit the end of the subject after
587    matching at least one subject character. */    matching at least one subject character. */
# Line 543  for (;;) Line 591  for (;;)
591        eptr > md->start_match)        eptr > md->start_match)
592      md->hitend = TRUE;      md->hitend = TRUE;
593    
594    /* Opening capturing bracket. If there is space in the offset vector, save    switch(op)
   the current subject position in the working slot at the top of the vector. We  
   mustn't change the current values of the data slot, because they may be set  
   from a previous iteration of this group, and be referred to by a reference  
   inside the group.  
   
   If the bracket fails to match, we need to restore this value and also the  
   values of the final offsets, in case they were set by a previous iteration of  
   the same bracket.  
   
   If there isn't enough space in the offset vector, treat this as if it were a  
   non-capturing bracket. Don't worry about setting the flag for the error case  
   here; that is handled in the code for KET. */  
   
   if (op > OP_BRA)  
595      {      {
596      number = op - OP_BRA;      /* Handle a capturing bracket. If there is space in the offset vector, save
597        the current subject position in the working slot at the top of the vector.
598      /* For extended extraction brackets (large number), we have to fish out the      We mustn't change the current values of the data slot, because they may be
599      number from a dummy opcode at the start. */      set from a previous iteration of this group, and be referred to by a
600        reference inside the group.
601      if (number > EXTRACT_BASIC_MAX)  
602        number = GET2(ecode, 2+LINK_SIZE);      If the bracket fails to match, we need to restore this value and also the
603        values of the final offsets, in case they were set by a previous iteration
604        of the same bracket.
605    
606        If there isn't enough space in the offset vector, treat this as if it were
607        a non-capturing bracket. Don't worry about setting the flag for the error
608        case here; that is handled in the code for KET. */
609    
610        case OP_CBRA:
611        case OP_SCBRA:
612        number = GET2(ecode, 1+LINK_SIZE);
613      offset = number << 1;      offset = number << 1;
614    
615  #ifdef DEBUG  #ifdef DEBUG
616      printf("start bracket %d subject=", number);      printf("start bracket %d\n", number);
617        printf("subject=");
618      pchars(eptr, 16, TRUE, md);      pchars(eptr, 16, TRUE, md);
619      printf("\n");      printf("\n");
620  #endif  #endif
# Line 584  for (;;) Line 629  for (;;)
629        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));        DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
630        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;        md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
631    
632          flags = (op == OP_SCBRA)? match_cbegroup : 0;
633        do        do
634          {          {
635          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md,
636            match_isgroup);            ims, eptrb, flags);
637          if (rrc != MATCH_NOMATCH) RRETURN(rrc);          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
638          md->capture_last = save_capture_last;          md->capture_last = save_capture_last;
639          ecode += GET(ecode, 1);          ecode += GET(ecode, 1);
# Line 603  for (;;) Line 649  for (;;)
649        RRETURN(MATCH_NOMATCH);        RRETURN(MATCH_NOMATCH);
650        }        }
651    
652      /* Insufficient room for saving captured contents */      /* Insufficient room for saving captured contents. Treat as a non-capturing
653        bracket. */
654    
655      else op = OP_BRA;      DPRINTF(("insufficient capture room: treat as non-capturing\n"));
     }  
656    
657    /* Other types of node can be handled by a switch */      /* Non-capturing bracket. Loop for all the alternatives. When we get to the
658        final alternative within the brackets, we would return the result of a
659        recursive call to match() whatever happened. We can reduce stack usage by
660        turning this into a tail recursion. */
661    
662    switch(op)      case OP_BRA:
663      {      case OP_SBRA:
664      case OP_BRA:     /* Non-capturing bracket: optimized */      DPRINTF(("start non-capturing bracket\n"));
665      DPRINTF(("start bracket 0\n"));      flags = (op >= OP_SBRA)? match_cbegroup : 0;
666      do      for (;;)
667        {        {
668        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,        if (ecode[GET(ecode, 1)] != OP_ALT)
669          match_isgroup);          {
670            ecode += _pcre_OP_lengths[*ecode];
671            flags |= match_tail_recursed;
672            DPRINTF(("bracket 0 tail recursion\n"));
673            goto TAIL_RECURSE;
674            }
675    
676          /* For non-final alternatives, continue the loop for a NOMATCH result;
677          otherwise return. */
678    
679          RMATCH(rrc, eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims,
680            eptrb, flags);
681        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
682        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
683        }        }
684      while (*ecode == OP_ALT);      /* Control never reaches here. */
     DPRINTF(("bracket 0 failed\n"));  
     RRETURN(MATCH_NOMATCH);  
685    
686      /* Conditional group: compilation checked that there are no more than      /* Conditional group: compilation checked that there are no more than
687      two branches. If the condition is false, skipping the first branch takes us      two branches. If the condition is false, skipping the first branch takes us
688      past the end if there is only one branch, but that's OK because that is      past the end if there is only one branch, but that's OK because that is
689      exactly what going to the ket would do. */      exactly what going to the ket would do. As there is only one branch to be
690        obeyed, we can use tail recursion to avoid using another stack frame. */
691    
692      case OP_COND:      case OP_COND:
693      if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */      case OP_SCOND:
694        if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
695          {
696          offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
697          condition = md->recursive != NULL &&
698            (offset == RREF_ANY || offset == md->recursive->group_num);
699          ecode += condition? 3 : GET(ecode, 1);
700          }
701    
702        else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
703        {        {
704        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */        offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
705        condition = (offset == CREF_RECURSE * 2)?        condition = offset < offset_top && md->offset_vector[offset] >= 0;
706          (md->recursive != NULL) :        ecode += condition? 3 : GET(ecode, 1);
707          (offset < offset_top && md->offset_vector[offset] >= 0);        }
708        RMATCH(rrc, eptr, ecode + (condition?  
709          (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),      else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
710          offset_top, md, ims, eptrb, match_isgroup);        {
711        RRETURN(rrc);        condition = FALSE;
712          ecode += GET(ecode, 1);
713        }        }
714    
715      /* The condition is an assertion. Call match() to evaluate it - setting      /* The condition is an assertion. Call match() to evaluate it - setting
716      the final argument TRUE causes it to stop at the end of an assertion. */      the final argument match_condassert causes it to stop at the end of an
717        assertion. */
718    
719      else      else
720        {        {
721        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,
722            match_condassert | match_isgroup);            match_condassert);
723        if (rrc == MATCH_MATCH)        if (rrc == MATCH_MATCH)
724          {          {
725          ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);          condition = TRUE;
726            ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2);
727          while (*ecode == OP_ALT) ecode += GET(ecode, 1);          while (*ecode == OP_ALT) ecode += GET(ecode, 1);
728          }          }
729        else if (rrc != MATCH_NOMATCH)        else if (rrc != MATCH_NOMATCH)
730          {          {
731          RRETURN(rrc);         /* Need braces because of following else */          RRETURN(rrc);         /* Need braces because of following else */
732          }          }
733        else ecode += GET(ecode, 1);        else
734        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,          {
735          match_isgroup);          condition = FALSE;
736        RRETURN(rrc);          ecode += GET(ecode, 1);
737            }
738        }        }
     /* Control never reaches here */  
739    
740      /* Skip over conditional reference or large extraction number data if      /* We are now at the branch that is to be obeyed. As there is only one,
741      encountered. */      we can use tail recursion to avoid using another stack frame. If the second
742        alternative doesn't exist, we can just plough on. */
743    
744      case OP_CREF:      if (condition || *ecode == OP_ALT)
745      case OP_BRANUMBER:        {
746      ecode += 3;        ecode += 1 + LINK_SIZE;
747          flags = match_tail_recursed | ((op == OP_SCOND)? match_cbegroup : 0);
748          goto TAIL_RECURSE;
749          }
750        else
751          {
752          ecode += 1 + LINK_SIZE;
753          }
754      break;      break;
755    
756      /* End of the pattern. If we are in a recursion, we should restore the  
757      offsets appropriately and continue from after the call. */      /* End of the pattern. If we are in a top-level recursion, we should
758        restore the offsets appropriately and continue from after the call. */
759    
760      case OP_END:      case OP_END:
761      if (md->recursive != NULL && md->recursive->group_num == 0)      if (md->recursive != NULL && md->recursive->group_num == 0)
762        {        {
763        recursion_info *rec = md->recursive;        recursion_info *rec = md->recursive;
764        DPRINTF(("Hit the end in a (?0) recursion\n"));        DPRINTF(("End of pattern in a (?0) recursion\n"));
765        md->recursive = rec->prevrec;        md->recursive = rec->prevrec;
766        memmove(md->offset_vector, rec->offset_save,        memmove(md->offset_vector, rec->offset_save,
767          rec->saved_max * sizeof(int));          rec->saved_max * sizeof(int));
# Line 717  for (;;) Line 797  for (;;)
797      case OP_ASSERTBACK:      case OP_ASSERTBACK:
798      do      do
799        {        {
800        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
801        if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
802        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
803        ecode += GET(ecode, 1);        ecode += GET(ecode, 1);
# Line 744  for (;;) Line 823  for (;;)
823      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
824      do      do
825        {        {
826        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0);
         match_isgroup);  
827        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);        if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
828        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829        ecode += GET(ecode,1);        ecode += GET(ecode,1);
# Line 766  for (;;) Line 844  for (;;)
844  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
845      if (utf8)      if (utf8)
846        {        {
847        c = GET(ecode,1);        i = GET(ecode, 1);
848        for (i = 0; i < c; i++)        while (i-- > 0)
849          {          {
850          eptr--;          eptr--;
851          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);          if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
# Line 780  for (;;) Line 858  for (;;)
858      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */      /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
859    
860        {        {
861        eptr -= GET(ecode,1);        eptr -= GET(ecode, 1);
862        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);        if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
863        }        }
864    
# Line 800  for (;;) Line 878  for (;;)
878        cb.version          = 1;   /* Version 1 of the callout block */        cb.version          = 1;   /* Version 1 of the callout block */
879        cb.callout_number   = ecode[1];        cb.callout_number   = ecode[1];
880        cb.offset_vector    = md->offset_vector;        cb.offset_vector    = md->offset_vector;
881        cb.subject          = (const char *)md->start_subject;        cb.subject          = (PCRE_SPTR)md->start_subject;
882        cb.subject_length   = md->end_subject - md->start_subject;        cb.subject_length   = md->end_subject - md->start_subject;
883        cb.start_match      = md->start_match - md->start_subject;        cb.start_match      = md->start_match - md->start_subject;
884        cb.current_position = eptr - md->start_subject;        cb.current_position = eptr - md->start_subject;
# Line 837  for (;;) Line 915  for (;;)
915      case OP_RECURSE:      case OP_RECURSE:
916        {        {
917        callpat = md->start_code + GET(ecode, 1);        callpat = md->start_code + GET(ecode, 1);
918        new_recursive.group_num = *callpat - OP_BRA;        new_recursive.group_num = (callpat == md->start_code)? 0 :
919            GET2(callpat, 1 + LINK_SIZE);
       /* For extended extraction brackets (large number), we have to fish out  
       the number from a dummy opcode at the start. */  
   
       if (new_recursive.group_num > EXTRACT_BASIC_MAX)  
         new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);  
920    
921        /* Add to "recursing stack" */        /* Add to "recursing stack" */
922    
# Line 876  for (;;) Line 949  for (;;)
949        restore the offset and recursion data. */        restore the offset and recursion data. */
950    
951        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));        DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
952          flags = (*callpat >= OP_SBRA)? match_cbegroup : 0;
953        do        do
954          {          {
955          RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,          RMATCH(rrc, eptr, callpat + _pcre_OP_lengths[*callpat], offset_top,
956              eptrb, match_isgroup);            md, ims, eptrb, flags);
957          if (rrc == MATCH_MATCH)          if (rrc == MATCH_MATCH)
958            {            {
959              DPRINTF(("Recursion matched\n"));
960            md->recursive = new_recursive.prevrec;            md->recursive = new_recursive.prevrec;
961            if (new_recursive.offset_save != stacksave)            if (new_recursive.offset_save != stacksave)
962              (pcre_free)(new_recursive.offset_save);              (pcre_free)(new_recursive.offset_save);
963            RRETURN(MATCH_MATCH);            RRETURN(MATCH_MATCH);
964            }            }
965          else if (rrc != MATCH_NOMATCH) RRETURN(rrc);          else if (rrc != MATCH_NOMATCH)
966              {
967              DPRINTF(("Recursion gave error %d\n", rrc));
968              RRETURN(rrc);
969              }
970    
971          md->recursive = &new_recursive;          md->recursive = &new_recursive;
972          memcpy(md->offset_vector, new_recursive.offset_save,          memcpy(md->offset_vector, new_recursive.offset_save,
# Line 912  for (;;) Line 991  for (;;)
991      the end of a normal bracket, leaving the subject pointer. */      the end of a normal bracket, leaving the subject pointer. */
992    
993      case OP_ONCE:      case OP_ONCE:
994        {      prev = ecode;
995        prev = ecode;      saved_eptr = eptr;
       saved_eptr = eptr;  
996    
997        do      do
998          {        {
999          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,
1000            eptrb, match_isgroup);          eptrb, 0);
1001          if (rrc == MATCH_MATCH) break;        if (rrc == MATCH_MATCH) break;
1002          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1003          ecode += GET(ecode,1);        ecode += GET(ecode,1);
1004          }        }
1005        while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
1006    
1007        /* If hit the end of the group (which could be repeated), fail */      /* If hit the end of the group (which could be repeated), fail */
1008    
1009        if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);      if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
1010    
1011        /* Continue as from after the assertion, updating the offsets high water      /* Continue as from after the assertion, updating the offsets high water
1012        mark, since extracts may have been taken. */      mark, since extracts may have been taken. */
1013    
1014        do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1015    
1016        offset_top = md->end_offset_top;      offset_top = md->end_offset_top;
1017        eptr = md->end_match_ptr;      eptr = md->end_match_ptr;
1018    
1019        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1020        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1021        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1022        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1023        course of events. */      course of events. */
1024    
1025        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1026          {        {
1027          ecode += 1+LINK_SIZE;        ecode += 1+LINK_SIZE;
1028          break;        break;
1029          }        }
1030    
1031        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1032        preceding bracket, in the appropriate order. We need to reset any options      preceding bracket, in the appropriate order. The second "call" of match()
1033        that changed within the bracket before re-running it, so check the next      uses tail recursion, to avoid using another stack frame. We need to reset
1034        opcode. */      any options that changed within the bracket before re-running it, so
1035        check the next opcode. */
1036    
1037        if (ecode[1+LINK_SIZE] == OP_OPT)      if (ecode[1+LINK_SIZE] == OP_OPT)
1038          {        {
1039          ims = (ims & ~PCRE_IMS) | ecode[4];        ims = (ims & ~PCRE_IMS) | ecode[4];
1040          DPRINTF(("ims set to %02lx at group repeat\n", ims));        DPRINTF(("ims set to %02lx at group repeat\n", ims));
1041          }        }
1042    
1043        if (*ecode == OP_KETRMIN)      if (*ecode == OP_KETRMIN)
1044          {        {
1045          RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);        RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);
1046          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1047          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        ecode = prev;
1048          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        flags = match_tail_recursed;
1049          }        goto TAIL_RECURSE;
1050        else  /* OP_KETRMAX */        }
1051          {      else  /* OP_KETRMAX */
1052          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);        {
1053          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_cbegroup);
1054          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1055          if (rrc != MATCH_NOMATCH) RRETURN(rrc);        ecode += 1 + LINK_SIZE;
1056          }        flags = match_tail_recursed;
1057          goto TAIL_RECURSE;
1058        }        }
1059      RRETURN(MATCH_NOMATCH);      /* Control never gets here */
1060    
1061      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
1062      bracketed group and go to there. */      bracketed group and go to there. */
# Line 994  for (;;) Line 1074  for (;;)
1074      case OP_BRAZERO:      case OP_BRAZERO:
1075        {        {
1076        next = ecode+1;        next = ecode+1;
1077        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);        RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, 0);
1078        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1079        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next,1); while (*next == OP_ALT);
1080        ecode = next + 1+LINK_SIZE;        ecode = next + 1 + LINK_SIZE;
1081        }        }
1082      break;      break;
1083    
1084      case OP_BRAMINZERO:      case OP_BRAMINZERO:
1085        {        {
1086        next = ecode+1;        next = ecode+1;
1087        do next += GET(next,1); while (*next == OP_ALT);        do next += GET(next, 1); while (*next == OP_ALT);
1088        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,        RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
         match_isgroup);  
1089        if (rrc != MATCH_NOMATCH) RRETURN(rrc);        if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1090        ecode++;        ecode++;
1091        }        }
1092      break;      break;
1093    
1094      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. */
     an assertion "group", stop matching and return MATCH_MATCH, but record the  
     current high water mark for use by positive assertions. Do this also  
     for the "once" (not-backup up) groups. */  
1095    
1096      case OP_KET:      case OP_KET:
1097      case OP_KETRMIN:      case OP_KETRMIN:
1098      case OP_KETRMAX:      case OP_KETRMAX:
1099        {      prev = ecode - GET(ecode, 1);
       prev = ecode - GET(ecode, 1);  
       saved_eptr = eptrb->epb_saved_eptr;  
1100    
1101        /* Back up the stack of bracket start pointers. */      /* If this was a group that remembered the subject start, in order to break
1102        infinite repeats of empty string matches, retrieve the subject start from
1103        the chain. Otherwise, set it NULL. */
1104    
1105        eptrb = eptrb->epb_prev;      if (*prev >= OP_SBRA)
1106          {
1107        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||        saved_eptr = eptrb->epb_saved_eptr;   /* Value at start of group */
1108            *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||        eptrb = eptrb->epb_prev;              /* Backup to previous group */
1109            *prev == OP_ONCE)        }
1110          {      else saved_eptr = NULL;
         md->end_match_ptr = eptr;      /* For ONCE */  
         md->end_offset_top = offset_top;  
         RRETURN(MATCH_MATCH);  
         }  
1111    
1112        /* In all other cases except a conditional group we have to check the      /* If we are at the end of an assertion group, stop matching and return
1113        group number back at the start and if necessary complete handling an      MATCH_MATCH, but record the current high water mark for use by positive
1114        extraction by setting the offsets and bumping the high water mark. */      assertions. Do this also for the "once" (atomic) groups. */
1115    
1116        if (*prev != OP_COND)      if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
1117          {          *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
1118          number = *prev - OP_BRA;          *prev == OP_ONCE)
1119          {
1120          md->end_match_ptr = eptr;      /* For ONCE */
1121          md->end_offset_top = offset_top;
1122          RRETURN(MATCH_MATCH);
1123          }
1124    
1125          /* For extended extraction brackets (large number), we have to fish out      /* For capturing groups we have to check the group number back at the start
1126          the number from a dummy opcode at the start. */      and if necessary complete handling an extraction by setting the offsets and
1127        bumping the high water mark. Note that whole-pattern recursion is coded as
1128        a recurse into group 0, so it won't be picked up here. Instead, we catch it
1129        when the OP_END is reached. Other recursion is handled here. */
1130    
1131          if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);      if (*prev == OP_CBRA || *prev == OP_SCBRA)
1132          offset = number << 1;        {
1133          number = GET2(prev, 1+LINK_SIZE);
1134          offset = number << 1;
1135    
1136  #ifdef DEBUG  #ifdef DEBUG
1137          printf("end bracket %d", number);        printf("end bracket %d", number);
1138          printf("\n");        printf("\n");
1139  #endif  #endif
1140    
1141          /* Test for a numbered group. This includes groups called as a result        md->capture_last = number;
1142          of recursion. Note that whole-pattern recursion is coded as a recurse        if (offset >= md->offset_max) md->offset_overflow = TRUE; else
1143          into group 0, so it won't be picked up here. Instead, we catch it when          {
1144          the OP_END is reached. */          md->offset_vector[offset] =
1145              md->offset_vector[md->offset_end - number];
1146          if (number > 0)          md->offset_vector[offset+1] = eptr - md->start_subject;
1147            {          if (offset_top <= offset) offset_top = offset + 2;
1148            md->capture_last = number;          }
1149            if (offset >= md->offset_max) md->offset_overflow = TRUE; else  
1150              {        /* Handle a recursively called group. Restore the offsets
1151              md->offset_vector[offset] =        appropriately and continue from after the call. */
1152                md->offset_vector[md->offset_end - number];  
1153              md->offset_vector[offset+1] = eptr - md->start_subject;        if (md->recursive != NULL && md->recursive->group_num == number)
1154              if (offset_top <= offset) offset_top = offset + 2;          {
1155              }          recursion_info *rec = md->recursive;
1156            DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
1157            /* Handle a recursively called group. Restore the offsets          md->recursive = rec->prevrec;
1158            appropriately and continue from after the call. */          md->start_match = rec->save_start;
1159            memcpy(md->offset_vector, rec->offset_save,
1160            if (md->recursive != NULL && md->recursive->group_num == number)            rec->saved_max * sizeof(int));
1161              {          ecode = rec->after_call;
1162              recursion_info *rec = md->recursive;          ims = original_ims;
1163              DPRINTF(("Recursion (%d) succeeded - continuing\n", number));          break;
             md->recursive = rec->prevrec;  
             md->start_match = rec->save_start;  
             memcpy(md->offset_vector, rec->offset_save,  
               rec->saved_max * sizeof(int));  
             ecode = rec->after_call;  
             ims = original_ims;  
             break;  
             }  
           }  
1164          }          }
1165          }
1166    
1167        /* Reset the value of the ims flags, in case they got changed during      /* For both capturing and non-capturing groups, reset the value of the ims
1168        the group. */      flags, in case they got changed during the group. */
1169    
1170        ims = original_ims;      ims = original_ims;
1171        DPRINTF(("ims reset to %02lx\n", ims));      DPRINTF(("ims reset to %02lx\n", ims));
1172    
1173        /* For a non-repeating ket, just continue at this level. This also      /* For a non-repeating ket, just continue at this level. This also
1174        happens for a repeating ket if no characters were matched in the group.      happens for a repeating ket if no characters were matched in the group.
1175        This is the forcible breaking of infinite loops as implemented in Perl      This is the forcible breaking of infinite loops as implemented in Perl
1176        5.005. If there is an options reset, it will get obeyed in the normal      5.005. If there is an options reset, it will get obeyed in the normal
1177        course of events. */      course of events. */
1178    
1179        if (*ecode == OP_KET || eptr == saved_eptr)      if (*ecode == OP_KET || eptr == saved_eptr)
1180          {        {
1181          ecode += 1 + LINK_SIZE;        ecode += 1 + LINK_SIZE;
1182          break;        break;
1183          }        }
1184    
1185        /* The repeating kets try the rest of the pattern or restart from the      /* The repeating kets try the rest of the pattern or restart from the
1186        preceding bracket, in the appropriate order. */      preceding bracket, in the appropriate order. In the second case, we can use
1187        tail recursion to avoid using another stack frame. */
1188    
1189        if (*ecode == OP_KETRMIN)      flags = (*prev >= OP_SBRA)? match_cbegroup : 0;
         {  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       else  /* OP_KETRMAX */  
         {  
         RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);  
         if (rrc != MATCH_NOMATCH) RRETURN(rrc);  
         }  
       }  
1190    
1191      RRETURN(MATCH_NOMATCH);      if (*ecode == OP_KETRMIN)
1192          {
1193          RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);
1194          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195          ecode = prev;
1196          flags |= match_tail_recursed;
1197          goto TAIL_RECURSE;
1198          }
1199        else  /* OP_KETRMAX */
1200          {
1201          RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, flags);
1202          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1203          ecode += 1 + LINK_SIZE;
1204          flags = match_tail_recursed;
1205          goto TAIL_RECURSE;
1206          }
1207        /* Control never gets here */
1208    
1209      /* Start of subject unless notbol, or after internal newline if multiline */      /* Start of subject unless notbol, or after internal newline if multiline */
1210    
# Line 1135  for (;;) Line 1212  for (;;)
1212      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);      if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
1213      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1214        {        {
1215        if (eptr != md->start_subject && eptr[-1] != NEWLINE)        if (eptr != md->start_subject &&
1216              (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
1217          RRETURN(MATCH_NOMATCH);          RRETURN(MATCH_NOMATCH);
1218        ecode++;        ecode++;
1219        break;        break;
# Line 1163  for (;;) Line 1241  for (;;)
1241      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
1242        {        {
1243        if (eptr < md->end_subject)        if (eptr < md->end_subject)
1244          { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }          { if (!IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); }
1245        else        else
1246          { if (md->noteol) RRETURN(MATCH_NOMATCH); }          { if (md->noteol) RRETURN(MATCH_NOMATCH); }
1247        ecode++;        ecode++;
# Line 1174  for (;;) Line 1252  for (;;)
1252        if (md->noteol) RRETURN(MATCH_NOMATCH);        if (md->noteol) RRETURN(MATCH_NOMATCH);
1253        if (!md->endonly)        if (!md->endonly)
1254          {          {
1255          if (eptr < md->end_subject - 1 ||          if (eptr != md->end_subject &&
1256             (eptr == md->end_subject - 1 && *eptr != NEWLINE))              (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1257            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1258          ecode++;          ecode++;
1259          break;          break;
1260          }          }
1261        }        }
1262      /* ... else fall through */      /* ... else fall through for endonly */
1263    
1264      /* End of subject assertion (\z) */      /* End of subject assertion (\z) */
1265    
# Line 1193  for (;;) Line 1271  for (;;)
1271      /* End of subject or ending \n assertion (\Z) */      /* End of subject or ending \n assertion (\Z) */
1272    
1273      case OP_EODN:      case OP_EODN:
1274      if (eptr < md->end_subject - 1 ||      if (eptr != md->end_subject &&
1275         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);          (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
1276          RRETURN(MATCH_NOMATCH);
1277      ecode++;      ecode++;
1278      break;      break;
1279    
# Line 1247  for (;;) Line 1326  for (;;)
1326      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1327    
1328      case OP_ANY:      case OP_ANY:
1329      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)      if ((ims & PCRE_DOTALL) == 0)
1330        RRETURN(MATCH_NOMATCH);        {
1331          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1332          }
1333      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
 #ifdef SUPPORT_UTF8  
1334      if (utf8)      if (utf8)
1335        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;        while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
 #endif  
1336      ecode++;      ecode++;
1337      break;      break;
1338    
# Line 1343  for (;;) Line 1422  for (;;)
1422      ecode++;      ecode++;
1423      break;      break;
1424    
1425        case OP_ANYNL:
1426        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1427        GETCHARINCTEST(c, eptr);
1428        switch(c)
1429          {
1430          default: RRETURN(MATCH_NOMATCH);
1431          case 0x000d:
1432          if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1433          break;
1434          case 0x000a:
1435          case 0x000b:
1436          case 0x000c:
1437          case 0x0085:
1438          case 0x2028:
1439          case 0x2029:
1440          break;
1441          }
1442        ecode++;
1443        break;
1444    
1445  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1446      /* Check the next character by Unicode property. We will get here only      /* Check the next character by Unicode property. We will get here only
1447      if the support is in the binary; otherwise a compile-time error occurs. */      if the support is in the binary; otherwise a compile-time error occurs. */
# Line 1352  for (;;) Line 1451  for (;;)
1451      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1452      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1453        {        {
1454        int chartype, rqdtype;        int chartype, script;
1455        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
   
       rqdtype = *(++ecode);  
       ecode++;  
1456    
1457        if (rqdtype >= 128)        switch(ecode[1])
1458          {          {
1459          if ((rqdtype - 128 != category) == (op == OP_PROP))          case PT_ANY:
1460            if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
1461            break;
1462    
1463            case PT_LAMP:
1464            if ((chartype == ucp_Lu ||
1465                 chartype == ucp_Ll ||
1466                 chartype == ucp_Lt) == (op == OP_NOTPROP))
1467            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1468          }           break;
1469        else  
1470          {          case PT_GC:
1471          if ((rqdtype != chartype) == (op == OP_PROP))          if ((ecode[2] != category) == (op == OP_PROP))
1472              RRETURN(MATCH_NOMATCH);
1473            break;
1474    
1475            case PT_PC:
1476            if ((ecode[2] != chartype) == (op == OP_PROP))
1477            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1478            break;
1479    
1480            case PT_SC:
1481            if ((ecode[2] != script) == (op == OP_PROP))
1482              RRETURN(MATCH_NOMATCH);
1483            break;
1484    
1485            default:
1486            RRETURN(PCRE_ERROR_INTERNAL);
1487          }          }
1488    
1489          ecode += 3;
1490        }        }
1491      break;      break;
1492    
# Line 1379  for (;;) Line 1497  for (;;)
1497      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1498      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1499        {        {
1500        int chartype;        int chartype, script;
1501        int othercase;        int category = _pcre_ucp_findprop(c, &chartype, &script);
       int category = _pcre_ucp_findchar(c, &chartype, &othercase);  
1502        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1503        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1504          {          {
# Line 1390  for (;;) Line 1507  for (;;)
1507            {            {
1508            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1509            }            }
1510          category = _pcre_ucp_findchar(c, &chartype, &othercase);          category = _pcre_ucp_findprop(c, &chartype, &script);
1511          if (category != ucp_M) break;          if (category != ucp_M) break;
1512          eptr += len;          eptr += len;
1513          }          }
# Line 1683  for (;;) Line 1800  for (;;)
1800            while (eptr >= pp)            while (eptr >= pp)
1801              {              {
1802              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
             eptr--;  
1803              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1804                eptr--;
1805              }              }
1806            }            }
1807    
# Line 1836  for (;;) Line 1953  for (;;)
1953    
1954        else        else
1955          {          {
1956          int dc;          unsigned int dc;
1957          GETCHARINC(dc, eptr);          GETCHARINC(dc, eptr);
1958          ecode += length;          ecode += length;
1959    
1960          /* If we have Unicode property support, we can use it to test the other          /* If we have Unicode property support, we can use it to test the other
1961          case of the character, if there is one. The result of _pcre_ucp_findchar() is          case of the character, if there is one. */
         < 0 if the char isn't found, and othercase is returned as zero if there  
         isn't one. */  
1962    
1963          if (fc != dc)          if (fc != dc)
1964            {            {
1965  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1966            int chartype;            if (dc != _pcre_ucp_othercase(fc))
           int othercase;  
           if (_pcre_ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)  
1967  #endif  #endif
1968              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
1969            }            }
# Line 1867  for (;;) Line 1980  for (;;)
1980        }        }
1981      break;      break;
1982    
1983      /* Match a single character repeatedly; different opcodes share code. */      /* Match a single character repeatedly. */
1984    
1985      case OP_EXACT:      case OP_EXACT:
1986      min = max = GET2(ecode, 1);      min = max = GET2(ecode, 1);
1987      ecode += 3;      ecode += 3;
1988      goto REPEATCHAR;      goto REPEATCHAR;
1989    
1990        case OP_POSUPTO:
1991        possessive = TRUE;
1992        /* Fall through */
1993    
1994      case OP_UPTO:      case OP_UPTO:
1995      case OP_MINUPTO:      case OP_MINUPTO:
1996      min = 0;      min = 0;
# Line 1882  for (;;) Line 1999  for (;;)
1999      ecode += 3;      ecode += 3;
2000      goto REPEATCHAR;      goto REPEATCHAR;
2001    
2002        case OP_POSSTAR:
2003        possessive = TRUE;
2004        min = 0;
2005        max = INT_MAX;
2006        ecode++;
2007        goto REPEATCHAR;
2008    
2009        case OP_POSPLUS:
2010        possessive = TRUE;
2011        min = 1;
2012        max = INT_MAX;
2013        ecode++;
2014        goto REPEATCHAR;
2015    
2016        case OP_POSQUERY:
2017        possessive = TRUE;
2018        min = 0;
2019        max = 1;
2020        ecode++;
2021        goto REPEATCHAR;
2022    
2023      case OP_STAR:      case OP_STAR:
2024      case OP_MINSTAR:      case OP_MINSTAR:
2025      case OP_PLUS:      case OP_PLUS:
# Line 1913  for (;;) Line 2051  for (;;)
2051    
2052        if (length > 1)        if (length > 1)
2053          {          {
         int oclength = 0;  
         uschar occhars[8];  
   
2054  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2055          int othercase;          unsigned int othercase;
         int chartype;  
2056          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2057               _pcre_ucp_findchar(fc, &chartype, &othercase) >= 0 &&              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
              othercase > 0)  
2058            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2059            else oclength = 0;
2060  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2061    
2062          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2063            {            {
2064            if (memcmp(eptr, charptr, length) == 0) eptr += length;            if (memcmp(eptr, charptr, length) == 0) eptr += length;
2065    #ifdef SUPPORT_UCP
2066            /* Need braces because of following else */            /* Need braces because of following else */
2067            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2068            else            else
# Line 1935  for (;;) Line 2070  for (;;)
2070              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2071              eptr += oclength;              eptr += oclength;
2072              }              }
2073    #else   /* without SUPPORT_UCP */
2074              else { RRETURN(MATCH_NOMATCH); }
2075    #endif  /* SUPPORT_UCP */
2076            }            }
2077    
2078          if (min == max) continue;          if (min == max) continue;
# Line 1947  for (;;) Line 2085  for (;;)
2085              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2086              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2087              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2088    #ifdef SUPPORT_UCP
2089              /* Need braces because of following else */              /* Need braces because of following else */
2090              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }              else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
2091              else              else
# Line 1954  for (;;) Line 2093  for (;;)
2093                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);                if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
2094                eptr += oclength;                eptr += oclength;
2095                }                }
2096    #else   /* without SUPPORT_UCP */
2097                else { RRETURN (MATCH_NOMATCH); }
2098    #endif  /* SUPPORT_UCP */
2099              }              }
2100            /* Control never gets here */            /* Control never gets here */
2101            }            }
2102          else  
2103            else  /* Maximize */
2104            {            {
2105            pp = eptr;            pp = eptr;
2106            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2107              {              {
2108              if (eptr > md->end_subject - length) break;              if (eptr > md->end_subject - length) break;
2109              if (memcmp(eptr, charptr, length) == 0) eptr += length;              if (memcmp(eptr, charptr, length) == 0) eptr += length;
2110    #ifdef SUPPORT_UCP
2111              else if (oclength == 0) break;              else if (oclength == 0) break;
2112              else              else
2113                {                {
2114                if (memcmp(eptr, occhars, oclength) != 0) break;                if (memcmp(eptr, occhars, oclength) != 0) break;
2115                eptr += oclength;                eptr += oclength;
2116                }                }
2117    #else   /* without SUPPORT_UCP */
2118                else break;
2119    #endif  /* SUPPORT_UCP */
2120              }              }
2121            while (eptr >= pp)  
2122              if (possessive) continue;
2123              for(;;)
2124             {             {
2125             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);             RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2126             if (rrc != MATCH_NOMATCH) RRETURN(rrc);             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2127               if (eptr == pp) RRETURN(MATCH_NOMATCH);
2128    #ifdef SUPPORT_UCP
2129               eptr--;
2130               BACKCHAR(eptr);
2131    #else   /* without SUPPORT_UCP */
2132             eptr -= length;             eptr -= length;
2133    #endif  /* SUPPORT_UCP */
2134             }             }
           RRETURN(MATCH_NOMATCH);  
2135            }            }
2136          /* Control never gets here */          /* Control never gets here */
2137          }          }
# Line 2025  for (;;) Line 2179  for (;;)
2179            }            }
2180          /* Control never gets here */          /* Control never gets here */
2181          }          }
2182        else        else  /* Maximize */
2183          {          {
2184          pp = eptr;          pp = eptr;
2185          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2033  for (;;) Line 2187  for (;;)
2187            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;            if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
2188            eptr++;            eptr++;
2189            }            }
2190            if (possessive) continue;
2191          while (eptr >= pp)          while (eptr >= pp)
2192            {            {
2193            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2061  for (;;) Line 2216  for (;;)
2216            }            }
2217          /* Control never gets here */          /* Control never gets here */
2218          }          }
2219        else        else  /* Maximize */
2220          {          {
2221          pp = eptr;          pp = eptr;
2222          for (i = min; i < max; i++)          for (i = min; i < max; i++)
# Line 2069  for (;;) Line 2224  for (;;)
2224            if (eptr >= md->end_subject || fc != *eptr) break;            if (eptr >= md->end_subject || fc != *eptr) break;
2225            eptr++;            eptr++;
2226            }            }
2227            if (possessive) continue;
2228          while (eptr >= pp)          while (eptr >= pp)
2229            {            {
2230            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2121  for (;;) Line 2277  for (;;)
2277      ecode += 3;      ecode += 3;
2278      goto REPEATNOTCHAR;      goto REPEATNOTCHAR;
2279    
2280        case OP_NOTPOSSTAR:
2281        possessive = TRUE;
2282        min = 0;
2283        max = INT_MAX;
2284        ecode++;
2285        goto REPEATNOTCHAR;
2286    
2287        case OP_NOTPOSPLUS:
2288        possessive = TRUE;
2289        min = 1;
2290        max = INT_MAX;
2291        ecode++;
2292        goto REPEATNOTCHAR;
2293    
2294        case OP_NOTPOSQUERY:
2295        possessive = TRUE;
2296        min = 0;
2297        max = 1;
2298        ecode++;
2299        goto REPEATNOTCHAR;
2300    
2301        case OP_NOTPOSUPTO:
2302        possessive = TRUE;
2303        min = 0;
2304        max = GET2(ecode, 1);
2305        ecode += 3;
2306        goto REPEATNOTCHAR;
2307    
2308      case OP_NOTSTAR:      case OP_NOTSTAR:
2309      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2310      case OP_NOTPLUS:      case OP_NOTPLUS:
# Line 2160  for (;;) Line 2344  for (;;)
2344        /* UTF-8 mode */        /* UTF-8 mode */
2345        if (utf8)        if (utf8)
2346          {          {
2347          register int d;          register unsigned int d;
2348          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2349            {            {
2350            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2185  for (;;) Line 2369  for (;;)
2369          /* UTF-8 mode */          /* UTF-8 mode */
2370          if (utf8)          if (utf8)
2371            {            {
2372            register int d;            register unsigned int d;
2373            for (fi = min;; fi++)            for (fi = min;; fi++)
2374              {              {
2375              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2221  for (;;) Line 2405  for (;;)
2405          /* UTF-8 mode */          /* UTF-8 mode */
2406          if (utf8)          if (utf8)
2407            {            {
2408            register int d;            register unsigned int d;
2409            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2410              {              {
2411              int len = 1;              int len = 1;
# Line 2231  for (;;) Line 2415  for (;;)
2415              if (fc == d) break;              if (fc == d) break;
2416              eptr += len;              eptr += len;
2417              }              }
2418            for(;;)          if (possessive) continue;
2419            for(;;)
2420              {              {
2421              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2422              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
# Line 2248  for (;;) Line 2433  for (;;)
2433              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;              if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
2434              eptr++;              eptr++;
2435              }              }
2436              if (possessive) continue;
2437            while (eptr >= pp)            while (eptr >= pp)
2438              {              {
2439              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2269  for (;;) Line 2455  for (;;)
2455        /* UTF-8 mode */        /* UTF-8 mode */
2456        if (utf8)        if (utf8)
2457          {          {
2458          register int d;          register unsigned int d;
2459          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2460            {            {
2461            GETCHARINC(d, eptr);            GETCHARINC(d, eptr);
# Line 2292  for (;;) Line 2478  for (;;)
2478          /* UTF-8 mode */          /* UTF-8 mode */
2479          if (utf8)          if (utf8)
2480            {            {
2481            register int d;            register unsigned int d;
2482            for (fi = min;; fi++)            for (fi = min;; fi++)
2483              {              {
2484              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2327  for (;;) Line 2513  for (;;)
2513          /* UTF-8 mode */          /* UTF-8 mode */
2514          if (utf8)          if (utf8)
2515            {            {
2516            register int d;            register unsigned int d;
2517            for (i = min; i < max; i++)            for (i = min; i < max; i++)
2518              {              {
2519              int len = 1;              int len = 1;
# Line 2336  for (;;) Line 2522  for (;;)
2522              if (fc == d) break;              if (fc == d) break;
2523              eptr += len;              eptr += len;
2524              }              }
2525              if (possessive) continue;
2526            for(;;)            for(;;)
2527              {              {
2528              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2353  for (;;) Line 2540  for (;;)
2540              if (eptr >= md->end_subject || fc == *eptr) break;              if (eptr >= md->end_subject || fc == *eptr) break;
2541              eptr++;              eptr++;
2542              }              }
2543              if (possessive) continue;
2544            while (eptr >= pp)            while (eptr >= pp)
2545              {              {
2546              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2384  for (;;) Line 2572  for (;;)
2572      ecode += 3;      ecode += 3;
2573      goto REPEATTYPE;      goto REPEATTYPE;
2574    
2575        case OP_TYPEPOSSTAR:
2576        possessive = TRUE;
2577        min = 0;
2578        max = INT_MAX;
2579        ecode++;
2580        goto REPEATTYPE;
2581    
2582        case OP_TYPEPOSPLUS:
2583        possessive = TRUE;
2584        min = 1;
2585        max = INT_MAX;
2586        ecode++;
2587        goto REPEATTYPE;
2588    
2589        case OP_TYPEPOSQUERY:
2590        possessive = TRUE;
2591        min = 0;
2592        max = 1;
2593        ecode++;
2594        goto REPEATTYPE;
2595    
2596        case OP_TYPEPOSUPTO:
2597        possessive = TRUE;
2598        min = 0;
2599        max = GET2(ecode, 1);
2600        ecode += 3;
2601        goto REPEATTYPE;
2602    
2603      case OP_TYPESTAR:      case OP_TYPESTAR:
2604      case OP_TYPEMINSTAR:      case OP_TYPEMINSTAR:
2605      case OP_TYPEPLUS:      case OP_TYPEPLUS:
# Line 2408  for (;;) Line 2624  for (;;)
2624        {        {
2625        prop_fail_result = ctype == OP_NOTPROP;        prop_fail_result = ctype == OP_NOTPROP;
2626        prop_type = *ecode++;        prop_type = *ecode++;
2627        if (prop_type >= 128)        prop_value = *ecode++;
         {  
         prop_test_against = prop_type - 128;  
         prop_test_variable = &prop_category;  
         }  
       else  
         {  
         prop_test_against = prop_type;  
         prop_test_variable = &prop_chartype;  
         }  
2628        }        }
2629      else prop_type = -1;      else prop_type = -1;
2630  #endif  #endif
# Line 2434  for (;;) Line 2641  for (;;)
2641      if (min > 0)      if (min > 0)
2642        {        {
2643  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2644        if (prop_type > 0)        if (prop_type >= 0)
2645          {          {
2646          for (i = 1; i <= min; i++)          switch(prop_type)
2647            {            {
2648            GETCHARINC(c, eptr);            case PT_ANY:
2649            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2650            if ((*prop_test_variable == prop_test_against) == prop_fail_result)            for (i = 1; i <= min; i++)
2651              RRETURN(MATCH_NOMATCH);              {
2652            }              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2653          }              GETCHARINC(c, eptr);
2654                }
2655              break;
2656    
2657        /* Match extended Unicode sequences. We will get here only if the            case PT_LAMP:
2658        support is in the binary; otherwise a compile-time error occurs. */            for (i = 1; i <= min; i++)
2659                {
2660                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2661                GETCHARINC(c, eptr);
2662                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2663                if ((prop_chartype == ucp_Lu ||
2664                     prop_chartype == ucp_Ll ||
2665                     prop_chartype == ucp_Lt) == prop_fail_result)
2666                  RRETURN(MATCH_NOMATCH);
2667                }
2668              break;
2669    
2670              case PT_GC:
2671              for (i = 1; i <= min; i++)
2672                {
2673                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2674                GETCHARINC(c, eptr);
2675                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2676                if ((prop_category == prop_value) == prop_fail_result)
2677                  RRETURN(MATCH_NOMATCH);
2678                }
2679              break;
2680    
2681              case PT_PC:
2682              for (i = 1; i <= min; i++)
2683                {
2684                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2685                GETCHARINC(c, eptr);
2686                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2687                if ((prop_chartype == prop_value) == prop_fail_result)
2688                  RRETURN(MATCH_NOMATCH);
2689                }
2690              break;
2691    
2692              case PT_SC:
2693              for (i = 1; i <= min; i++)
2694                {
2695                if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2696                GETCHARINC(c, eptr);
2697                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2698                if ((prop_script == prop_value) == prop_fail_result)
2699                  RRETURN(MATCH_NOMATCH);
2700                }
2701              break;
2702    
2703              default:
2704              RRETURN(PCRE_ERROR_INTERNAL);
2705              }
2706            }
2707    
2708          /* Match extended Unicode sequences. We will get here only if the
2709          support is in the binary; otherwise a compile-time error occurs. */
2710    
2711        else if (ctype == OP_EXTUNI)        else if (ctype == OP_EXTUNI)
2712          {          {
2713          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2714            {            {
2715            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2716            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2717            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2718            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2719              {              {
# Line 2462  for (;;) Line 2722  for (;;)
2722                {                {
2723                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2724                }                }
2725              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2726              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2727              eptr += len;              eptr += len;
2728              }              }
# Line 2481  for (;;) Line 2741  for (;;)
2741          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2742            {            {
2743            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
2744               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
2745              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2746              eptr++;
2747            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2748            }            }
2749          break;          break;
# Line 2491  for (;;) Line 2752  for (;;)
2752          eptr += min;          eptr += min;
2753          break;          break;
2754    
2755            case OP_ANYNL:
2756            for (i = 1; i <= min; i++)
2757              {
2758              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2759              GETCHARINC(c, eptr);
2760              switch(c)
2761                {
2762                default: RRETURN(MATCH_NOMATCH);
2763                case 0x000d:
2764                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2765                break;
2766                case 0x000a:
2767                case 0x000b:
2768                case 0x000c:
2769                case 0x0085:
2770                case 0x2028:
2771                case 0x2029:
2772                break;
2773                }
2774              }
2775            break;
2776    
2777          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2778          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2779            {            {
# Line 2559  for (;;) Line 2842  for (;;)
2842  #endif     /* SUPPORT_UTF8 */  #endif     /* SUPPORT_UTF8 */
2843    
2844        /* Code for the non-UTF-8 case for minimum matching of operators other        /* Code for the non-UTF-8 case for minimum matching of operators other
2845        than OP_PROP and OP_NOTPROP. */        than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
2846          number of bytes present, as this was tested above. */
2847    
2848        switch(ctype)        switch(ctype)
2849          {          {
# Line 2567  for (;;) Line 2851  for (;;)
2851          if ((ims & PCRE_DOTALL) == 0)          if ((ims & PCRE_DOTALL) == 0)
2852            {            {
2853            for (i = 1; i <= min; i++)            for (i = 1; i <= min; i++)
2854              if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);              {
2855                if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
2856                eptr++;
2857                }
2858            }            }
2859          else eptr += min;          else eptr += min;
2860          break;          break;
# Line 2576  for (;;) Line 2863  for (;;)
2863          eptr += min;          eptr += min;
2864          break;          break;
2865    
2866            /* Because of the CRLF case, we can't assume the minimum number of
2867            bytes are present in this case. */
2868    
2869            case OP_ANYNL:
2870            for (i = 1; i <= min; i++)
2871              {
2872              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2873              switch(*eptr++)
2874                {
2875                default: RRETURN(MATCH_NOMATCH);
2876                case 0x000d:
2877                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2878                break;
2879                case 0x000a:
2880                case 0x000b:
2881                case 0x000c:
2882                case 0x0085:
2883                break;
2884                }
2885              }
2886            break;
2887    
2888          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
2889          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2890            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);            if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
# Line 2624  for (;;) Line 2933  for (;;)
2933      if (minimize)      if (minimize)
2934        {        {
2935  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2936        if (prop_type > 0)        if (prop_type >= 0)
2937          {          {
2938          for (fi = min;; fi++)          switch(prop_type)
2939            {            {
2940            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            case PT_ANY:
2941            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            for (fi = min;; fi++)
2942            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              {
2943            GETCHARINC(c, eptr);              RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2944            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2945            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2946              RRETURN(MATCH_NOMATCH);              GETCHARINC(c, eptr);
2947                if (prop_fail_result) RRETURN(MATCH_NOMATCH);
2948                }
2949              /* Control never gets here */
2950    
2951              case PT_LAMP:
2952              for (fi = min;; fi++)
2953                {
2954                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2955                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2956                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2957                GETCHARINC(c, eptr);
2958                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2959                if ((prop_chartype == ucp_Lu ||
2960                     prop_chartype == ucp_Ll ||
2961                     prop_chartype == ucp_Lt) == prop_fail_result)
2962                  RRETURN(MATCH_NOMATCH);
2963                }
2964              /* Control never gets here */
2965    
2966              case PT_GC:
2967              for (fi = min;; fi++)
2968                {
2969                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2970                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2971                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2972                GETCHARINC(c, eptr);
2973                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2974                if ((prop_category == prop_value) == prop_fail_result)
2975                  RRETURN(MATCH_NOMATCH);
2976                }
2977              /* Control never gets here */
2978    
2979              case PT_PC:
2980              for (fi = min;; fi++)
2981                {
2982                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2983                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2984                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2985                GETCHARINC(c, eptr);
2986                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
2987                if ((prop_chartype == prop_value) == prop_fail_result)
2988                  RRETURN(MATCH_NOMATCH);
2989                }
2990              /* Control never gets here */
2991    
2992              case PT_SC:
2993              for (fi = min;; fi++)
2994                {
2995                RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
2996                if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2997                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2998                GETCHARINC(c, eptr);
2999                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3000                if ((prop_script == prop_value) == prop_fail_result)
3001                  RRETURN(MATCH_NOMATCH);
3002                }
3003              /* Control never gets here */
3004    
3005              default:
3006              RRETURN(PCRE_ERROR_INTERNAL);
3007            }            }
3008          }          }
3009    
# Line 2649  for (;;) Line 3018  for (;;)
3018            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3019            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3020            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3021            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3022            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3023            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3024              {              {
# Line 2658  for (;;) Line 3027  for (;;)
3027                {                {
3028                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3029                }                }
3030              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3031              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3032              eptr += len;              eptr += len;
3033              }              }
# Line 2676  for (;;) Line 3045  for (;;)
3045            {            {
3046            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3047            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3048            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3049                   (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&
3050                    IS_NEWLINE(eptr)))
3051                RRETURN(MATCH_NOMATCH);
3052    
3053            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3054            switch(ctype)            switch(ctype)
3055              {              {
3056              case OP_ANY:              case OP_ANY:        /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3057              break;              break;
3058    
3059              case OP_ANYBYTE:              case OP_ANYBYTE:
3060              break;              break;
3061    
3062                case OP_ANYNL:
3063                switch(c)
3064                  {
3065                  default: RRETURN(MATCH_NOMATCH);
3066                  case 0x000d:
3067                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3068                  break;
3069                  case 0x000a:
3070                  case 0x000b:
3071                  case 0x000c:
3072                  case 0x0085:
3073                  case 0x2028:
3074                  case 0x2029:
3075                  break;
3076                  }
3077                break;
3078    
3079              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3080              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)              if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
3081                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
# Line 2731  for (;;) Line 3119  for (;;)
3119            {            {
3120            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
3121            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3122            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject ||
3123                   ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))
3124                RRETURN(MATCH_NOMATCH);
3125    
3126            c = *eptr++;            c = *eptr++;
3127            switch(ctype)            switch(ctype)
3128              {              {
3129              case OP_ANY:              case OP_ANY:   /* This is the DOTALL case */
             if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);  
3130              break;              break;
3131    
3132              case OP_ANYBYTE:              case OP_ANYBYTE:
3133              break;              break;
3134    
3135                case OP_ANYNL:
3136                switch(c)
3137                  {
3138                  default: RRETURN(MATCH_NOMATCH);
3139                  case 0x000d:
3140                  if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3141                  break;
3142                  case 0x000a:
3143                  case 0x000b:
3144                  case 0x000c:
3145                  case 0x0085:
3146                  break;
3147                  }
3148                break;
3149    
3150              case OP_NOT_DIGIT:              case OP_NOT_DIGIT:
3151              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);              if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
3152              break;              break;
# Line 2774  for (;;) Line 3179  for (;;)
3179        /* Control never gets here */        /* Control never gets here */
3180        }        }
3181    
3182      /* If maximizing it is worth using inline code for speed, doing the type      /* If maximizing, it is worth using inline code for speed, doing the type
3183      test once at the start (i.e. keep it out of the loop). Again, keep the      test once at the start (i.e. keep it out of the loop). Again, keep the
3184      UTF-8 and UCP stuff separate. */      UTF-8 and UCP stuff separate. */
3185    
# Line 2783  for (;;) Line 3188  for (;;)
3188        pp = eptr;  /* Remember where we started */        pp = eptr;  /* Remember where we started */
3189    
3190  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3191        if (prop_type > 0)        if (prop_type >= 0)
3192          {          {
3193          for (i = min; i < max; i++)          switch(prop_type)
3194            {            {
3195            int len = 1;            case PT_ANY:
3196            if (eptr >= md->end_subject) break;            for (i = min; i < max; i++)
3197            GETCHARLEN(c, eptr, len);              {
3198            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              int len = 1;
3199            if ((*prop_test_variable == prop_test_against) == prop_fail_result)              if (eptr >= md->end_subject) break;
3200              break;              GETCHARLEN(c, eptr, len);
3201            eptr+= len;              if (prop_fail_result) break;
3202                eptr+= len;
3203                }
3204              break;
3205    
3206              case PT_LAMP:
3207              for (i = min; i < max; i++)
3208                {
3209                int len = 1;
3210                if (eptr >= md->end_subject) break;
3211                GETCHARLEN(c, eptr, len);
3212                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3213                if ((prop_chartype == ucp_Lu ||
3214                     prop_chartype == ucp_Ll ||
3215                     prop_chartype == ucp_Lt) == prop_fail_result)
3216                  break;
3217                eptr+= len;
3218                }
3219              break;
3220    
3221              case PT_GC:
3222              for (i = min; i < max; i++)
3223                {
3224                int len = 1;
3225                if (eptr >= md->end_subject) break;
3226                GETCHARLEN(c, eptr, len);
3227                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3228                if ((prop_category == prop_value) == prop_fail_result)
3229                  break;
3230                eptr+= len;
3231                }
3232              break;
3233    
3234              case PT_PC:
3235              for (i = min; i < max; i++)
3236                {
3237                int len = 1;
3238                if (eptr >= md->end_subject) break;
3239                GETCHARLEN(c, eptr, len);
3240                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3241                if ((prop_chartype == prop_value) == prop_fail_result)
3242                  break;
3243                eptr+= len;
3244                }
3245              break;
3246    
3247              case PT_SC:
3248              for (i = min; i < max; i++)
3249                {
3250                int len = 1;
3251                if (eptr >= md->end_subject) break;
3252                GETCHARLEN(c, eptr, len);
3253                prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3254                if ((prop_script == prop_value) == prop_fail_result)
3255                  break;
3256                eptr+= len;
3257                }
3258              break;
3259            }            }
3260    
3261          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3262    
3263            if (possessive) continue;
3264          for(;;)          for(;;)
3265            {            {
3266            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2816  for (;;) Line 3279  for (;;)
3279            {            {
3280            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3281            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3282            prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3283            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3284            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3285              {              {
# Line 2825  for (;;) Line 3288  for (;;)
3288                {                {
3289                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3290                }                }
3291              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3292              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3293              eptr += len;              eptr += len;
3294              }              }
# Line 2833  for (;;) Line 3296  for (;;)
3296    
3297          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3298    
3299            if (possessive) continue;
3300          for(;;)          for(;;)
3301            {            {
3302            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 2846  for (;;) Line 3310  for (;;)
3310                {                {
3311                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3312                }                }
3313              prop_category = _pcre_ucp_findchar(c, &prop_chartype, &prop_othercase);              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
3314              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3315              eptr--;              eptr--;
3316              }              }
# Line 2865  for (;;) Line 3329  for (;;)
3329            {            {
3330            case OP_ANY:            case OP_ANY:
3331    
3332            /* Special code is required for UTF8, but when the maximum is unlimited            /* Special code is required for UTF8, but when the maximum is
3333            we don't need it, so we repeat the non-UTF8 code. This is probably            unlimited we don't need it, so we repeat the non-UTF8 code. This is
3334            worth it, because .* is quite a common idiom. */            probably worth it, because .* is quite a common idiom. */
3335    
3336            if (max < INT_MAX)            if (max < INT_MAX)
3337              {              {
# Line 2875  for (;;) Line 3339  for (;;)
3339                {                {
3340                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3341                  {                  {
3342                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3343                  eptr++;                  eptr++;
3344                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3345                  }                  }
# Line 2884  for (;;) Line 3348  for (;;)
3348                {                {
3349                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3350                  {                  {
3351                    if (eptr >= md->end_subject) break;
3352                  eptr++;                  eptr++;
3353                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3354                  }                  }
# Line 2898  for (;;) Line 3363  for (;;)
3363                {                {
3364                for (i = min; i < max; i++)                for (i = min; i < max; i++)
3365                  {                  {
3366                  if (eptr >= md->end_subject || *eptr == NEWLINE) break;                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3367                  eptr++;                  eptr++;
3368                  }                  }
3369                break;                break;
# Line 2906  for (;;) Line 3371  for (;;)
3371              else              else
3372                {                {
3373                c = max - min;                c = max - min;
3374                if (c > md->end_subject - eptr) c = md->end_subject - eptr;                if (c > (unsigned int)(md->end_subject - eptr))
3375                    c = md->end_subject - eptr;
3376                eptr += c;                eptr += c;
3377                }                }
3378              }              }
# Line 2916  for (;;) Line 3382  for (;;)
3382    
3383            case OP_ANYBYTE:            case OP_ANYBYTE:
3384            c = max - min;            c = max - min;
3385            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3386                c = md->end_subject - eptr;
3387            eptr += c;            eptr += c;
3388            break;            break;
3389    
3390              case OP_ANYNL:
3391              for (i = min; i < max; i++)
3392                {
3393                int len = 1;
3394                if (eptr >= md->end_subject) break;
3395                GETCHARLEN(c, eptr, len);
3396                if (c == 0x000d)
3397                  {
3398                  if (++eptr >= md->end_subject) break;
3399                  if (*eptr == 0x000a) eptr++;
3400                  }
3401                else
3402                  {
3403                  if (c != 0x000a && c != 0x000b && c != 0x000c &&
3404                      c != 0x0085 && c != 0x2028 && c != 0x2029)
3405                    break;
3406                  eptr += len;
3407                  }
3408                }
3409              break;
3410    
3411            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3412            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3413              {              {
# Line 2992  for (;;) Line 3480  for (;;)
3480    
3481          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3482    
3483            if (possessive) continue;
3484          for(;;)          for(;;)
3485            {            {
3486            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3012  for (;;) Line 3501  for (;;)
3501              {              {
3502              for (i = min; i < max; i++)              for (i = min; i < max; i++)
3503                {                {
3504                if (eptr >= md->end_subject || *eptr == NEWLINE) break;                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3505                eptr++;                eptr++;
3506                }                }
3507              break;              break;
# Line 3021  for (;;) Line 3510  for (;;)
3510    
3511            case OP_ANYBYTE:            case OP_ANYBYTE:
3512            c = max - min;            c = max - min;
3513            if (c > md->end_subject - eptr) c = md->end_subject - eptr;            if (c > (unsigned int)(md->end_subject - eptr))
3514                c = md->end_subject - eptr;
3515            eptr += c;            eptr += c;
3516            break;            break;
3517    
3518              case OP_ANYNL:
3519              for (i = min; i < max; i++)
3520                {
3521                if (eptr >= md->end_subject) break;
3522                c = *eptr;
3523                if (c == 0x000d)
3524                  {
3525                  if (++eptr >= md->end_subject) break;
3526                  if (*eptr == 0x000a) eptr++;
3527                  }
3528                else
3529                  {
3530                  if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)
3531                    break;
3532                  eptr++;
3533                  }
3534                }
3535              break;
3536    
3537            case OP_NOT_DIGIT:            case OP_NOT_DIGIT:
3538            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3539              {              {
# Line 3085  for (;;) Line 3594  for (;;)
3594    
3595          /* eptr is now past the end of the maximum run */          /* eptr is now past the end of the maximum run */
3596    
3597            if (possessive) continue;
3598          while (eptr >= pp)          while (eptr >= pp)
3599            {            {
3600            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);            RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
# Line 3099  for (;;) Line 3609  for (;;)
3609        }        }
3610      /* Control never gets here */      /* Control never gets here */
3611    
3612      /* There's been some horrible disaster. Since all codes > OP_BRA are      /* There's been some horrible disaster. Arrival here can only mean there is
3613      for capturing brackets, and there shouldn't be any gaps between 0 and      something seriously wrong in the code above or the OP_xxx definitions. */
     OP_BRA, arrival here can only mean there is something seriously wrong  
     in the code above or the OP_xxx definitions. */  
3614    
3615      default:      default:
3616      DPRINTF(("Unknown opcode %d\n", *ecode));      DPRINTF(("Unknown opcode %d\n", *ecode));
3617      RRETURN(PCRE_ERROR_UNKNOWN_NODE);      RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
3618      }      }
3619    
3620    /* Do not stick any code in here without much thought; it is assumed    /* Do not stick any code in here without much thought; it is assumed
# Line 3144  Undefine all the macros that were define Line 3652  Undefine all the macros that were define
3652    
3653  #undef cur_is_word  #undef cur_is_word
3654  #undef condition  #undef condition
 #undef minimize  
3655  #undef prev_is_word  #undef prev_is_word
3656    
3657  #undef original_ims  #undef original_ims
# Line 3200  Returns: > 0 => success; value Line 3707  Returns: > 0 => success; value
3707                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3708  */  */
3709    
3710  PCRE_EXPORT int  PCRE_DATA_SCOPE int
3711  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
3712    const char *subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
3713    int offsetcount)    int offsetcount)
3714  {  {
3715  int rc, resetcount, ocount;  int rc, resetcount, ocount;
3716  int first_byte = -1;  int first_byte = -1;
3717  int req_byte = -1;  int req_byte = -1;
3718  int req_byte2 = -1;  int req_byte2 = -1;
3719  unsigned long int ims = 0;  int newline;
3720    unsigned long int ims;
3721  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
3722  BOOL anchored;  BOOL anchored;
3723  BOOL startline;  BOOL startline;
3724  BOOL firstline;  BOOL firstline;
3725  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3726  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3727    BOOL utf8;
3728  match_data match_block;  match_data match_block;
3729    match_data *md = &match_block;
3730  const uschar *tables;  const uschar *tables;
3731  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3732  const uschar *start_match = (const uschar *)subject + start_offset;  USPTR start_match = (USPTR)subject + start_offset;
3733  const uschar *end_subject;  USPTR end_subject;
3734  const uschar *req_byte_ptr = start_match - 1;  USPTR req_byte_ptr = start_match - 1;
3735    eptrblock eptrchain[EPTR_WORK_SIZE];
3736    
3737  pcre_study_data internal_study;  pcre_study_data internal_study;
3738  const pcre_study_data *study;  const pcre_study_data *study;
# Line 3241  if (offsetcount < 0) return PCRE_ERROR_B Line 3752  if (offsetcount < 0) return PCRE_ERROR_B
3752  the default values. */  the default values. */
3753    
3754  study = NULL;  study = NULL;
3755  match_block.match_limit = MATCH_LIMIT;  md->match_limit = MATCH_LIMIT;
3756  match_block.callout_data = NULL;  md->match_limit_recursion = MATCH_LIMIT_RECURSION;
3757    md->callout_data = NULL;
3758    
3759  /* The table pointer is always in native byte order. */  /* The table pointer is always in native byte order. */
3760    
# Line 3254  if (extra_data != NULL) Line 3766  if (extra_data != NULL)
3766    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
3767      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
3768    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
3769      match_block.match_limit = extra_data->match_limit;      md->match_limit = extra_data->match_limit;
3770      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
3771        md->match_limit_recursion = extra_data->match_limit_recursion;
3772    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
3773      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3774    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;    if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
3775    }    }
3776    
# Line 3286  firstline = (re->options & PCRE_FIRSTLIN Line 3800  firstline = (re->options & PCRE_FIRSTLIN
3800    
3801  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
3802    
3803  match_block.start_code = (const uschar *)external_re + re->name_table_offset +  md->start_code = (const uschar *)external_re + re->name_table_offset +
3804    re->name_count * re->name_entry_size;    re->name_count * re->name_entry_size;
3805    
3806  match_block.start_subject = (const uschar *)subject;  md->start_subject = (USPTR)subject;
3807  match_block.start_offset = start_offset;  md->start_offset = start_offset;
3808  match_block.end_subject = match_block.start_subject + length;  md->end_subject = md->start_subject + length;
3809  end_subject = match_block.end_subject;  end_subject = md->end_subject;
3810    
3811  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
3812  match_block.utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
3813    
3814  match_block.notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
3815  match_block.noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
3816  match_block.notempty = (options & PCRE_NOTEMPTY) != 0;  md->notempty = (options & PCRE_NOTEMPTY) != 0;
3817  match_block.partial = (options & PCRE_PARTIAL) != 0;  md->partial = (options & PCRE_PARTIAL) != 0;
3818  match_block.hitend = FALSE;  md->hitend = FALSE;
3819    
3820    md->recursive = NULL;                   /* No recursion at top level */
3821    md->eptrchain = eptrchain;              /* Make workspace generally available */
3822    
3823  match_block.recursive = NULL;                   /* No recursion at top level */  md->lcc = tables + lcc_offset;
3824    md->ctypes = tables + ctypes_offset;
3825    
3826  match_block.lcc = tables + lcc_offset;  /* Handle different types of newline. The three bits give eight cases. If
3827  match_block.ctypes = tables + ctypes_offset;  nothing is set at run time, whatever was used at compile time applies. */
3828    
3829    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
3830           PCRE_NEWLINE_BITS)
3831      {
3832      case 0: newline = NEWLINE; break;   /* Compile-time default */
3833      case PCRE_NEWLINE_CR: newline = '\r'; break;
3834      case PCRE_NEWLINE_LF: newline = '\n'; break;
3835      case PCRE_NEWLINE_CR+
3836           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
3837      case PCRE_NEWLINE_ANY: newline = -1; break;
3838      default: return PCRE_ERROR_BADNEWLINE;
3839      }
3840    
3841    if (newline < 0)
3842      {
3843      md->nltype = NLTYPE_ANY;
3844      }
3845    else
3846      {
3847      md->nltype = NLTYPE_FIXED;
3848      if (newline > 255)
3849        {
3850        md->nllen = 2;
3851        md->nl[0] = (newline >> 8) & 255;
3852        md->nl[1] = newline & 255;
3853        }
3854      else
3855        {
3856        md->nllen = 1;
3857        md->nl[0] = newline;
3858        }
3859      }
3860    
3861  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
3862  moment. */  moment. */
3863    
3864  if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)
3865    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
3866    
3867  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3868  back the character offset. */  back the character offset. */
3869    
3870  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3871  if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3872    {    {
3873    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
3874      return PCRE_ERROR_BADUTF8;      return PCRE_ERROR_BADUTF8;
# Line 3350  ocount = offsetcount - (offsetcount % 3) Line 3900  ocount = offsetcount - (offsetcount % 3)
3900  if (re->top_backref > 0 && re->top_backref >= ocount/3)  if (re->top_backref > 0 && re->top_backref >= ocount/3)
3901    {    {
3902    ocount = re->top_backref * 3 + 3;    ocount = re->top_backref * 3 + 3;
3903    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    md->offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3904    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3905    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
3906    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3907    }    }
3908  else match_block.offset_vector = offsets;  else md->offset_vector = offsets;
3909    
3910  match_block.offset_end = ocount;  md->offset_end = ocount;
3911  match_block.offset_max = (2*ocount)/3;  md->offset_max = (2*ocount)/3;
3912  match_block.offset_overflow = FALSE;  md->offset_overflow = FALSE;
3913  match_block.capture_last = -1;  md->capture_last = -1;
3914    
3915  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
3916  this makes a huge difference to execution time when there aren't many brackets  this makes a huge difference to execution time when there aren't many brackets
# Line 3373  if (resetcount > offsetcount) resetcount Line 3923  if (resetcount > offsetcount) resetcount
3923  never be used unless previously set, but they get saved and restored, and so we  never be used unless previously set, but they get saved and restored, and so we
3924  initialize them to avoid reading uninitialized locations. */  initialize them to avoid reading uninitialized locations. */
3925    
3926  if (match_block.offset_vector != NULL)  if (md->offset_vector != NULL)
3927    {    {
3928    register int *iptr = match_block.offset_vector + ocount;    register int *iptr = md->offset_vector + ocount;
3929    register int *iend = iptr - resetcount/2 + 1;    register int *iend = iptr - resetcount/2 + 1;
3930    while (--iptr >= iend) *iptr = -1;    while (--iptr >= iend) *iptr = -1;
3931    }    }
# Line 3392  if (!anchored) Line 3942  if (!anchored)
3942      {      {
3943      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
3944      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
3945        first_byte = match_block.lcc[first_byte];        first_byte = md->lcc[first_byte];
3946      }      }
3947    else    else
3948      if (!startline && study != NULL &&      if (!startline && study != NULL &&
# Line 3410  if ((re->options & PCRE_REQCHSET) != 0) Line 3960  if ((re->options & PCRE_REQCHSET) != 0)
3960    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (tables + fcc_offset)[req_byte];  /* case flipped */
3961    }    }
3962    
3963    
3964    /* ==========================================================================*/
3965    
3966  /* Loop for handling unanchored repeated matching attempts; for anchored regexs  /* Loop for handling unanchored repeated matching attempts; for anchored regexs
3967  the loop runs just once. */  the loop runs just once. */
3968    
3969  do  for(;;)
3970    {    {
3971    const uschar *save_end_subject = end_subject;    USPTR save_end_subject = end_subject;
3972    
3973    /* Reset the maximum number of extractions we might see. */    /* Reset the maximum number of extractions we might see. */
3974    
3975    if (match_block.offset_vector != NULL)    if (md->offset_vector != NULL)
3976      {      {
3977      register int *iptr = match_block.offset_vector;      register int *iptr = md->offset_vector;
3978      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
3979      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
3980      }      }
3981    
3982    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* Advance to a unique first char if possible. If firstline is TRUE, the
3983    start of the match is constrained to the first line of a multiline string.    start of the match is constrained to the first line of a multiline string.
3984    Implement this by temporarily adjusting end_subject so that we stop scanning    That is, the match must be before or at the first newline. Implement this by
3985    at a newline. If the match fails at the newline, later code breaks this loop.    temporarily adjusting end_subject so that we stop scanning at a newline. If
3986    */    the match fails at the newline, later code breaks this loop. */
3987    
3988    if (firstline)    if (firstline)
3989      {      {
3990      const uschar *t = start_match;      USPTR t = start_match;
3991      while (t < save_end_subject && *t != '\n') t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3992      end_subject = t;      end_subject = t;
3993      }      }
3994    
# Line 3445  do Line 3998  do
3998      {      {
3999      if (first_byte_caseless)      if (first_byte_caseless)
4000        while (start_match < end_subject &&        while (start_match < end_subject &&
4001               match_block.lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4002          start_match++;          start_match++;
4003      else      else
4004        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4005          start_match++;          start_match++;
4006      }      }
4007    
4008    /* Or to just after \n for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
4009    
4010    else if (startline)    else if (startline)
4011      {      {
4012      if (start_match > match_block.start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4013        {        {
4014        while (start_match < end_subject && start_match[-1] != NEWLINE)        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4015          start_match++;          start_match++;
4016        }        }
4017      }      }
# Line 3480  do Line 4033  do
4033    
4034  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
4035    printf(">>>> Match against: ");    printf(">>>> Match against: ");
4036    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, md);
4037    printf("\n");    printf("\n");
4038  #endif  #endif
4039    
# Line 3494  do Line 4047  do
4047    
4048    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end can
4049    take a long time, and give bad performance on quite ordinary patterns. This    take a long time, and give bad performance on quite ordinary patterns. This
4050    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
4051    don't do this when the string is sufficiently long.    string... so we don't do this when the string is sufficiently long.
4052    
4053    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested.
4054    */    */
4055    
4056    if (req_byte >= 0 &&    if (req_byte >= 0 &&
4057        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4058        !match_block.partial)        !md->partial)
4059      {      {
4060      register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0);      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
4061    
4062      /* We don't need to repeat the search if we haven't yet reached the      /* We don't need to repeat the search if we haven't yet reached the
4063      place we found it at last time. */      place we found it at last time. */
# Line 3527  do Line 4080  do
4080            }            }
4081          }          }
4082    
4083        /* If we can't find the required character, break the matching loop */        /* If we can't find the required character, break the matching loop,
4084          forcing a match failure. */
4085    
4086        if (p >= end_subject) break;        if (p >= end_subject)
4087            {
4088            rc = MATCH_NOMATCH;
4089            break;
4090            }
4091    
4092        /* If we have found the required character, save the point where we        /* If we have found the required character, save the point where we
4093        found it, so that we don't search again next time round the loop if        found it, so that we don't search again next time round the loop if
# Line 3539  do Line 4097  do
4097        }        }
4098      }      }
4099    
4100    /* When a match occurs, substrings will be set for all internal extractions;    /* OK, we can now run the match. */
   we just need to set up the whole thing as substring 0 before returning. If  
   there were too many extractions, set the return code to zero. In the case  
   where we had to get some local store to hold offsets for backreferences, copy  
   those back references that we can. In this case there need not be overflow  
   if certain parts of the pattern were not used. */  
   
   match_block.start_match = start_match;  
   match_block.match_call_count = 0;  
   
   rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,  
     match_isgroup);  
   
   /* When the result is no match, if the subject's first character was a  
   newline and the PCRE_FIRSTLINE option is set, break (which will return  
   PCRE_ERROR_NOMATCH). The option requests that a match occur before the first  
   newline in the subject. Otherwise, advance the pointer to the next character  
   and continue - but the continuation will actually happen only when the  
   pattern is not anchored. */  
4101    
4102    if (rc == MATCH_NOMATCH)    md->start_match = start_match;
4103      {    md->match_call_count = 0;
4104      if (firstline && *start_match == NEWLINE) break;    md->eptrn = 0;                          /* Next free eptrchain slot */
4105      start_match++;    rc = match(start_match, md->start_code, 2, md, ims, NULL, 0, 0);
4106    
4107      /* Any return other than MATCH_NOMATCH breaks the loop. */
4108    
4109      if (rc != MATCH_NOMATCH) break;
4110    
4111      /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
4112      newline in the subject (though it may continue over the newline). Therefore,
4113      if we have just failed to match, starting at a newline, do not continue. */
4114    
4115      if (firstline && IS_NEWLINE(start_match)) break;
4116    
4117      /* Advance the match position by one character. */
4118    
4119      start_match++;
4120  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4121      if (match_block.utf8)    if (utf8)
4122        while(start_match < end_subject && (*start_match & 0xc0) == 0x80)      while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4123          start_match++;        start_match++;
4124  #endif  #endif
     continue;  
     }  
4125    
4126    if (rc != MATCH_MATCH)    /* Break the loop if the pattern is anchored or if we have passed the end of
4127      {    the subject. */
     DPRINTF((">>>> error: returning %d\n", rc));  
     return rc;  
     }  
4128    
4129    /* We have a match! Copy the offset information from temporary store if    if (anchored || start_match > end_subject) break;
   necessary */  
4130    
4131      /* If we have just passed a CR and the newline option is CRLF or ANY, and we
4132      are now at a LF, advance the match position by one more character. */
4133    
4134      if (start_match[-1] == '\r' &&
4135           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
4136           start_match < end_subject &&
4137           *start_match == '\n')
4138        start_match++;
4139    
4140      }   /* End of for(;;) "bumpalong" loop */
4141    
4142    /* ==========================================================================*/
4143    
4144    /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
4145    conditions is true:
4146    
4147    (1) The pattern is anchored;
4148    
4149    (2) We are past the end of the subject;
4150    
4151    (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
4152        this option requests that a match occur at or before the first newline in
4153        the subject.
4154    
4155    When we have a match and the offset vector is big enough to deal with any
4156    backreferences, captured substring offsets will already be set up. In the case
4157    where we had to get some local store to hold offsets for backreference
4158    processing, copy those that we can. In this case there need not be overflow if
4159    certain parts of the pattern were not used, even though there are more
4160    capturing parentheses than vector slots. */
4161    
4162    if (rc == MATCH_MATCH)
4163      {
4164    if (using_temporary_offsets)    if (using_temporary_offsets)
4165      {      {
4166      if (offsetcount >= 4)      if (offsetcount >= 4)
4167        {        {
4168        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, md->offset_vector + 2,
4169          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
4170        DPRINTF(("Copied offsets from temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
4171        }        }
4172      if (match_block.end_offset_top > offsetcount)      if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE;
       match_block.offset_overflow = TRUE;  
   
4173      DPRINTF(("Freeing temporary memory\n"));      DPRINTF(("Freeing temporary memory\n"));
4174      (pcre_free)(match_block.offset_vector);      (pcre_free)(md->offset_vector);
4175      }      }
4176    
4177    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;    /* Set the return code to the number of captured strings, or 0 if there are
4178      too many to fit into the vector. */
4179    
4180      rc = md->offset_overflow? 0 : md->end_offset_top/2;
4181    
4182      /* If there is space, set up the whole thing as substring 0. */
4183    
4184    if (offsetcount < 2) rc = 0; else    if (offsetcount < 2) rc = 0; else
4185      {      {
4186      offsets[0] = start_match - match_block.start_subject;      offsets[0] = start_match - md->start_subject;
4187      offsets[1] = match_block.end_match_ptr - match_block.start_subject;      offsets[1] = md->end_match_ptr - md->start_subject;
4188      }      }
4189    
4190    DPRINTF((">>>> returning %d\n", rc));    DPRINTF((">>>> returning %d\n", rc));
4191    return rc;    return rc;
4192    }    }
4193    
4194  /* This "while" is the end of the "do" above */  /* Control gets here if there has been an error, or if the overall match
4195    attempt has failed at all permitted starting positions. */
 while (!anchored && start_match <= end_subject);  
4196    
4197  if (using_temporary_offsets)  if (using_temporary_offsets)
4198    {    {
4199    DPRINTF(("Freeing temporary memory\n"));    DPRINTF(("Freeing temporary memory\n"));
4200    (pcre_free)(match_block.offset_vector);    (pcre_free)(md->offset_vector);
4201    }    }
4202    
4203  if (match_block.partial && match_block.hitend)  if (rc != MATCH_NOMATCH)
4204      {
4205      DPRINTF((">>>> error: returning %d\n", rc));
4206      return rc;
4207      }
4208    else if (md->partial && md->hitend)
4209    {    {
4210    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));    DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
4211    return PCRE_ERROR_PARTIAL;    return PCRE_ERROR_PARTIAL;

Legend:
Removed from v.85  
changed lines
  Added in v.120

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12