/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 406 by ph10, Mon Mar 23 12:05:43 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 40  POSSIBILITY OF SUCH DAMAGE.
40    
41    
42  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
43  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
44  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
45    applications. */
46    
47    
48    #ifdef HAVE_CONFIG_H
49    #include "config.h"
50    #endif
51    
52    #define NLBLOCK md             /* Block containing newline information */
53    #define PSSTART start_subject  /* Field containing processed string start */
54    #define PSEND   end_subject    /* Field containing processed string end */
55    
56  #include "pcre_internal.h"  #include "pcre_internal.h"
57    
58    
# Line 51  compatible, but it has advantages in cer Line 61  compatible, but it has advantages in cer
61  #define SP "                   "  #define SP "                   "
62    
63    
   
64  /*************************************************  /*************************************************
65  *      Code parameters and static tables         *  *      Code parameters and static tables         *
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91      0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 202  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 277  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 288  const uschar *start_subject = md->start_ Line 309  const uschar *start_subject = md->start_
309  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
310  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
311    
312    #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316    #endif
317    
318  rlevel++;  rlevel++;
319  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 402  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 421  ptr = current_subject; Line 452  ptr = current_subject;
452  for (;;)  for (;;)
453    {    {
454    int i, j;    int i, j;
455    int c, d, clen, dlen;    int clen, dlen;
456      unsigned int c, d;
457    
458    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
459    new state list. */    new state list. */
# Line 457  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 479  for (;;) Line 511  for (;;)
511      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue, rrc;
     int chartype, othercase;  
515    
516  #ifdef DEBUG  #ifdef DEBUG
517      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
518      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
519        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
520          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
521  #endif  #endif
# Line 528  for (;;) Line 559  for (;;)
559    
560      code = start_code + state_offset;      code = start_code + state_offset;
561      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
562    
563      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
564      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 536  for (;;) Line 566  for (;;)
566      permitted.      permitted.
567    
568      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
569      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
570      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
571      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
572      opcodes. */      */
573    
574      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
575        {        {
# Line 550  for (;;) Line 580  for (;;)
580        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
581        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
582          {          {
583          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
584          if (d >= OP_NOTPROP)            {
585            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
586              case OP_NOTPROP:
587              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
588              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
589              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
590              case OP_NOT_HSPACE:
591              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
592              case OP_NOT_VSPACE:
593              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
594              default: break;
595              }
596          }          }
597        }        }
598      else      else
599        {        {
600        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
601        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
602        }        }
603    
604    
# Line 620  for (;;) Line 660  for (;;)
660    
661        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
662        case OP_BRA:        case OP_BRA:
663          case OP_SBRA:
664        do        do
665          {          {
666          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 670  for (;;)
670        break;        break;
671    
672        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
673          case OP_CBRA:
674          case OP_SCBRA:
675          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
676          code += GET(code, 1);
677          while (*code == OP_ALT)
678            {
679            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
680            code += GET(code, 1);
681            }
682          break;
683    
684          /*-----------------------------------------------------------------*/
685        case OP_BRAZERO:        case OP_BRAZERO:
686        case OP_BRAMINZERO:        case OP_BRAMINZERO:
687        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 691  for (;;)
691        break;        break;
692    
693        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
694        case OP_BRANUMBER:        case OP_SKIPZERO:
695        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);        code += 1 + GET(code, 2);
696          while (*code == OP_ALT) code += GET(code, 1);
697          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
698        break;        break;
699    
700        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
701        case OP_CIRC:        case OP_CIRC:
702        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
703            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
704                ptr != end_subject &&
705                WAS_NEWLINE(ptr)))
706          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
707        break;        break;
708    
# Line 679  for (;;) Line 736  for (;;)
736    
737        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
738        case OP_ANY:        case OP_ANY:
739        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && !IS_NEWLINE(ptr))
740            { ADD_NEW(state_offset + 1, 0); }
741          break;
742    
743          /*-----------------------------------------------------------------*/
744          case OP_ALLANY:
745          if (clen > 0)
746          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
747        break;        break;
748    
749        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
750        case OP_EODN:        case OP_EODN:
751        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
752          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
753        break;        break;
754    
# Line 693  for (;;) Line 756  for (;;)
756        case OP_DOLL:        case OP_DOLL:
757        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
758          {          {
759          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
760                                  (ims & PCRE_MULTILINE) != 0)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
761                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
762                ))
763            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
764          }          }
765        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
766          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
767        break;        break;
768    
# Line 746  for (;;) Line 811  for (;;)
811        break;        break;
812    
813    
 #ifdef SUPPORT_UCP  
   
814        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
815        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
816        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
817        */        */
818    
819    #ifdef SUPPORT_UCP
820        case OP_PROP:        case OP_PROP:
821        case OP_NOTPROP:        case OP_NOTPROP:
822        if (clen > 0)        if (clen > 0)
823          {          {
824          int rqdtype, category;          BOOL OK;
825          category = _pcre_ucp_findchar(c, &chartype, &othercase);          const ucd_record * prop = GET_UCD(c);
826          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
           {  
           if ((rqdtype - 128 == category) == (codevalue == OP_PROP))  
             { ADD_NEW(state_offset + 2, 0); }  
           }  
         else  
827            {            {
828            if ((rqdtype == chartype) == (codevalue == OP_PROP))            case PT_ANY:
829              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
830              break;
831    
832              case PT_LAMP:
833              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
834              break;
835    
836              case PT_GC:
837              OK = _pcre_ucp_gentype[prop->chartype] == code[2];
838              break;
839    
840              case PT_PC:
841              OK = prop->chartype == code[2];
842              break;
843    
844              case PT_SC:
845              OK = prop->script == code[2];
846              break;
847    
848              /* Should never occur, but keep compilers from grumbling. */
849    
850              default:
851              OK = codevalue != OP_PROP;
852              break;
853            }            }
854    
855            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
856          }          }
857        break;        break;
858  #endif  #endif
# Line 779  for (;;) Line 862  for (;;)
862  /* ========================================================================== */  /* ========================================================================== */
863        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
864        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
865        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
866        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
867    
868        case OP_TYPEPLUS:        case OP_TYPEPLUS:
869        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
870          case OP_TYPEPOSPLUS:
871        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
872        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
873        if (clen > 0)        if (clen > 0)
874          {          {
875          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
876              (c < 256 &&              (c < 256 &&
877                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
878                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
879            {            {
880              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
881                {
882                active_count--;            /* Remove non-match possibility */
883                next_active_state--;
884                }
885            count++;            count++;
886            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
887            }            }
# Line 802  for (;;) Line 891  for (;;)
891        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
892        case OP_TYPEQUERY:        case OP_TYPEQUERY:
893        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
894          case OP_TYPEPOSQUERY:
895        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
896        if (clen > 0)        if (clen > 0)
897          {          {
898          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
899              (c < 256 &&              (c < 256 &&
900                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
901                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
902            {            {
903              if (codevalue == OP_TYPEPOSQUERY)
904                {
905                active_count--;            /* Remove non-match possibility */
906                next_active_state--;
907                }
908            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
909            }            }
910          }          }
# Line 818  for (;;) Line 913  for (;;)
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
914        case OP_TYPESTAR:        case OP_TYPESTAR:
915        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
916          case OP_TYPEPOSSTAR:
917        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
918        if (clen > 0)        if (clen > 0)
919          {          {
920          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
921              (c < 256 &&              (c < 256 &&
922                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
923                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
924            {            {
925              if (codevalue == OP_TYPEPOSSTAR)
926                {
927                active_count--;            /* Remove non-match possibility */
928                next_active_state--;
929                }
930            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
931            }            }
932          }          }
# Line 833  for (;;) Line 934  for (;;)
934    
935        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
936        case OP_TYPEEXACT:        case OP_TYPEEXACT:
937          count = current_state->count;  /* Number already matched */
938          if (clen > 0)
939            {
940            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
941                (c < 256 &&
942                  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
943                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
944              {
945              if (++count >= GET2(code, 1))
946                { ADD_NEW(state_offset + 4, 0); }
947              else
948                { ADD_NEW(state_offset, count); }
949              }
950            }
951          break;
952    
953          /*-----------------------------------------------------------------*/
954        case OP_TYPEUPTO:        case OP_TYPEUPTO:
955        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
956        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
957          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
958        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
959        if (clen > 0)        if (clen > 0)
960          {          {
961          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
962              (c < 256 &&              (c < 256 &&
963                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
964                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
965            {            {
966              if (codevalue == OP_TYPEPOSUPTO)
967                {
968                active_count--;           /* Remove non-match possibility */
969                next_active_state--;
970                }
971            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
972              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
973            else            else
# Line 855  for (;;) Line 978  for (;;)
978    
979  /* ========================================================================== */  /* ========================================================================== */
980        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
981        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
982        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
983        d variable. */        is in the d variable. */
984    
985    #ifdef SUPPORT_UCP
986        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
987        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
988          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
989        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
990        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
991        if (clen > 0)        if (clen > 0)
992          {          {
993          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
994          int rqdtype = code[2];          const ucd_record * prop = GET_UCD(c);
995          if ((d == OP_PROP) ==          switch(code[2])
996              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
997            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
998              OK = TRUE;
999              break;
1000    
1001              case PT_LAMP:
1002              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1003              break;
1004    
1005              case PT_GC:
1006              OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1007              break;
1008    
1009              case PT_PC:
1010              OK = prop->chartype == code[3];
1011              break;
1012    
1013              case PT_SC:
1014              OK = prop->script == code[3];
1015              break;
1016    
1017              /* Should never occur, but keep compilers from grumbling. */
1018    
1019              default:
1020              OK = codevalue != OP_PROP;
1021              break;
1022              }
1023    
1024            if (OK == (d == OP_PROP))
1025              {
1026              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1027                {
1028                active_count--;           /* Remove non-match possibility */
1029                next_active_state--;
1030                }
1031              count++;
1032              ADD_NEW(state_offset, count);
1033              }
1034          }          }
1035        break;        break;
1036    
1037        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1038        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1039        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1040          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1041        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1042        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1043        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1044          {          {
1045          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1046          int ncount = 0;          int ncount = 0;
1047            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1048              {
1049              active_count--;           /* Remove non-match possibility */
1050              next_active_state--;
1051              }
1052          while (nptr < end_subject)          while (nptr < end_subject)
1053            {            {
1054            int nd;            int nd;
1055            int ndlen = 1;            int ndlen = 1;
1056            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1057            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1058            ncount++;            ncount++;
1059            nptr += ndlen;            nptr += ndlen;
1060            }            }
# Line 895  for (;;) Line 1062  for (;;)
1062          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1063          }          }
1064        break;        break;
1065    #endif
1066    
1067          /*-----------------------------------------------------------------*/
1068          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1069          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1070          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1071          count = current_state->count;  /* Already matched */
1072          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1073          if (clen > 0)
1074            {
1075            int ncount = 0;
1076            switch (c)
1077              {
1078              case 0x000b:
1079              case 0x000c:
1080              case 0x0085:
1081              case 0x2028:
1082              case 0x2029:
1083              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1084              goto ANYNL01;
1085    
1086              case 0x000d:
1087              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1088              /* Fall through */
1089    
1090              ANYNL01:
1091              case 0x000a:
1092              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1093                {
1094                active_count--;           /* Remove non-match possibility */
1095                next_active_state--;
1096                }
1097              count++;
1098              ADD_NEW_DATA(-state_offset, count, ncount);
1099              break;
1100    
1101              default:
1102              break;
1103              }
1104            }
1105          break;
1106    
1107          /*-----------------------------------------------------------------*/
1108          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1109          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1110          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1111          count = current_state->count;  /* Already matched */
1112          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1113          if (clen > 0)
1114            {
1115            BOOL OK;
1116            switch (c)
1117              {
1118              case 0x000a:
1119              case 0x000b:
1120              case 0x000c:
1121              case 0x000d:
1122              case 0x0085:
1123              case 0x2028:
1124              case 0x2029:
1125              OK = TRUE;
1126              break;
1127    
1128              default:
1129              OK = FALSE;
1130              break;
1131              }
1132    
1133            if (OK == (d == OP_VSPACE))
1134              {
1135              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1136                {
1137                active_count--;           /* Remove non-match possibility */
1138                next_active_state--;
1139                }
1140              count++;
1141              ADD_NEW_DATA(-state_offset, count, 0);
1142              }
1143            }
1144          break;
1145    
1146          /*-----------------------------------------------------------------*/
1147          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1148          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1149          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1150          count = current_state->count;  /* Already matched */
1151          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1152          if (clen > 0)
1153            {
1154            BOOL OK;
1155            switch (c)
1156              {
1157              case 0x09:      /* HT */
1158              case 0x20:      /* SPACE */
1159              case 0xa0:      /* NBSP */
1160              case 0x1680:    /* OGHAM SPACE MARK */
1161              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1162              case 0x2000:    /* EN QUAD */
1163              case 0x2001:    /* EM QUAD */
1164              case 0x2002:    /* EN SPACE */
1165              case 0x2003:    /* EM SPACE */
1166              case 0x2004:    /* THREE-PER-EM SPACE */
1167              case 0x2005:    /* FOUR-PER-EM SPACE */
1168              case 0x2006:    /* SIX-PER-EM SPACE */
1169              case 0x2007:    /* FIGURE SPACE */
1170              case 0x2008:    /* PUNCTUATION SPACE */
1171              case 0x2009:    /* THIN SPACE */
1172              case 0x200A:    /* HAIR SPACE */
1173              case 0x202f:    /* NARROW NO-BREAK SPACE */
1174              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1175              case 0x3000:    /* IDEOGRAPHIC SPACE */
1176              OK = TRUE;
1177              break;
1178    
1179              default:
1180              OK = FALSE;
1181              break;
1182              }
1183    
1184            if (OK == (d == OP_HSPACE))
1185              {
1186              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1187                {
1188                active_count--;           /* Remove non-match possibility */
1189                next_active_state--;
1190                }
1191              count++;
1192              ADD_NEW_DATA(-state_offset, count, 0);
1193              }
1194            }
1195          break;
1196    
1197        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1198    #ifdef SUPPORT_UCP
1199        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1200        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1201        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1202          count = 4;
1203        goto QS1;        goto QS1;
1204    
1205        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1206        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1207          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1208        count = 0;        count = 0;
1209    
1210        QS1:        QS1:
1211    
1212        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1213        if (clen > 0)        if (clen > 0)
1214          {          {
1215          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1216          int rqdtype = code[2];          const ucd_record * prop = GET_UCD(c);
1217          if ((d == OP_PROP) ==          switch(code[2])
1218              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1219            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1220              OK = TRUE;
1221              break;
1222    
1223              case PT_LAMP:
1224              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1225              break;
1226    
1227              case PT_GC:
1228              OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1229              break;
1230    
1231              case PT_PC:
1232              OK = prop->chartype == code[3];
1233              break;
1234    
1235              case PT_SC:
1236              OK = prop->script == code[3];
1237              break;
1238    
1239              /* Should never occur, but keep compilers from grumbling. */
1240    
1241              default:
1242              OK = codevalue != OP_PROP;
1243              break;
1244              }
1245    
1246            if (OK == (d == OP_PROP))
1247              {
1248              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1249                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1250                {
1251                active_count--;           /* Remove non-match possibility */
1252                next_active_state--;
1253                }
1254              ADD_NEW(state_offset + count, 0);
1255              }
1256          }          }
1257        break;        break;
1258    
1259        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1260        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1261        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1262          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1263        count = 2;        count = 2;
1264        goto QS2;        goto QS2;
1265    
1266        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1267        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1268          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1269        count = 0;        count = 0;
1270    
1271        QS2:        QS2:
1272    
1273        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1274        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1275          {          {
1276          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1277          int ncount = 0;          int ncount = 0;
1278            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1279                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1280              {
1281              active_count--;           /* Remove non-match possibility */
1282              next_active_state--;
1283              }
1284          while (nptr < end_subject)          while (nptr < end_subject)
1285            {            {
1286            int nd;            int nd;
1287            int ndlen = 1;            int ndlen = 1;
1288            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1289            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1290            ncount++;            ncount++;
1291            nptr += ndlen;            nptr += ndlen;
1292            }            }
1293          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1294          }          }
1295        break;        break;
1296    #endif
1297    
1298          /*-----------------------------------------------------------------*/
1299          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1300          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1301          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1302          count = 2;
1303          goto QS3;
1304    
1305          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1306          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1307          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1308          count = 0;
1309    
1310          QS3:
1311          ADD_ACTIVE(state_offset + 2, 0);
1312          if (clen > 0)
1313            {
1314            int ncount = 0;
1315            switch (c)
1316              {
1317              case 0x000b:
1318              case 0x000c:
1319              case 0x0085:
1320              case 0x2028:
1321              case 0x2029:
1322              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1323              goto ANYNL02;
1324    
1325              case 0x000d:
1326              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1327              /* Fall through */
1328    
1329              ANYNL02:
1330              case 0x000a:
1331              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1332                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1333                {
1334                active_count--;           /* Remove non-match possibility */
1335                next_active_state--;
1336                }
1337              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1338              break;
1339    
1340              default:
1341              break;
1342              }
1343            }
1344          break;
1345    
1346          /*-----------------------------------------------------------------*/
1347          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1348          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1349          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1350          count = 2;
1351          goto QS4;
1352    
1353          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1354          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1355          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1356          count = 0;
1357    
1358          QS4:
1359          ADD_ACTIVE(state_offset + 2, 0);
1360          if (clen > 0)
1361            {
1362            BOOL OK;
1363            switch (c)
1364              {
1365              case 0x000a:
1366              case 0x000b:
1367              case 0x000c:
1368              case 0x000d:
1369              case 0x0085:
1370              case 0x2028:
1371              case 0x2029:
1372              OK = TRUE;
1373              break;
1374    
1375              default:
1376              OK = FALSE;
1377              break;
1378              }
1379            if (OK == (d == OP_VSPACE))
1380              {
1381              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1382                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1383                {
1384                active_count--;           /* Remove non-match possibility */
1385                next_active_state--;
1386                }
1387              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1388              }
1389            }
1390          break;
1391    
1392        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1393          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1394          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1395          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1396          count = 2;
1397          goto QS5;
1398    
1399          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1400          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1401          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1402          count = 0;
1403    
1404          QS5:
1405          ADD_ACTIVE(state_offset + 2, 0);
1406          if (clen > 0)
1407            {
1408            BOOL OK;
1409            switch (c)
1410              {
1411              case 0x09:      /* HT */
1412              case 0x20:      /* SPACE */
1413              case 0xa0:      /* NBSP */
1414              case 0x1680:    /* OGHAM SPACE MARK */
1415              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1416              case 0x2000:    /* EN QUAD */
1417              case 0x2001:    /* EM QUAD */
1418              case 0x2002:    /* EN SPACE */
1419              case 0x2003:    /* EM SPACE */
1420              case 0x2004:    /* THREE-PER-EM SPACE */
1421              case 0x2005:    /* FOUR-PER-EM SPACE */
1422              case 0x2006:    /* SIX-PER-EM SPACE */
1423              case 0x2007:    /* FIGURE SPACE */
1424              case 0x2008:    /* PUNCTUATION SPACE */
1425              case 0x2009:    /* THIN SPACE */
1426              case 0x200A:    /* HAIR SPACE */
1427              case 0x202f:    /* NARROW NO-BREAK SPACE */
1428              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1429              case 0x3000:    /* IDEOGRAPHIC SPACE */
1430              OK = TRUE;
1431              break;
1432    
1433              default:
1434              OK = FALSE;
1435              break;
1436              }
1437    
1438            if (OK == (d == OP_HSPACE))
1439              {
1440              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1441                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1442                {
1443                active_count--;           /* Remove non-match possibility */
1444                next_active_state--;
1445                }
1446              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1447              }
1448            }
1449          break;
1450    
1451          /*-----------------------------------------------------------------*/
1452    #ifdef SUPPORT_UCP
1453        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1454        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1455        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1456          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1457        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1458          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1459        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1460        if (clen > 0)        if (clen > 0)
1461          {          {
1462          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1463          int rqdtype = code[4];          const ucd_record * prop = GET_UCD(c);
1464          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1465            {            {
1466              case PT_ANY:
1467              OK = TRUE;
1468              break;
1469    
1470              case PT_LAMP:
1471              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1472              break;
1473    
1474              case PT_GC:
1475              OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1476              break;
1477    
1478              case PT_PC:
1479              OK = prop->chartype == code[5];
1480              break;
1481    
1482              case PT_SC:
1483              OK = prop->script == code[5];
1484              break;
1485    
1486              /* Should never occur, but keep compilers from grumbling. */
1487    
1488              default:
1489              OK = codevalue != OP_PROP;
1490              break;
1491              }
1492    
1493            if (OK == (d == OP_PROP))
1494              {
1495              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1496                {
1497                active_count--;           /* Remove non-match possibility */
1498                next_active_state--;
1499                }
1500            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1501              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1502            else            else
1503              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1504            }            }
# Line 975  for (;;) Line 1509  for (;;)
1509        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1510        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1511        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1512          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1513        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1514          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1515        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1516        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1517          {          {
1518          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1519          int ncount = 0;          int ncount = 0;
1520            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1521              {
1522              active_count--;           /* Remove non-match possibility */
1523              next_active_state--;
1524              }
1525          while (nptr < end_subject)          while (nptr < end_subject)
1526            {            {
1527            int nd;            int nd;
1528            int ndlen = 1;            int ndlen = 1;
1529            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1530            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1531            ncount++;            ncount++;
1532            nptr += ndlen;            nptr += ndlen;
1533            }            }
# Line 997  for (;;) Line 1537  for (;;)
1537            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1538          }          }
1539        break;        break;
1540    #endif
1541    
1542          /*-----------------------------------------------------------------*/
1543          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1544          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1545          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1546          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1547          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1548            { ADD_ACTIVE(state_offset + 4, 0); }
1549          count = current_state->count;  /* Number already matched */
1550          if (clen > 0)
1551            {
1552            int ncount = 0;
1553            switch (c)
1554              {
1555              case 0x000b:
1556              case 0x000c:
1557              case 0x0085:
1558              case 0x2028:
1559              case 0x2029:
1560              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1561              goto ANYNL03;
1562    
1563              case 0x000d:
1564              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1565              /* Fall through */
1566    
1567              ANYNL03:
1568              case 0x000a:
1569              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1570                {
1571                active_count--;           /* Remove non-match possibility */
1572                next_active_state--;
1573                }
1574              if (++count >= GET2(code, 1))
1575                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1576              else
1577                { ADD_NEW_DATA(-state_offset, count, ncount); }
1578              break;
1579    
1580              default:
1581              break;
1582              }
1583            }
1584          break;
1585    
1586          /*-----------------------------------------------------------------*/
1587          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1588          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1589          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1590          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1591          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1592            { ADD_ACTIVE(state_offset + 4, 0); }
1593          count = current_state->count;  /* Number already matched */
1594          if (clen > 0)
1595            {
1596            BOOL OK;
1597            switch (c)
1598              {
1599              case 0x000a:
1600              case 0x000b:
1601              case 0x000c:
1602              case 0x000d:
1603              case 0x0085:
1604              case 0x2028:
1605              case 0x2029:
1606              OK = TRUE;
1607              break;
1608    
1609              default:
1610              OK = FALSE;
1611              }
1612    
1613            if (OK == (d == OP_VSPACE))
1614              {
1615              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1616                {
1617                active_count--;           /* Remove non-match possibility */
1618                next_active_state--;
1619                }
1620              if (++count >= GET2(code, 1))
1621                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1622              else
1623                { ADD_NEW_DATA(-state_offset, count, 0); }
1624              }
1625            }
1626          break;
1627    
1628          /*-----------------------------------------------------------------*/
1629          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1630          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1631          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1632          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1633          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1634            { ADD_ACTIVE(state_offset + 4, 0); }
1635          count = current_state->count;  /* Number already matched */
1636          if (clen > 0)
1637            {
1638            BOOL OK;
1639            switch (c)
1640              {
1641              case 0x09:      /* HT */
1642              case 0x20:      /* SPACE */
1643              case 0xa0:      /* NBSP */
1644              case 0x1680:    /* OGHAM SPACE MARK */
1645              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1646              case 0x2000:    /* EN QUAD */
1647              case 0x2001:    /* EM QUAD */
1648              case 0x2002:    /* EN SPACE */
1649              case 0x2003:    /* EM SPACE */
1650              case 0x2004:    /* THREE-PER-EM SPACE */
1651              case 0x2005:    /* FOUR-PER-EM SPACE */
1652              case 0x2006:    /* SIX-PER-EM SPACE */
1653              case 0x2007:    /* FIGURE SPACE */
1654              case 0x2008:    /* PUNCTUATION SPACE */
1655              case 0x2009:    /* THIN SPACE */
1656              case 0x200A:    /* HAIR SPACE */
1657              case 0x202f:    /* NARROW NO-BREAK SPACE */
1658              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1659              case 0x3000:    /* IDEOGRAPHIC SPACE */
1660              OK = TRUE;
1661              break;
1662    
1663              default:
1664              OK = FALSE;
1665              break;
1666              }
1667    
1668            if (OK == (d == OP_HSPACE))
1669              {
1670              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1671                {
1672                active_count--;           /* Remove non-match possibility */
1673                next_active_state--;
1674                }
1675              if (++count >= GET2(code, 1))
1676                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1677              else
1678                { ADD_NEW_DATA(-state_offset, count, 0); }
1679              }
1680            }
1681          break;
1682    
1683  /* ========================================================================== */  /* ========================================================================== */
1684        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1700  for (;;)
1700          {          {
1701          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1702            {            {
1703              unsigned int othercase;
1704            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1705    
1706            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1707            other case of the character, if there is one. The result of            other case of the character. */
           _pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1708    
1709  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1710            if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)            othercase = UCD_OTHERCASE(c);
1711    #else
1712              othercase = NOTACHAR;
1713  #endif  #endif
             othercase = -1;  
1714    
1715            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1716            }            }
# Line 1050  for (;;) Line 1732  for (;;)
1732        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1733    
1734        case OP_EXTUNI:        case OP_EXTUNI:
1735        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1736          {          {
1737          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1738          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1740  for (;;)
1740            {            {
1741            int nclen = 1;            int nclen = 1;
1742            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1743            if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1744            ncount++;            ncount++;
1745            nptr += nclen;            nptr += nclen;
1746            }            }
# Line 1068  for (;;) Line 1750  for (;;)
1750  #endif  #endif
1751    
1752        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1753          /* This is tricky like EXTUNI because it too can match more than one
1754          character (when CR is followed by LF). In this case, set up a negative
1755          state to wait for one character to pass before continuing. */
1756    
1757          case OP_ANYNL:
1758          if (clen > 0) switch(c)
1759            {
1760            case 0x000b:
1761            case 0x000c:
1762            case 0x0085:
1763            case 0x2028:
1764            case 0x2029:
1765            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1766    
1767            case 0x000a:
1768            ADD_NEW(state_offset + 1, 0);
1769            break;
1770    
1771            case 0x000d:
1772            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1773              {
1774              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1775              }
1776            else
1777              {
1778              ADD_NEW(state_offset + 1, 0);
1779              }
1780            break;
1781            }
1782          break;
1783    
1784          /*-----------------------------------------------------------------*/
1785          case OP_NOT_VSPACE:
1786          if (clen > 0) switch(c)
1787            {
1788            case 0x000a:
1789            case 0x000b:
1790            case 0x000c:
1791            case 0x000d:
1792            case 0x0085:
1793            case 0x2028:
1794            case 0x2029:
1795            break;
1796    
1797            default:
1798            ADD_NEW(state_offset + 1, 0);
1799            break;
1800            }
1801          break;
1802    
1803          /*-----------------------------------------------------------------*/
1804          case OP_VSPACE:
1805          if (clen > 0) switch(c)
1806            {
1807            case 0x000a:
1808            case 0x000b:
1809            case 0x000c:
1810            case 0x000d:
1811            case 0x0085:
1812            case 0x2028:
1813            case 0x2029:
1814            ADD_NEW(state_offset + 1, 0);
1815            break;
1816    
1817            default: break;
1818            }
1819          break;
1820    
1821          /*-----------------------------------------------------------------*/
1822          case OP_NOT_HSPACE:
1823          if (clen > 0) switch(c)
1824            {
1825            case 0x09:      /* HT */
1826            case 0x20:      /* SPACE */
1827            case 0xa0:      /* NBSP */
1828            case 0x1680:    /* OGHAM SPACE MARK */
1829            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1830            case 0x2000:    /* EN QUAD */
1831            case 0x2001:    /* EM QUAD */
1832            case 0x2002:    /* EN SPACE */
1833            case 0x2003:    /* EM SPACE */
1834            case 0x2004:    /* THREE-PER-EM SPACE */
1835            case 0x2005:    /* FOUR-PER-EM SPACE */
1836            case 0x2006:    /* SIX-PER-EM SPACE */
1837            case 0x2007:    /* FIGURE SPACE */
1838            case 0x2008:    /* PUNCTUATION SPACE */
1839            case 0x2009:    /* THIN SPACE */
1840            case 0x200A:    /* HAIR SPACE */
1841            case 0x202f:    /* NARROW NO-BREAK SPACE */
1842            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1843            case 0x3000:    /* IDEOGRAPHIC SPACE */
1844            break;
1845    
1846            default:
1847            ADD_NEW(state_offset + 1, 0);
1848            break;
1849            }
1850          break;
1851    
1852          /*-----------------------------------------------------------------*/
1853          case OP_HSPACE:
1854          if (clen > 0) switch(c)
1855            {
1856            case 0x09:      /* HT */
1857            case 0x20:      /* SPACE */
1858            case 0xa0:      /* NBSP */
1859            case 0x1680:    /* OGHAM SPACE MARK */
1860            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1861            case 0x2000:    /* EN QUAD */
1862            case 0x2001:    /* EM QUAD */
1863            case 0x2002:    /* EN SPACE */
1864            case 0x2003:    /* EM SPACE */
1865            case 0x2004:    /* THREE-PER-EM SPACE */
1866            case 0x2005:    /* FOUR-PER-EM SPACE */
1867            case 0x2006:    /* SIX-PER-EM SPACE */
1868            case 0x2007:    /* FIGURE SPACE */
1869            case 0x2008:    /* PUNCTUATION SPACE */
1870            case 0x2009:    /* THIN SPACE */
1871            case 0x200A:    /* HAIR SPACE */
1872            case 0x202f:    /* NARROW NO-BREAK SPACE */
1873            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1874            case 0x3000:    /* IDEOGRAPHIC SPACE */
1875            ADD_NEW(state_offset + 1, 0);
1876            break;
1877            }
1878          break;
1879    
1880          /*-----------------------------------------------------------------*/
1881        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1882        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1883        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1885  for (;;)
1885        case OP_NOT:        case OP_NOT:
1886        if (clen > 0)        if (clen > 0)
1887          {          {
1888          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1889          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1890          }          }
1891        break;        break;
# Line 1083  for (;;) Line 1893  for (;;)
1893        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1894        case OP_PLUS:        case OP_PLUS:
1895        case OP_MINPLUS:        case OP_MINPLUS:
1896          case OP_POSPLUS:
1897        case OP_NOTPLUS:        case OP_NOTPLUS:
1898        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1899          case OP_NOTPOSPLUS:
1900        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1901        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1902        if (clen > 0)        if (clen > 0)
1903          {          {
1904          int otherd = -1;          unsigned int otherd = NOTACHAR;
1905          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1906            {            {
1907  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1908            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1909              {              {
1910  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1911              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
1912  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1913              }              }
1914            else            else
# Line 1104  for (;;) Line 1916  for (;;)
1916            otherd = fcc[d];            otherd = fcc[d];
1917            }            }
1918          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1919            { count++; ADD_NEW(state_offset, count); }            {
1920              if (count > 0 &&
1921                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1922                {
1923                active_count--;             /* Remove non-match possibility */
1924                next_active_state--;
1925                }
1926              count++;
1927              ADD_NEW(state_offset, count);
1928              }
1929          }          }
1930        break;        break;
1931    
1932        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1933        case OP_QUERY:        case OP_QUERY:
1934        case OP_MINQUERY:        case OP_MINQUERY:
1935          case OP_POSQUERY:
1936        case OP_NOTQUERY:        case OP_NOTQUERY:
1937        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1938          case OP_NOTPOSQUERY:
1939        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1940        if (clen > 0)        if (clen > 0)
1941          {          {
1942          int otherd = -1;          unsigned int otherd = NOTACHAR;
1943          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1944            {            {
1945  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1946            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1947              {              {
1948  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1949              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
1950  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1951              }              }
1952            else            else
# Line 1131  for (;;) Line 1954  for (;;)
1954            otherd = fcc[d];            otherd = fcc[d];
1955            }            }
1956          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1957            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1958              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1959                {
1960                active_count--;            /* Remove non-match possibility */
1961                next_active_state--;
1962                }
1963              ADD_NEW(state_offset + dlen + 1, 0);
1964              }
1965          }          }
1966        break;        break;
1967    
1968        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1969        case OP_STAR:        case OP_STAR:
1970        case OP_MINSTAR:        case OP_MINSTAR:
1971          case OP_POSSTAR:
1972        case OP_NOTSTAR:        case OP_NOTSTAR:
1973        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1974          case OP_NOTPOSSTAR:
1975        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1976        if (clen > 0)        if (clen > 0)
1977          {          {
1978          int otherd = -1;          unsigned int otherd = NOTACHAR;
1979          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1980            {            {
1981  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1982            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1983              {              {
1984  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1985              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
1986  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1987              }              }
1988            else            else
# Line 1158  for (;;) Line 1990  for (;;)
1990            otherd = fcc[d];            otherd = fcc[d];
1991            }            }
1992          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1993            { ADD_NEW(state_offset, 0); }            {
1994              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1995                {
1996                active_count--;            /* Remove non-match possibility */
1997                next_active_state--;
1998                }
1999              ADD_NEW(state_offset, 0);
2000              }
2001          }          }
2002        break;        break;
2003    
2004        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2005        case OP_EXACT:        case OP_EXACT:
2006          case OP_NOTEXACT:
2007          count = current_state->count;  /* Number already matched */
2008          if (clen > 0)
2009            {
2010            unsigned int otherd = NOTACHAR;
2011            if ((ims & PCRE_CASELESS) != 0)
2012              {
2013    #ifdef SUPPORT_UTF8
2014              if (utf8 && d >= 128)
2015                {
2016    #ifdef SUPPORT_UCP
2017                otherd = UCD_OTHERCASE(d);
2018    #endif  /* SUPPORT_UCP */
2019                }
2020              else
2021    #endif  /* SUPPORT_UTF8 */
2022              otherd = fcc[d];
2023              }
2024            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2025              {
2026              if (++count >= GET2(code, 1))
2027                { ADD_NEW(state_offset + dlen + 3, 0); }
2028              else
2029                { ADD_NEW(state_offset, count); }
2030              }
2031            }
2032          break;
2033    
2034          /*-----------------------------------------------------------------*/
2035        case OP_UPTO:        case OP_UPTO:
2036        case OP_MINUPTO:        case OP_MINUPTO:
2037        case OP_NOTEXACT:        case OP_POSUPTO:
2038        case OP_NOTUPTO:        case OP_NOTUPTO:
2039        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2040        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2041          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2042        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2043        if (clen > 0)        if (clen > 0)
2044          {          {
2045          int otherd = -1;          unsigned int otherd = NOTACHAR;
2046          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2047            {            {
2048  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2049            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2050              {              {
2051  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2052              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
2053  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2054              }              }
2055            else            else
# Line 1190  for (;;) Line 2058  for (;;)
2058            }            }
2059          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2060            {            {
2061              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2062                {
2063                active_count--;             /* Remove non-match possibility */
2064                next_active_state--;
2065                }
2066            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2067              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2068            else            else
# Line 1267  for (;;) Line 2140  for (;;)
2140              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
2141            if (isinclass)            if (isinclass)
2142              {              {
2143              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
2144                if (++count >= max && max != 0)   /* Max 0 => no limit */
2145                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
2146              else              else
2147                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1283  for (;;) Line 2157  for (;;)
2157    
2158  /* ========================================================================== */  /* ========================================================================== */
2159        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2160        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2161          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2162          though the other "backtracking verbs" are not supported. */
2163    
2164          case OP_FAIL:
2165          break;
2166    
2167        case OP_ASSERT:        case OP_ASSERT:
2168        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1317  for (;;) Line 2196  for (;;)
2196    
2197        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2198        case OP_COND:        case OP_COND:
2199          case OP_SCOND:
2200          {          {
2201          int local_offsets[1000];          int local_offsets[1000];
2202          int local_workspace[1000];          int local_workspace[1000];
2203          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2204            int condcode;
2205    
2206          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Because of the way auto-callout works during compile, a callout item
2207          means "test if in a recursion". */          is inserted between OP_COND and an assertion condition. This does not
2208            happen for the other conditions. */
2209    
2210          if (condcode == OP_CREF)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2211              {
2212              rrc = 0;
2213              if (pcre_callout != NULL)
2214                {
2215                pcre_callout_block cb;
2216                cb.version          = 1;   /* Version 1 of the callout block */
2217                cb.callout_number   = code[LINK_SIZE+2];
2218                cb.offset_vector    = offsets;
2219                cb.subject          = (PCRE_SPTR)start_subject;
2220                cb.subject_length   = end_subject - start_subject;
2221                cb.start_match      = current_subject - start_subject;
2222                cb.current_position = ptr - start_subject;
2223                cb.pattern_position = GET(code, LINK_SIZE + 3);
2224                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2225                cb.capture_top      = 1;
2226                cb.capture_last     = -1;
2227                cb.callout_data     = md->callout_data;
2228                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2229                }
2230              if (rrc > 0) break;                      /* Fail this thread */
2231              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2232              }
2233    
2234            condcode = code[LINK_SIZE+1];
2235    
2236            /* Back reference conditions are not supported */
2237    
2238            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2239    
2240            /* The DEFINE condition is always false */
2241    
2242            if (condcode == OP_DEF)
2243              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2244    
2245            /* The only supported version of OP_RREF is for the value RREF_ANY,
2246            which means "test if in any recursion". We can't test for specifically
2247            recursed groups. */
2248    
2249            else if (condcode == OP_RREF)
2250            {            {
2251            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2252            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2253            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2254              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2255              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2256            }            }
2257    
2258          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 1360  for (;;) Line 2282  for (;;)
2282                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2283              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2284            else            else
2285              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2286            }            }
2287          }          }
2288        break;        break;
# Line 1512  for (;;) Line 2434  for (;;)
2434        /* Handle callouts */        /* Handle callouts */
2435    
2436        case OP_CALLOUT:        case OP_CALLOUT:
2437          rrc = 0;
2438        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2439          {          {
         int rrc;  
2440          pcre_callout_block cb;          pcre_callout_block cb;
2441          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2442          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2443          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2444          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2445          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
2446          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
2447          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1529  for (;;) Line 2451  for (;;)
2451          cb.capture_last     = -1;          cb.capture_last     = -1;
2452          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2453          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2454          }          }
2455          if (rrc == 0)
2456            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2457        break;        break;
2458    
2459    
# Line 1567  for (;;) Line 2490  for (;;)
2490      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2491        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2492        rlevel*2-2, SP));        rlevel*2-2, SP));
2493      return match_count;      break;        /* In effect, "return", but see the comment below */
2494      }      }
2495    
2496    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1575  for (;;) Line 2498  for (;;)
2498    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2499    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2500    
2501  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2502    if we use "return" above, we have compiler trouble. Some compilers warn if
2503    there's nothing here because they think the function doesn't return a value. On
2504    the other hand, if we put a dummy statement here, some more clever compilers
2505    complain that it can't be reached. Sigh. */
2506    
2507  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2508  }  }
2509    
2510    
# Line 1595  is not anchored. Line 2520  is not anchored.
2520    
2521  Arguments:  Arguments:
2522    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2523    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2524    subject         points to the subject string    subject         points to the subject string
2525    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2526    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1611  Returns: > 0 => number of match Line 2536  Returns: > 0 => number of match
2536                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2537  */  */
2538    
2539  PCRE_EXPORT int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2540  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2541    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2542    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2543  {  {
2544  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2545  dfa_match_data match_block;  dfa_match_data match_block;
2546    dfa_match_data *md = &match_block;
2547  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2548  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2549    
# Line 1632  BOOL req_byte_caseless = FALSE; Line 2558  BOOL req_byte_caseless = FALSE;
2558  int first_byte = -1;  int first_byte = -1;
2559  int req_byte = -1;  int req_byte = -1;
2560  int req_byte2 = -1;  int req_byte2 = -1;
2561    int newline;
2562    
2563  /* Plausibility checks */  /* Plausibility checks */
2564    
# Line 1646  flipping, so we scan the extra_data bloc Line 2573  flipping, so we scan the extra_data bloc
2573  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2574  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2575    
2576  match_block.tables = re->tables;  md->tables = re->tables;
2577  match_block.callout_data = NULL;  md->callout_data = NULL;
2578    
2579  if (extra_data != NULL)  if (extra_data != NULL)
2580    {    {
# Line 1655  if (extra_data != NULL) Line 2582  if (extra_data != NULL)
2582    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2583      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2584    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2585      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2586        return PCRE_ERROR_DFA_UMLIMIT;
2587    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2588      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2589    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2590      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2591    }    }
2592    
2593  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1679  current_subject = (const unsigned char * Line 2608  current_subject = (const unsigned char *
2608  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2609  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2610    
2611    #ifdef SUPPORT_UTF8
2612  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2613  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2614    utf8 = FALSE;
2615    #endif
2616    
2617    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2618      (re->options & PCRE_ANCHORED) != 0;
2619    
2620  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2621    
2622  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2623      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2624  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2625  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2626  match_block.moptions = options;  md->moptions = options;
2627  match_block.poptions = re->options;  md->poptions = re->options;
2628    
2629    /* If the BSR option is not set at match time, copy what was set
2630    at compile time. */
2631    
2632    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2633      {
2634      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2635        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2636    #ifdef BSR_ANYCRLF
2637      else md->moptions |= PCRE_BSR_ANYCRLF;
2638    #endif
2639      }
2640    
2641    /* Handle different types of newline. The three bits give eight cases. If
2642    nothing is set at run time, whatever was used at compile time applies. */
2643    
2644    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2645             PCRE_NEWLINE_BITS)
2646      {
2647      case 0: newline = NEWLINE; break;   /* Compile-time default */
2648      case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2649      case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2650      case PCRE_NEWLINE_CR+
2651           PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2652      case PCRE_NEWLINE_ANY: newline = -1; break;
2653      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2654      default: return PCRE_ERROR_BADNEWLINE;
2655      }
2656    
2657    if (newline == -2)
2658      {
2659      md->nltype = NLTYPE_ANYCRLF;
2660      }
2661    else if (newline < 0)
2662      {
2663      md->nltype = NLTYPE_ANY;
2664      }
2665    else
2666      {
2667      md->nltype = NLTYPE_FIXED;
2668      if (newline > 255)
2669        {
2670        md->nllen = 2;
2671        md->nl[0] = (newline >> 8) & 255;
2672        md->nl[1] = newline & 255;
2673        }
2674      else
2675        {
2676        md->nllen = 1;
2677        md->nl[0] = newline;
2678        }
2679      }
2680    
2681  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2682  back the character offset. */  back the character offset. */
# Line 1715  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2702  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2702  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2703  in other programs later. */  in other programs later. */
2704    
2705  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2706    
2707  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2708  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2709    
2710  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2711  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2712  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2713    
2714  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1732  studied, there may be a bitmap of possib Line 2719  studied, there may be a bitmap of possib
2719    
2720  if (!anchored)  if (!anchored)
2721    {    {
2722    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2723      {      {
2724      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2725      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1749  if (!anchored) Line 2736  if (!anchored)
2736  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2737  character" set. */  character" set. */
2738    
2739  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2740    {    {
2741    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2742    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2743    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2744    }    }
2745    
2746  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2747  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2748  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2749    
2750  for (;;)  for (;;)
2751    {    {
# Line 1769  for (;;) Line 2755  for (;;)
2755      {      {
2756      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2757    
2758      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2759      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2760      Implement this by temporarily adjusting end_subject so that we stop scanning      end_subject so that we stop scanning at a newline. If the match fails at
2761      at a newline. If the match fails at the newline, later code breaks this loop.      the newline, later code breaks this loop. */
     */  
2762    
2763      if (firstline)      if (firstline)
2764        {        {
2765        const uschar *t = current_subject;        USPTR t = current_subject;
2766        while (t < save_end_subject && *t != '\n') t++;  #ifdef SUPPORT_UTF8
2767          if (utf8)
2768            {
2769            while (t < md->end_subject && !IS_NEWLINE(t))
2770              {
2771              t++;
2772              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2773              }
2774            }
2775          else
2776    #endif
2777          while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2778        end_subject = t;        end_subject = t;
2779        }        }
2780    
2781      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2782        starting point is not found, or if a known later character is not present.
2783        However, there is an option that disables these, for testing and for
2784        ensuring that all callouts do actually occur. */
2785    
2786        if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2787        {        {
       if (first_byte_caseless)  
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
2788    
2789      /* Or to just after \n for a multiline match if possible */        /* Advance to a known first byte. */
2790    
2791      else if (startline)        if (first_byte >= 0)
       {  
       if (current_subject > match_block.start_subject + start_offset)  
2792          {          {
2793          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          if (first_byte_caseless)
2794            current_subject++;            while (current_subject < end_subject &&
2795                     lcc[*current_subject] != first_byte)
2796                current_subject++;
2797            else
2798              while (current_subject < end_subject &&
2799                     *current_subject != first_byte)
2800                current_subject++;
2801          }          }
       }  
2802    
2803      /* Or to a non-unique first char after study */        /* Or to just after a linebreak for a multiline match if possible */
2804    
2805      else if (start_bits != NULL)        else if (startline)
2806        {          {
2807        while (current_subject < end_subject)          if (current_subject > md->start_subject + start_offset)
2808              {
2809    #ifdef SUPPORT_UTF8
2810              if (utf8)
2811                {
2812                while (current_subject < end_subject &&
2813                       !WAS_NEWLINE(current_subject))
2814                  {
2815                  current_subject++;
2816                  while(current_subject < end_subject &&
2817                        (*current_subject & 0xc0) == 0x80)
2818                    current_subject++;
2819                  }
2820                }
2821              else
2822    #endif
2823              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2824                current_subject++;
2825    
2826              /* If we have just passed a CR and the newline option is ANY or
2827              ANYCRLF, and we are now at a LF, advance the match position by one
2828              more character. */
2829    
2830              if (current_subject[-1] == CHAR_CR &&
2831                   (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2832                   current_subject < end_subject &&
2833                   *current_subject == CHAR_NL)
2834                current_subject++;
2835              }
2836            }
2837    
2838          /* Or to a non-unique first char after study */
2839    
2840          else if (start_bits != NULL)
2841          {          {
2842          register unsigned int c = *current_subject;          while (current_subject < end_subject)
2843          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
2844            else break;            register unsigned int c = *current_subject;
2845              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2846                else break;
2847              }
2848          }          }
2849        }        }
2850    
# Line 1834  for (;;) Line 2866  for (;;)
2866    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2867    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2868    
2869    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2870    */    also be explicitly deactivated. */
2871    
2872    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2873          req_byte >= 0 &&
2874        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2875        (options & PCRE_PARTIAL) == 0)        (options & PCRE_PARTIAL) == 0)
2876      {      {
# Line 1880  for (;;) Line 2913  for (;;)
2913    /* OK, now we can do the business */    /* OK, now we can do the business */
2914    
2915    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2916      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2917      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2918      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2919      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2920      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2921      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2922      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2923      wscount,                                   /* size of same */      wscount,                           /* size of same */
2924      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2925      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2926      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2927    
2928    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2929    on only if not anchored. */    on only if not anchored. */
# Line 1900  for (;;) Line 2933  for (;;)
2933    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2934    and firstline is set. */    and firstline is set. */
2935    
2936    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2937    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2938    if (utf8)    if (utf8)
2939      {      {
2940      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2941        current_subject++;        current_subject++;
2942      }      }
 #endif  
   
2943    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2944    }  
2945      /* If we have just passed a CR and we are now at a LF, and the pattern does
2946      not contain any explicit matches for \r or \n, and the newline option is CRLF
2947      or ANY or ANYCRLF, advance the match position by one more character. */
2948    
2949      if (current_subject[-1] == CHAR_CR &&
2950          current_subject < end_subject &&
2951          *current_subject == CHAR_NL &&
2952          (re->flags & PCRE_HASCRORLF) == 0 &&
2953            (md->nltype == NLTYPE_ANY ||
2954             md->nltype == NLTYPE_ANYCRLF ||
2955             md->nllen == 2))
2956        current_subject++;
2957    
2958      }   /* "Bumpalong" loop */
2959    
2960  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2961  }  }

Legend:
Removed from v.85  
changed lines
  Added in v.406

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12