/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 57  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 126  static uschar toptable1[] = { Line 147  static uschar toptable1[] = {
147  };  };
148    
149  static uschar toptable2[] = {  static uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 277  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 288  const uschar *start_subject = md->start_ Line 309  const uschar *start_subject = md->start_
309  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
310  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
311    
312    #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316    #endif
317    
318  rlevel++;  rlevel++;
319  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 402  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 421  ptr = current_subject; Line 452  ptr = current_subject;
452  for (;;)  for (;;)
453    {    {
454    int i, j;    int i, j;
455    int c, d, clen, dlen;    int clen, dlen;
456      unsigned int c, d;
457    
458    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
459    new state list. */    new state list. */
# Line 457  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 480  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515      int chartype, othercase;  #ifdef SUPPORT_UCP
516        int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 528  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 536  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 550  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 620  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
697        case OP_CIRC:        case OP_CIRC:
698        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
699            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
700                ptr != end_subject &&
701                WAS_NEWLINE(ptr)))
702          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
703        break;        break;
704    
# Line 679  for (;;) Line 732  for (;;)
732    
733        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
734        case OP_ANY:        case OP_ANY:
735        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
736          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
737        break;        break;
738    
739        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
740        case OP_EODN:        case OP_EODN:
741        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
742          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
743        break;        break;
744    
# Line 693  for (;;) Line 746  for (;;)
746        case OP_DOLL:        case OP_DOLL:
747        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
748          {          {
749          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
750                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
751                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
752                ))
753            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
754          }          }
755        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
756          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
757        break;        break;
758    
# Line 746  for (;;) Line 801  for (;;)
801        break;        break;
802    
803    
 #ifdef SUPPORT_UCP  
   
804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
805        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
806        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
807        */        */
808    
809    #ifdef SUPPORT_UCP
810        case OP_PROP:        case OP_PROP:
811        case OP_NOTPROP:        case OP_NOTPROP:
812        if (clen > 0)        if (clen > 0)
813          {          {
814          int rqdtype, category;          BOOL OK;
815          category = ucp_findchar(c, &chartype, &othercase);          int category = _pcre_ucp_findprop(c, &chartype, &script);
816          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
           {  
           if ((rqdtype - 128 == category) == (codevalue == OP_PROP))  
             { ADD_NEW(state_offset + 2, 0); }  
           }  
         else  
817            {            {
818            if ((rqdtype == chartype) == (codevalue == OP_PROP))            case PT_ANY:
819              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
820              break;
821    
822              case PT_LAMP:
823              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
824              break;
825    
826              case PT_GC:
827              OK = category == code[2];
828              break;
829    
830              case PT_PC:
831              OK = chartype == code[2];
832              break;
833    
834              case PT_SC:
835              OK = script == code[2];
836              break;
837    
838              /* Should never occur, but keep compilers from grumbling. */
839    
840              default:
841              OK = codevalue != OP_PROP;
842              break;
843            }            }
844    
845            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
846          }          }
847        break;        break;
848  #endif  #endif
# Line 784  for (;;) Line 857  for (;;)
857    
858        case OP_TYPEPLUS:        case OP_TYPEPLUS:
859        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
860          case OP_TYPEPOSPLUS:
861        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
862        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
863        if (clen > 0)        if (clen > 0)
864          {          {
865          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
866              (c < 256 &&              (c < 256 &&
867                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
868                   (ims & PCRE_DOTALL) != 0 ||
869                   !IS_NEWLINE(ptr)
870                  ) &&
871                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
872            {            {
873              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
874                {
875                active_count--;            /* Remove non-match possibility */
876                next_active_state--;
877                }
878            count++;            count++;
879            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
880            }            }
# Line 802  for (;;) Line 884  for (;;)
884        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
885        case OP_TYPEQUERY:        case OP_TYPEQUERY:
886        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
887          case OP_TYPEPOSQUERY:
888        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
889        if (clen > 0)        if (clen > 0)
890          {          {
891          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
892              (c < 256 &&              (c < 256 &&
893                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
894                   (ims & PCRE_DOTALL) != 0 ||
895                   !IS_NEWLINE(ptr)
896                  ) &&
897                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
898            {            {
899              if (codevalue == OP_TYPEPOSQUERY)
900                {
901                active_count--;            /* Remove non-match possibility */
902                next_active_state--;
903                }
904            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
905            }            }
906          }          }
# Line 818  for (;;) Line 909  for (;;)
909        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
910        case OP_TYPESTAR:        case OP_TYPESTAR:
911        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
912          case OP_TYPEPOSSTAR:
913        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
914        if (clen > 0)        if (clen > 0)
915          {          {
916          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
917              (c < 256 &&              (c < 256 &&
918                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
919                   (ims & PCRE_DOTALL) != 0 ||
920                   !IS_NEWLINE(ptr)
921                  ) &&
922                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
923            {            {
924              if (codevalue == OP_TYPEPOSSTAR)
925                {
926                active_count--;            /* Remove non-match possibility */
927                next_active_state--;
928                }
929            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
930            }            }
931          }          }
# Line 833  for (;;) Line 933  for (;;)
933    
934        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
935        case OP_TYPEEXACT:        case OP_TYPEEXACT:
936          count = current_state->count;  /* Number already matched */
937          if (clen > 0)
938            {
939            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
940                (c < 256 &&
941                  (d != OP_ANY ||
942                   (ims & PCRE_DOTALL) != 0 ||
943                   !IS_NEWLINE(ptr)
944                  ) &&
945                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
946              {
947              if (++count >= GET2(code, 1))
948                { ADD_NEW(state_offset + 4, 0); }
949              else
950                { ADD_NEW(state_offset, count); }
951              }
952            }
953          break;
954    
955          /*-----------------------------------------------------------------*/
956        case OP_TYPEUPTO:        case OP_TYPEUPTO:
957        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
958        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
959          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
960        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
961        if (clen > 0)        if (clen > 0)
962          {          {
963          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
964              (c < 256 &&              (c < 256 &&
965                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
966                   (ims & PCRE_DOTALL) != 0 ||
967                   !IS_NEWLINE(ptr)
968                  ) &&
969                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
970            {            {
971              if (codevalue == OP_TYPEPOSUPTO)
972                {
973                active_count--;           /* Remove non-match possibility */
974                next_active_state--;
975                }
976            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
977              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
978            else            else
# Line 855  for (;;) Line 983  for (;;)
983    
984  /* ========================================================================== */  /* ========================================================================== */
985        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
986        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
987        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
988        d variable. */        is in the d variable. */
989    
990    #ifdef SUPPORT_UCP
991        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
992        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
993          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
994        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
995        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
996        if (clen > 0)        if (clen > 0)
997          {          {
998          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
999          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1000          if ((d == OP_PROP) ==          switch(code[2])
1001              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1002            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
1003              OK = TRUE;
1004              break;
1005    
1006              case PT_LAMP:
1007              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1008              break;
1009    
1010              case PT_GC:
1011              OK = category == code[3];
1012              break;
1013    
1014              case PT_PC:
1015              OK = chartype == code[3];
1016              break;
1017    
1018              case PT_SC:
1019              OK = script == code[3];
1020              break;
1021    
1022              /* Should never occur, but keep compilers from grumbling. */
1023    
1024              default:
1025              OK = codevalue != OP_PROP;
1026              break;
1027              }
1028    
1029            if (OK == (d == OP_PROP))
1030              {
1031              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1032                {
1033                active_count--;           /* Remove non-match possibility */
1034                next_active_state--;
1035                }
1036              count++;
1037              ADD_NEW(state_offset, count);
1038              }
1039          }          }
1040        break;        break;
1041    
1042        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1043        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1044        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1045          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1046        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1047        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1048        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1049          {          {
1050          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1051          int ncount = 0;          int ncount = 0;
1052            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1053              {
1054              active_count--;           /* Remove non-match possibility */
1055              next_active_state--;
1056              }
1057          while (nptr < end_subject)          while (nptr < end_subject)
1058            {            {
1059            int nd;            int nd;
1060            int ndlen = 1;            int ndlen = 1;
1061            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1062            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1063            ncount++;            ncount++;
1064            nptr += ndlen;            nptr += ndlen;
1065            }            }
# Line 895  for (;;) Line 1067  for (;;)
1067          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1068          }          }
1069        break;        break;
1070    #endif
1071    
1072        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1073          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1074          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1075          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1076          count = current_state->count;  /* Already matched */
1077          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1078          if (clen > 0)
1079            {
1080            int ncount = 0;
1081            switch (c)
1082              {
1083              case 0x000b:
1084              case 0x000c:
1085              case 0x0085:
1086              case 0x2028:
1087              case 0x2029:
1088              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1089              goto ANYNL01;
1090    
1091              case 0x000d:
1092              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1093              /* Fall through */
1094    
1095              ANYNL01:
1096              case 0x000a:
1097              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1098                {
1099                active_count--;           /* Remove non-match possibility */
1100                next_active_state--;
1101                }
1102              count++;
1103              ADD_NEW_DATA(-state_offset, count, ncount);
1104              break;
1105    
1106              default:
1107              break;
1108              }
1109            }
1110          break;
1111    
1112          /*-----------------------------------------------------------------*/
1113          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1114          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1115          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1116          count = current_state->count;  /* Already matched */
1117          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1118          if (clen > 0)
1119            {
1120            BOOL OK;
1121            switch (c)
1122              {
1123              case 0x000a:
1124              case 0x000b:
1125              case 0x000c:
1126              case 0x000d:
1127              case 0x0085:
1128              case 0x2028:
1129              case 0x2029:
1130              OK = TRUE;
1131              break;
1132    
1133              default:
1134              OK = FALSE;
1135              break;
1136              }
1137    
1138            if (OK == (d == OP_VSPACE))
1139              {
1140              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1141                {
1142                active_count--;           /* Remove non-match possibility */
1143                next_active_state--;
1144                }
1145              count++;
1146              ADD_NEW_DATA(-state_offset, count, 0);
1147              }
1148            }
1149          break;
1150    
1151          /*-----------------------------------------------------------------*/
1152          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1153          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1154          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1155          count = current_state->count;  /* Already matched */
1156          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1157          if (clen > 0)
1158            {
1159            BOOL OK;
1160            switch (c)
1161              {
1162              case 0x09:      /* HT */
1163              case 0x20:      /* SPACE */
1164              case 0xa0:      /* NBSP */
1165              case 0x1680:    /* OGHAM SPACE MARK */
1166              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1167              case 0x2000:    /* EN QUAD */
1168              case 0x2001:    /* EM QUAD */
1169              case 0x2002:    /* EN SPACE */
1170              case 0x2003:    /* EM SPACE */
1171              case 0x2004:    /* THREE-PER-EM SPACE */
1172              case 0x2005:    /* FOUR-PER-EM SPACE */
1173              case 0x2006:    /* SIX-PER-EM SPACE */
1174              case 0x2007:    /* FIGURE SPACE */
1175              case 0x2008:    /* PUNCTUATION SPACE */
1176              case 0x2009:    /* THIN SPACE */
1177              case 0x200A:    /* HAIR SPACE */
1178              case 0x202f:    /* NARROW NO-BREAK SPACE */
1179              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1180              case 0x3000:    /* IDEOGRAPHIC SPACE */
1181              OK = TRUE;
1182              break;
1183    
1184              default:
1185              OK = FALSE;
1186              break;
1187              }
1188    
1189            if (OK == (d == OP_HSPACE))
1190              {
1191              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1192                {
1193                active_count--;           /* Remove non-match possibility */
1194                next_active_state--;
1195                }
1196              count++;
1197              ADD_NEW_DATA(-state_offset, count, 0);
1198              }
1199            }
1200          break;
1201    
1202          /*-----------------------------------------------------------------*/
1203    #ifdef SUPPORT_UCP
1204        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1205        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1206        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1207          count = 4;
1208        goto QS1;        goto QS1;
1209    
1210        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1211        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1212          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1213        count = 0;        count = 0;
1214    
1215        QS1:        QS1:
1216    
1217        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1218        if (clen > 0)        if (clen > 0)
1219          {          {
1220          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1221          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1222          if ((d == OP_PROP) ==          switch(code[2])
1223              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1224            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1225              OK = TRUE;
1226              break;
1227    
1228              case PT_LAMP:
1229              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1230              break;
1231    
1232              case PT_GC:
1233              OK = category == code[3];
1234              break;
1235    
1236              case PT_PC:
1237              OK = chartype == code[3];
1238              break;
1239    
1240              case PT_SC:
1241              OK = script == code[3];
1242              break;
1243    
1244              /* Should never occur, but keep compilers from grumbling. */
1245    
1246              default:
1247              OK = codevalue != OP_PROP;
1248              break;
1249              }
1250    
1251            if (OK == (d == OP_PROP))
1252              {
1253              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1254                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1255                {
1256                active_count--;           /* Remove non-match possibility */
1257                next_active_state--;
1258                }
1259              ADD_NEW(state_offset + count, 0);
1260              }
1261          }          }
1262        break;        break;
1263    
1264        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1265        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1266        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1267          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1268        count = 2;        count = 2;
1269        goto QS2;        goto QS2;
1270    
1271        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1272        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1273          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1274        count = 0;        count = 0;
1275    
1276        QS2:        QS2:
1277    
1278        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1279        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1280          {          {
1281          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1282          int ncount = 0;          int ncount = 0;
1283            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1284                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1285              {
1286              active_count--;           /* Remove non-match possibility */
1287              next_active_state--;
1288              }
1289          while (nptr < end_subject)          while (nptr < end_subject)
1290            {            {
1291            int nd;            int nd;
1292            int ndlen = 1;            int ndlen = 1;
1293            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1294            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1295            ncount++;            ncount++;
1296            nptr += ndlen;            nptr += ndlen;
1297            }            }
1298          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1299          }          }
1300        break;        break;
1301    #endif
1302    
1303          /*-----------------------------------------------------------------*/
1304          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1305          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1306          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1307          count = 2;
1308          goto QS3;
1309    
1310          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1311          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1312          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1313          count = 0;
1314    
1315          QS3:
1316          ADD_ACTIVE(state_offset + 2, 0);
1317          if (clen > 0)
1318            {
1319            int ncount = 0;
1320            switch (c)
1321              {
1322              case 0x000b:
1323              case 0x000c:
1324              case 0x0085:
1325              case 0x2028:
1326              case 0x2029:
1327              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1328              goto ANYNL02;
1329    
1330              case 0x000d:
1331              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332              /* Fall through */
1333    
1334              ANYNL02:
1335              case 0x000a:
1336              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1337                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1338                {
1339                active_count--;           /* Remove non-match possibility */
1340                next_active_state--;
1341                }
1342              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1343              break;
1344    
1345              default:
1346              break;
1347              }
1348            }
1349          break;
1350    
1351          /*-----------------------------------------------------------------*/
1352          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1353          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1354          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1355          count = 2;
1356          goto QS4;
1357    
1358          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1359          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1360          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1361          count = 0;
1362    
1363          QS4:
1364          ADD_ACTIVE(state_offset + 2, 0);
1365          if (clen > 0)
1366            {
1367            BOOL OK;
1368            switch (c)
1369              {
1370              case 0x000a:
1371              case 0x000b:
1372              case 0x000c:
1373              case 0x000d:
1374              case 0x0085:
1375              case 0x2028:
1376              case 0x2029:
1377              OK = TRUE;
1378              break;
1379    
1380              default:
1381              OK = FALSE;
1382              break;
1383              }
1384            if (OK == (d == OP_VSPACE))
1385              {
1386              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1387                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1388                {
1389                active_count--;           /* Remove non-match possibility */
1390                next_active_state--;
1391                }
1392              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1393              }
1394            }
1395          break;
1396    
1397        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1398          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1399          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1400          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1401          count = 2;
1402          goto QS5;
1403    
1404          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1405          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1406          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1407          count = 0;
1408    
1409          QS5:
1410          ADD_ACTIVE(state_offset + 2, 0);
1411          if (clen > 0)
1412            {
1413            BOOL OK;
1414            switch (c)
1415              {
1416              case 0x09:      /* HT */
1417              case 0x20:      /* SPACE */
1418              case 0xa0:      /* NBSP */
1419              case 0x1680:    /* OGHAM SPACE MARK */
1420              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1421              case 0x2000:    /* EN QUAD */
1422              case 0x2001:    /* EM QUAD */
1423              case 0x2002:    /* EN SPACE */
1424              case 0x2003:    /* EM SPACE */
1425              case 0x2004:    /* THREE-PER-EM SPACE */
1426              case 0x2005:    /* FOUR-PER-EM SPACE */
1427              case 0x2006:    /* SIX-PER-EM SPACE */
1428              case 0x2007:    /* FIGURE SPACE */
1429              case 0x2008:    /* PUNCTUATION SPACE */
1430              case 0x2009:    /* THIN SPACE */
1431              case 0x200A:    /* HAIR SPACE */
1432              case 0x202f:    /* NARROW NO-BREAK SPACE */
1433              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1434              case 0x3000:    /* IDEOGRAPHIC SPACE */
1435              OK = TRUE;
1436              break;
1437    
1438              default:
1439              OK = FALSE;
1440              break;
1441              }
1442    
1443            if (OK == (d == OP_HSPACE))
1444              {
1445              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1446                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1447                {
1448                active_count--;           /* Remove non-match possibility */
1449                next_active_state--;
1450                }
1451              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1452              }
1453            }
1454          break;
1455    
1456          /*-----------------------------------------------------------------*/
1457    #ifdef SUPPORT_UCP
1458        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1459        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1460        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1461          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1462        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1463          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1464        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1465        if (clen > 0)        if (clen > 0)
1466          {          {
1467          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1468          int rqdtype = code[4];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1469          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1470            {            {
1471              case PT_ANY:
1472              OK = TRUE;
1473              break;
1474    
1475              case PT_LAMP:
1476              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1477              break;
1478    
1479              case PT_GC:
1480              OK = category == code[5];
1481              break;
1482    
1483              case PT_PC:
1484              OK = chartype == code[5];
1485              break;
1486    
1487              case PT_SC:
1488              OK = script == code[5];
1489              break;
1490    
1491              /* Should never occur, but keep compilers from grumbling. */
1492    
1493              default:
1494              OK = codevalue != OP_PROP;
1495              break;
1496              }
1497    
1498            if (OK == (d == OP_PROP))
1499              {
1500              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1501                {
1502                active_count--;           /* Remove non-match possibility */
1503                next_active_state--;
1504                }
1505            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1506              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1507            else            else
1508              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1509            }            }
# Line 975  for (;;) Line 1514  for (;;)
1514        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1515        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1516        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1517          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1518        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1519          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1520        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1521        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1522          {          {
1523          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1524          int ncount = 0;          int ncount = 0;
1525            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1526              {
1527              active_count--;           /* Remove non-match possibility */
1528              next_active_state--;
1529              }
1530          while (nptr < end_subject)          while (nptr < end_subject)
1531            {            {
1532            int nd;            int nd;
1533            int ndlen = 1;            int ndlen = 1;
1534            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1535            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1536            ncount++;            ncount++;
1537            nptr += ndlen;            nptr += ndlen;
1538            }            }
# Line 997  for (;;) Line 1542  for (;;)
1542            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1543          }          }
1544        break;        break;
1545    #endif
1546    
1547          /*-----------------------------------------------------------------*/
1548          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1549          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1550          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1551          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1552          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1553            { ADD_ACTIVE(state_offset + 4, 0); }
1554          count = current_state->count;  /* Number already matched */
1555          if (clen > 0)
1556            {
1557            int ncount = 0;
1558            switch (c)
1559              {
1560              case 0x000b:
1561              case 0x000c:
1562              case 0x0085:
1563              case 0x2028:
1564              case 0x2029:
1565              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1566              goto ANYNL03;
1567    
1568              case 0x000d:
1569              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1570              /* Fall through */
1571    
1572              ANYNL03:
1573              case 0x000a:
1574              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1575                {
1576                active_count--;           /* Remove non-match possibility */
1577                next_active_state--;
1578                }
1579              if (++count >= GET2(code, 1))
1580                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1581              else
1582                { ADD_NEW_DATA(-state_offset, count, ncount); }
1583              break;
1584    
1585              default:
1586              break;
1587              }
1588            }
1589          break;
1590    
1591          /*-----------------------------------------------------------------*/
1592          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1593          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1594          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1595          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1596          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1597            { ADD_ACTIVE(state_offset + 4, 0); }
1598          count = current_state->count;  /* Number already matched */
1599          if (clen > 0)
1600            {
1601            BOOL OK;
1602            switch (c)
1603              {
1604              case 0x000a:
1605              case 0x000b:
1606              case 0x000c:
1607              case 0x000d:
1608              case 0x0085:
1609              case 0x2028:
1610              case 0x2029:
1611              OK = TRUE;
1612              break;
1613    
1614              default:
1615              OK = FALSE;
1616              }
1617    
1618            if (OK == (d == OP_VSPACE))
1619              {
1620              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1621                {
1622                active_count--;           /* Remove non-match possibility */
1623                next_active_state--;
1624                }
1625              if (++count >= GET2(code, 1))
1626                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1627              else
1628                { ADD_NEW_DATA(-state_offset, count, 0); }
1629              }
1630            }
1631          break;
1632    
1633          /*-----------------------------------------------------------------*/
1634          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1635          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1636          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1637          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1638          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1639            { ADD_ACTIVE(state_offset + 4, 0); }
1640          count = current_state->count;  /* Number already matched */
1641          if (clen > 0)
1642            {
1643            BOOL OK;
1644            switch (c)
1645              {
1646              case 0x09:      /* HT */
1647              case 0x20:      /* SPACE */
1648              case 0xa0:      /* NBSP */
1649              case 0x1680:    /* OGHAM SPACE MARK */
1650              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1651              case 0x2000:    /* EN QUAD */
1652              case 0x2001:    /* EM QUAD */
1653              case 0x2002:    /* EN SPACE */
1654              case 0x2003:    /* EM SPACE */
1655              case 0x2004:    /* THREE-PER-EM SPACE */
1656              case 0x2005:    /* FOUR-PER-EM SPACE */
1657              case 0x2006:    /* SIX-PER-EM SPACE */
1658              case 0x2007:    /* FIGURE SPACE */
1659              case 0x2008:    /* PUNCTUATION SPACE */
1660              case 0x2009:    /* THIN SPACE */
1661              case 0x200A:    /* HAIR SPACE */
1662              case 0x202f:    /* NARROW NO-BREAK SPACE */
1663              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1664              case 0x3000:    /* IDEOGRAPHIC SPACE */
1665              OK = TRUE;
1666              break;
1667    
1668              default:
1669              OK = FALSE;
1670              break;
1671              }
1672    
1673            if (OK == (d == OP_HSPACE))
1674              {
1675              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1676                {
1677                active_count--;           /* Remove non-match possibility */
1678                next_active_state--;
1679                }
1680              if (++count >= GET2(code, 1))
1681                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1682              else
1683                { ADD_NEW_DATA(-state_offset, count, 0); }
1684              }
1685            }
1686          break;
1687    
1688  /* ========================================================================== */  /* ========================================================================== */
1689        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1705  for (;;)
1705          {          {
1706          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1707            {            {
1708              unsigned int othercase;
1709            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1710    
1711            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1712            other case of the character, if there is one. The result of            other case of the character. */
           ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1713    
1714  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1715            if (ucp_findchar(c, &chartype, &othercase) < 0)            othercase = _pcre_ucp_othercase(c);
1716    #else
1717              othercase = NOTACHAR;
1718  #endif  #endif
             othercase = -1;  
1719    
1720            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1721            }            }
# Line 1050  for (;;) Line 1737  for (;;)
1737        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1738    
1739        case OP_EXTUNI:        case OP_EXTUNI:
1740        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1741          {          {
1742          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1743          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1745  for (;;)
1745            {            {
1746            int nclen = 1;            int nclen = 1;
1747            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1748            if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1749            ncount++;            ncount++;
1750            nptr += nclen;            nptr += nclen;
1751            }            }
# Line 1068  for (;;) Line 1755  for (;;)
1755  #endif  #endif
1756    
1757        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1758          /* This is tricky like EXTUNI because it too can match more than one
1759          character (when CR is followed by LF). In this case, set up a negative
1760          state to wait for one character to pass before continuing. */
1761    
1762          case OP_ANYNL:
1763          if (clen > 0) switch(c)
1764            {
1765            case 0x000b:
1766            case 0x000c:
1767            case 0x0085:
1768            case 0x2028:
1769            case 0x2029:
1770            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1771    
1772            case 0x000a:
1773            ADD_NEW(state_offset + 1, 0);
1774            break;
1775    
1776            case 0x000d:
1777            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1778              {
1779              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1780              }
1781            else
1782              {
1783              ADD_NEW(state_offset + 1, 0);
1784              }
1785            break;
1786            }
1787          break;
1788    
1789          /*-----------------------------------------------------------------*/
1790          case OP_NOT_VSPACE:
1791          if (clen > 0) switch(c)
1792            {
1793            case 0x000a:
1794            case 0x000b:
1795            case 0x000c:
1796            case 0x000d:
1797            case 0x0085:
1798            case 0x2028:
1799            case 0x2029:
1800            break;
1801    
1802            default:
1803            ADD_NEW(state_offset + 1, 0);
1804            break;
1805            }
1806          break;
1807    
1808          /*-----------------------------------------------------------------*/
1809          case OP_VSPACE:
1810          if (clen > 0) switch(c)
1811            {
1812            case 0x000a:
1813            case 0x000b:
1814            case 0x000c:
1815            case 0x000d:
1816            case 0x0085:
1817            case 0x2028:
1818            case 0x2029:
1819            ADD_NEW(state_offset + 1, 0);
1820            break;
1821    
1822            default: break;
1823            }
1824          break;
1825    
1826          /*-----------------------------------------------------------------*/
1827          case OP_NOT_HSPACE:
1828          if (clen > 0) switch(c)
1829            {
1830            case 0x09:      /* HT */
1831            case 0x20:      /* SPACE */
1832            case 0xa0:      /* NBSP */
1833            case 0x1680:    /* OGHAM SPACE MARK */
1834            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1835            case 0x2000:    /* EN QUAD */
1836            case 0x2001:    /* EM QUAD */
1837            case 0x2002:    /* EN SPACE */
1838            case 0x2003:    /* EM SPACE */
1839            case 0x2004:    /* THREE-PER-EM SPACE */
1840            case 0x2005:    /* FOUR-PER-EM SPACE */
1841            case 0x2006:    /* SIX-PER-EM SPACE */
1842            case 0x2007:    /* FIGURE SPACE */
1843            case 0x2008:    /* PUNCTUATION SPACE */
1844            case 0x2009:    /* THIN SPACE */
1845            case 0x200A:    /* HAIR SPACE */
1846            case 0x202f:    /* NARROW NO-BREAK SPACE */
1847            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1848            case 0x3000:    /* IDEOGRAPHIC SPACE */
1849            break;
1850    
1851            default:
1852            ADD_NEW(state_offset + 1, 0);
1853            break;
1854            }
1855          break;
1856    
1857          /*-----------------------------------------------------------------*/
1858          case OP_HSPACE:
1859          if (clen > 0) switch(c)
1860            {
1861            case 0x09:      /* HT */
1862            case 0x20:      /* SPACE */
1863            case 0xa0:      /* NBSP */
1864            case 0x1680:    /* OGHAM SPACE MARK */
1865            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1866            case 0x2000:    /* EN QUAD */
1867            case 0x2001:    /* EM QUAD */
1868            case 0x2002:    /* EN SPACE */
1869            case 0x2003:    /* EM SPACE */
1870            case 0x2004:    /* THREE-PER-EM SPACE */
1871            case 0x2005:    /* FOUR-PER-EM SPACE */
1872            case 0x2006:    /* SIX-PER-EM SPACE */
1873            case 0x2007:    /* FIGURE SPACE */
1874            case 0x2008:    /* PUNCTUATION SPACE */
1875            case 0x2009:    /* THIN SPACE */
1876            case 0x200A:    /* HAIR SPACE */
1877            case 0x202f:    /* NARROW NO-BREAK SPACE */
1878            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1879            case 0x3000:    /* IDEOGRAPHIC SPACE */
1880            ADD_NEW(state_offset + 1, 0);
1881            break;
1882            }
1883          break;
1884    
1885          /*-----------------------------------------------------------------*/
1886        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1887        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1888        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1890  for (;;)
1890        case OP_NOT:        case OP_NOT:
1891        if (clen > 0)        if (clen > 0)
1892          {          {
1893          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1894          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1895          }          }
1896        break;        break;
# Line 1083  for (;;) Line 1898  for (;;)
1898        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1899        case OP_PLUS:        case OP_PLUS:
1900        case OP_MINPLUS:        case OP_MINPLUS:
1901          case OP_POSPLUS:
1902        case OP_NOTPLUS:        case OP_NOTPLUS:
1903        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1904          case OP_NOTPOSPLUS:
1905        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1906        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1907        if (clen > 0)        if (clen > 0)
1908          {          {
1909          int otherd = -1;          unsigned int otherd = NOTACHAR;
1910          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1911            {            {
1912  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1913            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1914              {              {
1915  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1916              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1917  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1918              }              }
1919            else            else
# Line 1104  for (;;) Line 1921  for (;;)
1921            otherd = fcc[d];            otherd = fcc[d];
1922            }            }
1923          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1924            { count++; ADD_NEW(state_offset, count); }            {
1925              if (count > 0 &&
1926                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1927                {
1928                active_count--;             /* Remove non-match possibility */
1929                next_active_state--;
1930                }
1931              count++;
1932              ADD_NEW(state_offset, count);
1933              }
1934          }          }
1935        break;        break;
1936    
1937        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1938        case OP_QUERY:        case OP_QUERY:
1939        case OP_MINQUERY:        case OP_MINQUERY:
1940          case OP_POSQUERY:
1941        case OP_NOTQUERY:        case OP_NOTQUERY:
1942        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1943          case OP_NOTPOSQUERY:
1944        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1945        if (clen > 0)        if (clen > 0)
1946          {          {
1947          int otherd = -1;          unsigned int otherd = NOTACHAR;
1948          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1949            {            {
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1951            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1952              {              {
1953  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1954              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1955  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1956              }              }
1957            else            else
# Line 1131  for (;;) Line 1959  for (;;)
1959            otherd = fcc[d];            otherd = fcc[d];
1960            }            }
1961          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1962            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1963              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1964                {
1965                active_count--;            /* Remove non-match possibility */
1966                next_active_state--;
1967                }
1968              ADD_NEW(state_offset + dlen + 1, 0);
1969              }
1970          }          }
1971        break;        break;
1972    
1973        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1974        case OP_STAR:        case OP_STAR:
1975        case OP_MINSTAR:        case OP_MINSTAR:
1976          case OP_POSSTAR:
1977        case OP_NOTSTAR:        case OP_NOTSTAR:
1978        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1979          case OP_NOTPOSSTAR:
1980        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1981        if (clen > 0)        if (clen > 0)
1982          {          {
1983          int otherd = -1;          unsigned int otherd = NOTACHAR;
1984          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1985            {            {
1986  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1987            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1988              {              {
1989  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1990              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1991  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1992              }              }
1993            else            else
# Line 1158  for (;;) Line 1995  for (;;)
1995            otherd = fcc[d];            otherd = fcc[d];
1996            }            }
1997          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1998            { ADD_NEW(state_offset, 0); }            {
1999              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2000                {
2001                active_count--;            /* Remove non-match possibility */
2002                next_active_state--;
2003                }
2004              ADD_NEW(state_offset, 0);
2005              }
2006          }          }
2007        break;        break;
2008    
2009        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2010        case OP_EXACT:        case OP_EXACT:
2011          case OP_NOTEXACT:
2012          count = current_state->count;  /* Number already matched */
2013          if (clen > 0)
2014            {
2015            unsigned int otherd = NOTACHAR;
2016            if ((ims & PCRE_CASELESS) != 0)
2017              {
2018    #ifdef SUPPORT_UTF8
2019              if (utf8 && d >= 128)
2020                {
2021    #ifdef SUPPORT_UCP
2022                otherd = _pcre_ucp_othercase(d);
2023    #endif  /* SUPPORT_UCP */
2024                }
2025              else
2026    #endif  /* SUPPORT_UTF8 */
2027              otherd = fcc[d];
2028              }
2029            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2030              {
2031              if (++count >= GET2(code, 1))
2032                { ADD_NEW(state_offset + dlen + 3, 0); }
2033              else
2034                { ADD_NEW(state_offset, count); }
2035              }
2036            }
2037          break;
2038    
2039          /*-----------------------------------------------------------------*/
2040        case OP_UPTO:        case OP_UPTO:
2041        case OP_MINUPTO:        case OP_MINUPTO:
2042        case OP_NOTEXACT:        case OP_POSUPTO:
2043        case OP_NOTUPTO:        case OP_NOTUPTO:
2044        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2045        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2046          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2047        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2048        if (clen > 0)        if (clen > 0)
2049          {          {
2050          int otherd = -1;          unsigned int otherd = NOTACHAR;
2051          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2052            {            {
2053  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2054            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2055              {              {
2056  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2057              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
2058  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2059              }              }
2060            else            else
# Line 1190  for (;;) Line 2063  for (;;)
2063            }            }
2064          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2065            {            {
2066              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2067                {
2068                active_count--;             /* Remove non-match possibility */
2069                next_active_state--;
2070                }
2071            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2072              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2073            else            else
# Line 1267  for (;;) Line 2145  for (;;)
2145              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
2146            if (isinclass)            if (isinclass)
2147              {              {
2148              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
2149                if (++count >= max && max != 0)   /* Max 0 => no limit */
2150                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
2151              else              else
2152                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1317  for (;;) Line 2196  for (;;)
2196    
2197        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2198        case OP_COND:        case OP_COND:
2199          case OP_SCOND:
2200          {          {
2201          int local_offsets[1000];          int local_offsets[1000];
2202          int local_workspace[1000];          int local_workspace[1000];
2203          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2204    
2205          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2206          means "test if in a recursion". */  
2207            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2208    
2209            /* The DEFINE condition is always false */
2210    
2211            if (condcode == OP_DEF)
2212              {
2213              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2214              }
2215    
2216            /* The only supported version of OP_RREF is for the value RREF_ANY,
2217            which means "test if in any recursion". We can't test for specifically
2218            recursed groups. */
2219    
2220          if (condcode == OP_CREF)          else if (condcode == OP_RREF)
2221            {            {
2222            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2223            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2224            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2225              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2226            }            }
# Line 1424  for (;;) Line 2316  for (;;)
2316        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2317        case OP_ONCE:        case OP_ONCE:
2318          {          {
         const uschar *endcode;  
2319          int local_offsets[2];          int local_offsets[2];
2320          int local_workspace[1000];          int local_workspace[1000];
2321    
# Line 1446  for (;;) Line 2337  for (;;)
2337            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
2338            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2339            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
           BOOL is_repeated;  
2340    
2341            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2342              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
# Line 1521  for (;;) Line 2411  for (;;)
2411          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2412          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2413          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2414          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2415          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
2416          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
2417          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1569  for (;;) Line 2459  for (;;)
2459      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2460        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2461        rlevel*2-2, SP));        rlevel*2-2, SP));
2462      return match_count;      break;        /* In effect, "return", but see the comment below */
2463      }      }
2464    
2465    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1577  for (;;) Line 2467  for (;;)
2467    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2468    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2469    
2470  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2471    if we use "return" above, we have compiler trouble. Some compilers warn if
2472    there's nothing here because they think the function doesn't return a value. On
2473    the other hand, if we put a dummy statement here, some more clever compilers
2474    complain that it can't be reached. Sigh. */
2475    
2476  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2477  }  }
2478    
2479    
# Line 1597  is not anchored. Line 2489  is not anchored.
2489    
2490  Arguments:  Arguments:
2491    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2492    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2493    subject         points to the subject string    subject         points to the subject string
2494    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2495    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1613  Returns: > 0 => number of match Line 2505  Returns: > 0 => number of match
2505                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2506  */  */
2507    
2508  EXPORT int  PCRE_EXP_DEFN int
2509  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2510    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2511    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2512  {  {
2513  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2514  dfa_match_data match_block;  dfa_match_data match_block;
2515    dfa_match_data *md = &match_block;
2516  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2517  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2518    
# Line 1634  BOOL req_byte_caseless = FALSE; Line 2527  BOOL req_byte_caseless = FALSE;
2527  int first_byte = -1;  int first_byte = -1;
2528  int req_byte = -1;  int req_byte = -1;
2529  int req_byte2 = -1;  int req_byte2 = -1;
2530    int newline;
2531    
2532  /* Plausibility checks */  /* Plausibility checks */
2533    
# Line 1648  flipping, so we scan the extra_data bloc Line 2542  flipping, so we scan the extra_data bloc
2542  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2543  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2544    
2545  match_block.tables = re->tables;  md->tables = re->tables;
2546  match_block.callout_data = NULL;  md->callout_data = NULL;
2547    
2548  if (extra_data != NULL)  if (extra_data != NULL)
2549    {    {
# Line 1657  if (extra_data != NULL) Line 2551  if (extra_data != NULL)
2551    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2552      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2553    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2554      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2555        return PCRE_ERROR_DFA_UMLIMIT;
2556    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2557      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2558    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2559      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2560    }    }
2561    
2562  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1681  current_subject = (const unsigned char * Line 2577  current_subject = (const unsigned char *
2577  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2578  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2579    
2580    #ifdef SUPPORT_UTF8
2581  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2582  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2583    utf8 = FALSE;
2584    #endif
2585    
2586    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2587      (re->options & PCRE_ANCHORED) != 0;
2588    
2589  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2590    
2591  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2592      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2593  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2594  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2595  match_block.moptions = options;  md->moptions = options;
2596  match_block.poptions = re->options;  md->poptions = re->options;
2597    
2598    /* If the BSR option is not set at match time, copy what was set
2599    at compile time. */
2600    
2601    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2602      {
2603      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2604        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2605    #ifdef BSR_ANYCRLF
2606      else md->moptions |= PCRE_BSR_ANYCRLF;
2607    #endif
2608      }
2609    
2610    /* Handle different types of newline. The three bits give eight cases. If
2611    nothing is set at run time, whatever was used at compile time applies. */
2612    
2613    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2614             PCRE_NEWLINE_BITS)
2615      {
2616      case 0: newline = NEWLINE; break;   /* Compile-time default */
2617      case PCRE_NEWLINE_CR: newline = '\r'; break;
2618      case PCRE_NEWLINE_LF: newline = '\n'; break;
2619      case PCRE_NEWLINE_CR+
2620           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2621      case PCRE_NEWLINE_ANY: newline = -1; break;
2622      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2623      default: return PCRE_ERROR_BADNEWLINE;
2624      }
2625    
2626    if (newline == -2)
2627      {
2628      md->nltype = NLTYPE_ANYCRLF;
2629      }
2630    else if (newline < 0)
2631      {
2632      md->nltype = NLTYPE_ANY;
2633      }
2634    else
2635      {
2636      md->nltype = NLTYPE_FIXED;
2637      if (newline > 255)
2638        {
2639        md->nllen = 2;
2640        md->nl[0] = (newline >> 8) & 255;
2641        md->nl[1] = newline & 255;
2642        }
2643      else
2644        {
2645        md->nllen = 1;
2646        md->nl[0] = newline;
2647        }
2648      }
2649    
2650  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2651  back the character offset. */  back the character offset. */
# Line 1717  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2671  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2671  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2672  in other programs later. */  in other programs later. */
2673    
2674  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2675    
2676  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2677  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2678    
2679  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2680  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2681  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2682    
2683  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1734  studied, there may be a bitmap of possib Line 2688  studied, there may be a bitmap of possib
2688    
2689  if (!anchored)  if (!anchored)
2690    {    {
2691    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2692      {      {
2693      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2694      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1751  if (!anchored) Line 2705  if (!anchored)
2705  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2706  character" set. */  character" set. */
2707    
2708  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2709    {    {
2710    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2711    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2712    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2713    }    }
2714    
2715  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1773  for (;;) Line 2727  for (;;)
2727    
2728      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* Advance to a unique first char if possible. If firstline is TRUE, the
2729      start of the match is constrained to the first line of a multiline string.      start of the match is constrained to the first line of a multiline string.
2730      Implement this by temporarily adjusting end_subject so that we stop scanning      Implement this by temporarily adjusting end_subject so that we stop
2731      at a newline. If the match fails at the newline, later code breaks this loop.      scanning at a newline. If the match fails at the newline, later code breaks
2732      */      this loop. */
2733    
2734      if (firstline)      if (firstline)
2735        {        {
2736        const uschar *t = current_subject;        const uschar *t = current_subject;
2737        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2738        end_subject = t;        end_subject = t;
2739        }        }
2740    
# Line 1795  for (;;) Line 2749  for (;;)
2749            current_subject++;            current_subject++;
2750        }        }
2751    
2752      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2753    
2754      else if (startline)      else if (startline)
2755        {        {
2756        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2757          {          {
2758          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2759              current_subject++;
2760    
2761            /* If we have just passed a CR and the newline option is ANY or
2762            ANYCRLF, and we are now at a LF, advance the match position by one more
2763            character. */
2764    
2765            if (current_subject[-1] == '\r' &&
2766                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2767                 current_subject < end_subject &&
2768                 *current_subject == '\n')
2769            current_subject++;            current_subject++;
2770          }          }
2771        }        }
# Line 1882  for (;;) Line 2846  for (;;)
2846    /* OK, now we can do the business */    /* OK, now we can do the business */
2847    
2848    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2849      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2850      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2851      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2852      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2853      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2854      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2855      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2856      wscount,                                   /* size of same */      wscount,                           /* size of same */
2857      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2858      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2859      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2860    
2861    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2862    on only if not anchored. */    on only if not anchored. */
# Line 1902  for (;;) Line 2866  for (;;)
2866    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2867    and firstline is set. */    and firstline is set. */
2868    
2869    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2870    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2871    if (utf8)    if (utf8)
2872      {      {
2873      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2874        current_subject++;        current_subject++;
2875      }      }
 #endif  
   
2876    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2877    }  
2878      /* If we have just passed a CR and we are now at a LF, and the pattern does
2879      not contain any explicit matches for \r or \n, and the newline option is CRLF
2880      or ANY or ANYCRLF, advance the match position by one more character. */
2881    
2882      if (current_subject[-1] == '\r' &&
2883          current_subject < end_subject &&
2884          *current_subject == '\n' &&
2885          (re->flags & PCRE_HASCRORLF) == 0 &&
2886            (md->nltype == NLTYPE_ANY ||
2887             md->nltype == NLTYPE_ANYCRLF ||
2888             md->nllen == 2))
2889        current_subject++;
2890    
2891      }   /* "Bumpalong" loop */
2892    
2893  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2894  }  }

Legend:
Removed from v.77  
changed lines
  Added in v.305

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12